Skip to content

Commit

Permalink
add regexp replace
Browse files Browse the repository at this point in the history
  • Loading branch information
EdwardLi-coder committed Aug 18, 2024
1 parent 9642030 commit 53b0055
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
13 changes: 13 additions & 0 deletions src/datachain/sql/functions/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,18 @@ class split(GenericFunction): # noqa: N801
inherit_cache = True


class regexp_replace(GenericFunction):  # noqa: N801
    """
    Replaces substrings that match a regular expression.

    Declared as the ``regexp_replace`` function of the ``string`` package,
    with a ``String`` result type.
    """

    name = "regexp_replace"
    package = "string"
    type = String()
    inherit_cache = True


# Apply compiler_not_implemented to each string function, in the same order
# as before: regexp_replace, length, split.
for _sql_function in (regexp_replace, length, split):
    compiler_not_implemented(_sql_function)
del _sql_function  # do not leave the loop variable in the module namespace
16 changes: 16 additions & 0 deletions src/datachain/sql/sqlite/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import re
import sqlite3
from collections.abc import Iterable
from datetime import MAXYEAR, MINYEAR, datetime, timezone
Expand Down Expand Up @@ -178,9 +179,15 @@ def create_vector_functions(conn):

_registered_function_creators["vector_functions"] = create_vector_functions

def sqlite_regexp_replace(
    string: "str | None", pattern: "str | None", replacement: "str | None"
) -> "str | None":
    """Replace every match of ``pattern`` in ``string`` with ``replacement``.

    Intended to be registered as a SQLite user-defined function, so any
    argument may arrive as ``None`` (SQL NULL).

    Args:
        string: The text to search.
        pattern: A regular expression in Python ``re`` syntax.
        replacement: The replacement text; ``re.sub`` backreferences such as
            ``\\1`` are honoured.

    Returns:
        The text with all non-overlapping matches replaced, or ``None`` when
        any argument is ``None``.

    Raises:
        re.error: If ``pattern`` is not a valid regular expression.
    """
    # re.sub() raises TypeError on None, which sqlite3 would surface as an
    # opaque "user-defined function raised exception" error. Propagate NULL
    # instead, as SQL functions conventionally do.
    if string is None or pattern is None or replacement is None:
        return None
    return re.sub(pattern, replacement, string)

Check warning on line 183 in src/datachain/sql/sqlite/base.py

View check run for this annotation

Codecov / codecov/patch

src/datachain/sql/sqlite/base.py#L183

Added line #L183 was not covered by tests

def create_string_functions(conn):
    """Register the string helper functions on the connection ``conn``.

    ``split`` is registered for both 2 and 3 arguments and ``regexp_replace``
    for 3 arguments; every registration passes ``deterministic=True``.
    """
    registrations = (
        ("split", 2, sqlite_string_split),
        ("split", 3, sqlite_string_split),
        ("regexp_replace", 3, sqlite_regexp_replace),
    )
    for sql_name, arg_count, implementation in registrations:
        conn.create_function(sql_name, arg_count, implementation, deterministic=True)


_registered_function_creators["string_functions"] = create_string_functions

Expand Down Expand Up @@ -239,6 +246,10 @@ def path_file_ext(path):
return func.substr(path, func.length(path) - path_file_ext_length(path) + 1)


def compile_regexp_replace(element, compiler, **kwargs):
    """Render a ``regexp_replace(...)`` call for ``element``.

    The element's clauses are compiled through ``compiler.process`` (with
    ``kwargs`` forwarded) and the result is placed between the parentheses.
    """
    rendered_args = compiler.process(element.clauses, **kwargs)
    return f"regexp_replace({rendered_args})"

Check warning on line 250 in src/datachain/sql/sqlite/base.py

View check run for this annotation

Codecov / codecov/patch

src/datachain/sql/sqlite/base.py#L250

Added line #L250 was not covered by tests


def compile_path_parent(element, compiler, **kwargs):
    """Compile ``element`` by expanding it through ``path_parent``.

    The element's individual clauses are unpacked as the arguments of
    ``path_parent`` and the resulting expression is handed to
    ``compiler.process`` together with ``kwargs``.
    """
    arguments = element.clauses.clauses
    parent_expression = path_parent(*arguments)
    return compiler.process(parent_expression, **kwargs)

Expand Down Expand Up @@ -370,3 +381,8 @@ def load_usearch_extension(conn) -> bool:

except Exception: # noqa: BLE001
return False


@compiles(string.regexp_replace, "sqlite")
def _compile_regexp_replace_sqlite(element, compiler, **kwargs):
    """Compilation hook for ``string.regexp_replace`` on the "sqlite" dialect.

    Delegates to ``compile_regexp_replace`` and returns its result unchanged.
    """
    sql_text = compile_regexp_replace(element, compiler, **kwargs)
    return sql_text

Check warning on line 388 in src/datachain/sql/sqlite/base.py

View check run for this annotation

Codecov / codecov/patch

src/datachain/sql/sqlite/base.py#L388

Added line #L388 was not covered by tests

0 comments on commit 53b0055

Please sign in to comment.