Skip to content

Commit

Permalink
enable passing of a full path to file_stem
Browse files (browse the repository at this point in the history)
  • Loading branch information
mattseddon committed Aug 15, 2024
1 parent ea02700 commit 0d18b13
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 8 deletions.
2 changes: 1 addition & 1 deletion examples/computer_vision/openimage-detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def openimage_detect(args):
.filter(C("file.path").glob("*.jpg") | C("file.path").glob("*.json"))
.agg(
openimage_detect,
- partition_by=path.file_stem(path.name(C("file.path"))),
+ partition_by=path.file_stem(C("file.path")),
params=["file"],
output={"file": File, "bbox": BBox},
)
Expand Down
4 changes: 2 additions & 2 deletions examples/get_started/common_sql_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def num_chars_udf(file):

(
dc.mutate(
- stem=path.file_stem(path.name(C("file.path"))),
- ext=path.file_ext(path.name(C("file.path"))),
+ stem=path.file_stem(C("file.path")),
+ ext=path.file_ext(C("file.path")),
)
.select("file.path", "stem", "ext")
.show(5)
Expand Down
34 changes: 30 additions & 4 deletions src/datachain/sql/sqlite/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,19 +221,45 @@ def path_name(path):
return func.ltrim(func.substr(path, func.length(path_parent(path)) + 1), slash)


- def path_file_ext_length(path):
- name = path_name(path)
+ def name_file_ext_length(name):
expr = func.length(name) - func.length(
func.rtrim(name, func.replace(name, dot, empty_str))
)
return case((func.instr(name, dot) == 0, 0), else_=expr)


+ def path_file_ext_length(path):
+ name = path_name(path)
+ return name_file_ext_length(name)
+
+
def path_file_stem(path):
- return func.rtrim(
- func.substr(path, 1, func.length(path) - path_file_ext_length(path)), dot
+ path_length = func.length(path)
+ parent_length = func.length(path_parent(path))
+
+ name_expr = func.rtrim(
+ func.substr(
+ path,
+ 1,
+ path_length - name_file_ext_length(path),
+ ),
+ dot,
)
+
+ full_path_expr = func.ltrim(
+ func.rtrim(
+ func.substr(
+ path,
+ parent_length + 1,
+ path_length - parent_length - path_file_ext_length(path),
+ ),
+ dot,
+ ),
+ slash,
+ )
+
+ return case((func.instr(path, slash) == 0, name_expr), else_=full_path_expr)


def path_file_ext(path):
return func.substr(path, func.length(path) - path_file_ext_length(path) + 1)
Expand Down
3 changes: 2 additions & 1 deletion tests/unit/sql/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def split_parent(path):


def file_stem(path):
- return pp.splitext(path)[0].rstrip(".")
+ name = split_parent(path)[1]
+ return pp.splitext(name)[0].rstrip(".")


def file_ext(path):
Expand Down

0 comments on commit 0d18b13

Please sign in to comment.