adlfs: don't append / to dirs in find() and fix glob()
This makes the behavior consistent with localfs/gcsfs/s3fs and also makes glob() work after the changes in fsspec/filesystem_spec#1382.
efiop committed Dec 23, 2023
1 parent ff919cf commit 9b5544d
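A rough sketch of the behaviour change, for orientation only (the account name and container layout are hypothetical; the expected lists mirror the updated tests below):

from adlfs import AzureBlobFileSystem

fs = AzureBlobFileSystem(account_name="myaccount")  # hypothetical account/credentials

fs.find("data/root", withdirs=True)
# before this commit: ["data/root/a/", "data/root/a/file.txt", "data/root/rfile.txt"]
# after this commit:  ["data/root/a", "data/root/a/file.txt", "data/root/rfile.txt"]

fs.glob("data/root/*")
# before this commit: ["data/root/a/", "data/root/rfile.txt"]
# after this commit:  ["data/root/a", "data/root/rfile.txt"]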
Showing 3 changed files with 46 additions and 57 deletions.
29 changes: 9 additions & 20 deletions adlfs/spec.py
@@ -880,12 +880,7 @@ async def _details(

         return output

-    def find(self, path, withdirs=False, prefix="", **kwargs):
-        return sync(
-            self.loop, self._find, path=path, withdirs=withdirs, prefix=prefix, **kwargs
-        )
-
-    async def _find(self, path, withdirs=False, prefix="", with_parent=False, **kwargs):
+    async def _find(self, path, withdirs=False, prefix="", **kwargs):
         """List all files below path.
         Like posix ``find`` command without conditions.
@@ -931,11 +926,8 @@ async def _find(self, path, withdirs=False, prefix="", with_parent=False, **kwar
         for info in infos:
             name = _name = info["name"]
             while True:
-                parent_dir = self._parent(_name).rstrip("/") + "/"
-                if (
-                    parent_dir not in dir_set
-                    and parent_dir != full_path.strip("/") + "/"
-                ):
+                parent_dir = self._parent(_name).rstrip("/")
+                if parent_dir not in dir_set and parent_dir != full_path.strip("/"):
                     dir_set.add(parent_dir)
                     dirs[parent_dir] = {
                         "name": parent_dir,
@@ -960,8 +952,6 @@ async def _find(self, path, withdirs=False, prefix="", with_parent=False, **kwar
                 files[file["name"]] = file

         if withdirs:
-            if not with_parent:
-                dirs.pop(target_path, None)
             files.update(dirs)
         files = {k: v for k, v in files.items() if k.startswith(target_path)}
         names = sorted([n for n in files.keys()])
@@ -1160,7 +1150,9 @@ async def _rm(
         """
         if expand_path:
             path = await self._expand_path(
-                path, recursive=recursive, maxdepth=maxdepth, with_parent=True
+                path,
+                recursive=recursive,
+                maxdepth=maxdepth,
             )
         elif isinstance(path, str):
             path = [path]
@@ -1491,18 +1483,16 @@ async def _expand_path(
         self, path, recursive=False, maxdepth=None, skip_noexist=True, **kwargs
     ):
         """Turn one or more globs or directories into a list of all matching files"""
-        with_parent = kwargs.get(
-            "with_parent", False
-        )  # Sets whether to return the parent dir
-
         if isinstance(path, list):
             path = [f"{p.strip('/')}" for p in path if not p.endswith("*")]
         else:
             if not path.endswith("*"):
                 path = f"{path.strip('/')}"
         if isinstance(path, str):
             out = await self._expand_path(
-                [path], recursive, maxdepth, with_parent=with_parent
+                [path],
+                recursive,
+                maxdepth,
             )
         else:
             out = set()
@@ -1526,7 +1516,6 @@ async def _expand_path(
                     await self._find(
                         glob_p,
                         withdirs=True,
-                        with_parent=with_parent,
                         version_id=version_id,
                     )
                 )
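One side note on the dropped synchronous find() override in adlfs/spec.py: fsspec's AsyncFileSystem mirrors async _-prefixed methods onto sync counterparts, which is presumably why the explicit wrapper could be dropped once _find() no longer carried the extra with_parent keyword. A minimal sketch of that mirroring, assuming standard fsspec behaviour (MiniFS and its toy listing are made up):

import fsspec.asyn

class MiniFS(fsspec.asyn.AsyncFileSystem):
    async def _find(self, path, withdirs=False, prefix="", **kwargs):
        # Toy listing: pretend each directory contains exactly one file.
        return [f"{path.rstrip('/')}/file.txt"]

fs = MiniFS()
# The synchronous find() is generated from _find() by fsspec itself,
# so subclasses such as adlfs only need to implement the async side.
print(fs.find("data/root"))  # ['data/root/file.txt']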
72 changes: 36 additions & 36 deletions adlfs/tests/test_spec.py
@@ -450,35 +450,35 @@ def test_find(storage):

     # all files and directories
     assert fs.find("data/root", withdirs=True) == [
-        "data/root/a/",
+        "data/root/a",
         "data/root/a/file.txt",
-        "data/root/a1/",
+        "data/root/a1",
         "data/root/a1/file1.txt",
-        "data/root/b/",
+        "data/root/b",
         "data/root/b/file.txt",
-        "data/root/c/",
+        "data/root/c",
         "data/root/c/file1.txt",
         "data/root/c/file2.txt",
-        "data/root/d/",
+        "data/root/d",
         "data/root/d/file_with_metadata.txt",
-        "data/root/e+f/",
+        "data/root/e+f",
         "data/root/e+f/file1.txt",
         "data/root/e+f/file2.txt",
         "data/root/rfile.txt",
     ]
     assert fs.find("data/root/", withdirs=True) == [
-        "data/root/a/",
+        "data/root/a",
         "data/root/a/file.txt",
-        "data/root/a1/",
+        "data/root/a1",
         "data/root/a1/file1.txt",
-        "data/root/b/",
+        "data/root/b",
         "data/root/b/file.txt",
-        "data/root/c/",
+        "data/root/c",
         "data/root/c/file1.txt",
         "data/root/c/file2.txt",
-        "data/root/d/",
+        "data/root/d",
         "data/root/d/file_with_metadata.txt",
-        "data/root/e+f/",
+        "data/root/e+f",
         "data/root/e+f/file1.txt",
         "data/root/e+f/file2.txt",
         "data/root/rfile.txt",
@@ -494,17 +494,17 @@ def test_find(storage):
     ]

     assert fs.find("data/root", prefix="a", withdirs=True) == [
-        "data/root/a/",
+        "data/root/a",
         "data/root/a/file.txt",
-        "data/root/a1/",
+        "data/root/a1",
         "data/root/a1/file1.txt",
     ]

     find_results = fs.find("data/root", prefix="a1", withdirs=True, detail=True)
     assert_blobs_equals(
         list(find_results.values()),
         [
-            {"name": "data/root/a1/", "size": 0, "type": "directory"},
+            {"name": "data/root/a1", "size": 0, "type": "directory"},
             {
                 "name": "data/root/a1/file1.txt",
                 "size": 10,
@@ -551,12 +551,12 @@ def test_glob(storage):

     # top-level contents of a directory
     assert fs.glob("data/root/*") == [
-        "data/root/a/",
-        "data/root/a1/",
-        "data/root/b/",
-        "data/root/c/",
-        "data/root/d/",
-        "data/root/e+f/",
+        "data/root/a",
+        "data/root/a1",
+        "data/root/b",
+        "data/root/c",
+        "data/root/d",
+        "data/root/e+f",
         "data/root/rfile.txt",
     ]

@@ -606,36 +606,36 @@ def test_glob(storage):

     # all files
     assert fs.glob("data/root/**") == [
-        "data/root/a/",
+        "data/root/a",
         "data/root/a/file.txt",
-        "data/root/a1/",
+        "data/root/a1",
         "data/root/a1/file1.txt",
-        "data/root/b/",
+        "data/root/b",
         "data/root/b/file.txt",
-        "data/root/c/",
+        "data/root/c",
         "data/root/c/file1.txt",
         "data/root/c/file2.txt",
-        "data/root/d/",
+        "data/root/d",
         "data/root/d/file_with_metadata.txt",
-        "data/root/e+f/",
+        "data/root/e+f",
         "data/root/e+f/file1.txt",
         "data/root/e+f/file2.txt",
         "data/root/rfile.txt",
     ]
-    assert fs.glob("data/roo**") == [
-        "data/root/",
-        "data/root/a/",
+    assert fs.glob("data/roo*/**") == [
+        "data/root",
+        "data/root/a",
         "data/root/a/file.txt",
-        "data/root/a1/",
+        "data/root/a1",
         "data/root/a1/file1.txt",
-        "data/root/b/",
+        "data/root/b",
         "data/root/b/file.txt",
-        "data/root/c/",
+        "data/root/c",
         "data/root/c/file1.txt",
         "data/root/c/file2.txt",
-        "data/root/d/",
+        "data/root/d",
         "data/root/d/file_with_metadata.txt",
-        "data/root/e+f/",
+        "data/root/e+f",
         "data/root/e+f/file1.txt",
         "data/root/e+f/file2.txt",
         "data/root/rfile.txt",
@@ -1191,7 +1191,7 @@ def test_dask_parquet(storage):
         write_metadata_file=True,
     )
     assert fs.glob("test/test_group3.parquet/*") == [
-        "test/test_group3.parquet/A=1/",
+        "test/test_group3.parquet/A=1",
         "test/test_group3.parquet/_common_metadata",
         "test/test_group3.parquet/_metadata",
     ]
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -27,7 +27,7 @@ dependencies = [
     "azure-datalake-store>=0.0.46,<0.1",
     "azure-identity",
     "azure-storage-blob>=12.12.0",
-    "fsspec>=2023.9.0",
+    "fsspec>=2023.12.0",
     "aiohttp>=3.7.0",
 ]

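The raised fsspec floor matches the release that, per the commit message and this requirement change, contains the fsspec/filesystem_spec#1382 glob changes the code above adapts to. For downstream users the practical effect is only a tighter constraint when installing; an illustrative (not prescriptive) command:

pip install adlfs "fsspec>=2023.12.0"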