From b17221628922ab7898986ea8bc39028ae56ea491 Mon Sep 17 00:00:00 2001 From: Daniel Tsiang <74436899+DanielTsiang@users.noreply.github.com> Date: Sun, 5 Nov 2023 23:30:00 +0000 Subject: [PATCH] Merge master branch of filesystem_spec (#6) Merge master branch of filesystem_spec into PR branch --------- Co-authored-by: Martin Durant Co-authored-by: Guido Diepen Co-authored-by: Martin Durant --- .github/workflows/main.yaml | 32 +++++++++++++++---------------- ci/environment-friends.yml | 2 +- ci/environment-py38.yml | 2 +- ci/environment-typecheck.yml | 4 +++- ci/environment-win.yml | 2 +- fsspec/generic.py | 4 ++++ fsspec/implementations/webhdfs.py | 30 +++++++++++++++++++++++++---- fsspec/utils.py | 6 +++--- pyproject.toml | 2 +- setup.cfg | 4 ++-- 10 files changed, 58 insertions(+), 30 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 590fafead..dd3a8d3fe 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -71,22 +71,22 @@ jobs: python-version: "3.11" - uses: pre-commit/action@main - typecheck: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Setup conda - uses: mamba-org/setup-micromamba@v1 - with: - environment-file: ci/environment-typecheck.yml - - - name: mypy - shell: bash -l {0} - run: | - mypy fsspec - +# typecheck: +# runs-on: ubuntu-latest +# steps: +# - name: Checkout +# uses: actions/checkout@v3 +# +# - name: Setup conda +# uses: mamba-org/setup-micromamba@v1 +# with: +# environment-file: ci/environment-typecheck.yml +# +# - name: mypy +# shell: bash -l {0} +# run: | +# mypy fsspec +# downstream: name: downstream runs-on: ubuntu-latest diff --git a/ci/environment-friends.yml b/ci/environment-friends.yml index 1d6b7d61b..b2b3c1b25 100644 --- a/ci/environment-friends.yml +++ b/ci/environment-friends.yml @@ -4,7 +4,7 @@ channels: dependencies: - python=3.9 - pytest - - pytest-asyncio + - pytest-asyncio !=0.22.0 - pytest-benchmark - pytest-cov - pytest-mock diff --git a/ci/environment-py38.yml b/ci/environment-py38.yml index 6bd0e1afb..41ee00898 100644 --- a/ci/environment-py38.yml +++ b/ci/environment-py38.yml @@ -21,7 +21,7 @@ dependencies: - pyftpdlib - cloudpickle - pytest - - pytest-asyncio + - pytest-asyncio !=0.22.0 - pytest-benchmark - pytest-cov - pytest-mock diff --git a/ci/environment-typecheck.yml b/ci/environment-typecheck.yml index 59e6abdf5..1b7c482f5 100644 --- a/ci/environment-typecheck.yml +++ b/ci/environment-typecheck.yml @@ -2,7 +2,7 @@ name: test_env channels: - conda-forge dependencies: - - mypy=1.3 + - mypy=1.4.1 - pyarrow - python=3.8 - pip @@ -10,4 +10,6 @@ dependencies: - types-paramiko - types-requests - types-tqdm + - types-paramiko + - types-PyYAML - types-ujson diff --git a/ci/environment-win.yml b/ci/environment-win.yml index 8d22ffc3a..e621fff90 100644 --- a/ci/environment-win.yml +++ b/ci/environment-win.yml @@ -14,7 +14,7 @@ dependencies: - fastparquet - pandas - pytest - - pytest-asyncio + - pytest-asyncio !=0.22.0 - pytest-benchmark - pytest-cov - pytest-mock diff --git a/fsspec/generic.py b/fsspec/generic.py index 85301cc5f..290bb436a 100644 --- a/fsspec/generic.py +++ b/fsspec/generic.py @@ -171,6 +171,10 @@ def __init__(self, default_method="default", **kwargs): self.method = default_method super().__init__(**kwargs) + def _parent(self, path): + fs = _resolve_fs(path, self.method) + return fs.unstrip_protocol(fs._parent(path)) + def _strip_protocol(self, path): # normalization only fs = _resolve_fs(path, self.method) diff --git a/fsspec/implementations/webhdfs.py b/fsspec/implementations/webhdfs.py index 2a57170ea..53e0df53b 100644 --- a/fsspec/implementations/webhdfs.py +++ b/fsspec/implementations/webhdfs.py @@ -21,7 +21,7 @@ class WebHDFS(AbstractFileSystem): """ Interface to HDFS over HTTP using the WebHDFS API. Supports also HttpFS gateways. - Three auth mechanisms are supported: + Four auth mechanisms are supported: insecure: no auth is done, and the user is assumed to be whoever they say they are (parameter ``user``), or a predefined value such as @@ -34,6 +34,8 @@ class WebHDFS(AbstractFileSystem): service. Indeed, this client can also generate such tokens when not insecure. Note that tokens expire, but can be renewed (by a previously specified user) and may allow for proxying. + basic-auth: used when both parameter ``user`` and parameter ``password`` + are provided. """ @@ -47,6 +49,7 @@ def __init__( kerberos=False, token=None, user=None, + password=None, proxy_to=None, kerb_kwargs=None, data_proxy=None, @@ -68,6 +71,9 @@ def __init__( given user: str or None If given, assert the user name to connect with + password: str or None + If given, assert the password to use for basic auth. If password + is provided, user must be provided also proxy_to: str or None If given, the user has the authority to proxy, and this value is the user in who's name actions are taken @@ -102,8 +108,19 @@ def __init__( " token" ) self.pars["delegation"] = token - if user is not None: - self.pars["user.name"] = user + self.user = user + self.password = password + + if password is not None: + if user is None: + raise ValueError( + "If passing a password, the user must also be" + "set in order to set up the basic-auth" + ) + else: + if user is not None: + self.pars["user.name"] = user + if proxy_to is not None: self.pars["doas"] = proxy_to if kerberos and user is not None: @@ -126,8 +143,13 @@ def _connect(self): self.session.auth = HTTPKerberosAuth(**self.kerb_kwargs) + if self.user is not None and self.password is not None: + from requests.auth import HTTPBasicAuth + + self.session.auth = HTTPBasicAuth(self.user, self.password) + def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs): - url = self.url + quote(path or "") + url = self._apply_proxy(self.url + quote(path or "")) args = kwargs.copy() args.update(self.pars) args["op"] = op.upper() diff --git a/fsspec/utils.py b/fsspec/utils.py index 38d878a06..34f1ad821 100644 --- a/fsspec/utils.py +++ b/fsspec/utils.py @@ -320,7 +320,7 @@ def tokenize(*args: Any, **kwargs: Any) -> str: h = md5(str(args).encode()) except ValueError: # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380 - h = md5(str(args).encode(), usedforsecurity=False) # type: ignore[call-arg] + h = md5(str(args).encode(), usedforsecurity=False) return h.hexdigest() @@ -631,7 +631,7 @@ def atomic_write(path: str, mode: str = "wb"): def _translate(pat, STAR, QUESTION_MARK): # Copied from: https://github.com/python/cpython/pull/106703. - res = [] + res: list[str] = [] add = res.append i, n = 0, len(pat) while i < n: @@ -709,7 +709,7 @@ def glob_translate(pat): # recursive=True, include_hidden=True, seps=None """Translate a pathname with shell wildcards to a regular expression.""" if os.path.altsep: - seps = (os.path.sep, os.path.altsep) + seps = os.path.sep + os.path.altsep else: seps = os.path.sep escaped_seps = "".join(map(re.escape, seps)) diff --git a/pyproject.toml b/pyproject.toml index cfa8bbaf9..ab9055633 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.black] -target_version = ['py37', 'py38'] +target_version = ['py310'] line-length = 88 skip-string-normalization = false exclude = ''' diff --git a/setup.cfg b/setup.cfg index 42e7ad282..67467cd20 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,8 +43,8 @@ follow_imports = normal ignore_missing_imports = True enable_error_code = ignore-without-code,truthy-bool,truthy-iterable,unused-awaitable -disallow_untyped_decorators = True -strict_equality = True +disallow_untyped_decorators = False +strict_equality = False warn_redundant_casts = True warn_unused_configs = True warn_unused_ignores = True