From 79a0b07b7b05377b7a2f71da9ae12195e74f8e0e Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio <35803280+lobis@users.noreply.github.com> Date: Mon, 27 Nov 2023 16:57:28 -0600 Subject: [PATCH] test: improve path object split tests (#1039) * feat: add fsspec as required dependency (#1021) * fsspec requirements * simplify fsspec import * use loop property * correctly create schemes list * feat: set fsspec as default source (#1023) * feat: add fsspec as required dependency (#1021) * fsspec requirements * simplify fsspec import * use loop property * correctly create schemes list * remove deprecated handlers from docs * simplify source selection * return object source * pickle executor * rename test * test more handlers * option to check writeable file-like object * rename test * explicitly set handler * fix s3 source * rename test * Revert "fix s3 source" This reverts commit e76fdbb1e3011a2ad3bd9ea8cb4634b43caa6af6. * sesparate PR for s3 fix (https://github.com/scikit-hep/uproot5/pull/1024) * strip file:// * rename test * rename tests * add aiohttp skip * attempt to parse windows paths * test ci * Revert "test ci" This reverts commit 4c1c8a5bdc3cbbab540c125807b1139a51e9c455. * rename test * remove fsspec from test * remove *_handler options * update defaults * do not override default s3 * do not use fsspec for multiprocessing * rename test * fix not selecting object source * missing import * normalize doc * remove helper * never return None as source * remove unnecessary xrootd source default override since fsspec is default now * rename test * add empty class to pass old pickle test * feat: set `fsspec` (`s3fs`) as default handler for s3 paths (#1032) * feat: add fsspec as required dependency (#1021) * fsspec requirements * simplify fsspec import * use loop property * correctly create schemes list * feat: set fsspec as default source (#1023) * feat: add fsspec as required dependency (#1021) * fsspec requirements * simplify fsspec import * use loop property * correctly create schemes list * remove deprecated handlers from docs * simplify source selection * return object source * pickle executor * rename test * test more handlers * option to check writeable file-like object * rename test * explicitly set handler * fix s3 source * rename test * Revert "fix s3 source" This reverts commit e76fdbb1e3011a2ad3bd9ea8cb4634b43caa6af6. * sesparate PR for s3 fix (https://github.com/scikit-hep/uproot5/pull/1024) * strip file:// * rename test * rename tests * add aiohttp skip * attempt to parse windows paths * test ci * Revert "test ci" This reverts commit 4c1c8a5bdc3cbbab540c125807b1139a51e9c455. * rename test * remove fsspec from test * remove *_handler options * update defaults * do not override default s3 * do not use fsspec for multiprocessing * rename test * fix not selecting object source * missing import * normalize doc * remove helper * never return None as source * remove unnecessary xrootd source default override since fsspec is default now * rename test * add empty class to pass old pickle test * set version to 5.2.0rc1 (release candidate) * set s3fs as default for s3 * test different handlers * correct serialization of fsspec source * feat: simplify object path split (#1028) * simplify object path split * add example from https://github.com/scikit-hep/uproot5/issues/975 * fix tests * add more test cases * test case update * remove scheme unused regex * feat: fsspec for all non-object writing - %-encoded urls no longer decoded (#1034) * writing goes through fsspec * increase rc version * type hints and docs * add helper methods, create * throw more specific error * add additional test for `create` failure with scheme other than local * simplify source selection * remove windows specific code * raise exception if invalid combination of handler / input (file-like object and fsspec) * use softer check for file-like object * cover problematic case with additional slash (file:///c:/file.root) * test "file:" scheme (no slash) * test backslash * add new test case * split big test in two * retry on socket error * xrootd iterator * iterate over different files * iterate over tree * pytest fixture for test directory * pytest fixture for test directory * add annotation to open argument * remove repeated test --- src/uproot/reading.py | 4 +- src/uproot/version.py | 2 +- tests/conftest.py | 6 ++ ..._dynamic_classes_cant_be_abc_subclasses.py | 8 +- tests/test_0692_fsspec_reading.py | 65 +++++++++++++ tests/test_0976_path_object_split.py | 91 +++++++++++-------- 6 files changed, 130 insertions(+), 46 deletions(-) diff --git a/src/uproot/reading.py b/src/uproot/reading.py index e33b6fc3a..d046116a8 100644 --- a/src/uproot/reading.py +++ b/src/uproot/reading.py @@ -9,8 +9,6 @@ """ from __future__ import annotations -from __future__ import annotations - import struct import sys import uuid @@ -25,7 +23,7 @@ def open( - path, + path: str | Path | IO | dict[str | Path | IO, str], *, object_cache=100, array_cache="100 MB", diff --git a/src/uproot/version.py b/src/uproot/version.py index 363472158..7dc2ff112 100644 --- a/src/uproot/version.py +++ b/src/uproot/version.py @@ -12,7 +12,7 @@ import re -__version__ = "5.2.0rc2" +__version__ = "5.2.0rc3" version = __version__ version_info = tuple(re.split(r"[-\.]", __version__)) diff --git a/tests/conftest.py b/tests/conftest.py index 86683a71d..e3c4b74a4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,6 +5,7 @@ import contextlib import skhep_testdata from functools import partial +import os # The base http server does not support range requests. Watch https://github.com/python/cpython/issues/86809 for updates from http.server import HTTPServer @@ -69,3 +70,8 @@ def serve_forever(httpd=server): def server(): with serve() as server_url: yield server_url + + +@pytest.fixture(scope="module") +def tests_directory() -> str: + return os.path.dirname(os.path.realpath(__file__)) diff --git a/tests/test_0520_dynamic_classes_cant_be_abc_subclasses.py b/tests/test_0520_dynamic_classes_cant_be_abc_subclasses.py index 1942589ac..0dd6a4b3e 100644 --- a/tests/test_0520_dynamic_classes_cant_be_abc_subclasses.py +++ b/tests/test_0520_dynamic_classes_cant_be_abc_subclasses.py @@ -3,11 +3,7 @@ import pickle import os -import pytest -test_directory = os.path.dirname(os.path.realpath(__file__)) - - -def test_pickle(): - with open(os.path.join(test_directory, "samples/h_dynamic.pkl"), "rb") as f: +def test_pickle(tests_directory): + with open(os.path.join(tests_directory, "samples/h_dynamic.pkl"), "rb") as f: assert len(list(pickle.load(f).axis(0))) == 100 diff --git a/tests/test_0692_fsspec_reading.py b/tests/test_0692_fsspec_reading.py index b24921d80..d2851c0d1 100644 --- a/tests/test_0692_fsspec_reading.py +++ b/tests/test_0692_fsspec_reading.py @@ -244,6 +244,71 @@ def test_fsspec_zip(tmp_path): assert len(data) == 40 +@pytest.mark.network +@pytest.mark.xrootd +@pytest.mark.parametrize( + "handler", + [ + uproot.source.fsspec.FSSpecSource, + uproot.source.xrootd.XRootDSource, + None, + ], +) +def test_open_fsspec_xrootd_iterate_files(handler): + pytest.importorskip("XRootD") + + iterator = uproot.iterate( + files=[ + { + "root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/Run2012B_DoubleMuParked.root": "Events" + }, + { + "root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/Run2012B_DoubleMuParked.root": "Events" + }, + ], + expressions=["run"], + step_size=100, + handler=handler, + ) + + for i, data in enumerate(iterator): + if i >= 5: + break + assert len(data) == 100 + assert all(data["run"] == 194778) + + +@pytest.mark.network +@pytest.mark.xrootd +@pytest.mark.parametrize( + "handler", + [ + uproot.source.fsspec.FSSpecSource, + uproot.source.xrootd.XRootDSource, + None, + ], +) +def test_open_fsspec_xrootd_iterate_tree(handler): + pytest.importorskip("XRootD") + + with uproot.open( + { + "root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/Run2012B_DoubleMuParked.root": "Events" + }, + handler=handler, + ) as f: + iterator = f.iterate( + ["run"], + step_size=100, + ) + + for i, data in enumerate(iterator): + if i >= 5: + break + assert len(data) == 100 + assert all(data["run"] == 194778) + + # https://github.com/scikit-hep/uproot5/issues/1035 @pytest.mark.parametrize( "handler", diff --git a/tests/test_0976_path_object_split.py b/tests/test_0976_path_object_split.py index be6100e01..a5617fa18 100644 --- a/tests/test_0976_path_object_split.py +++ b/tests/test_0976_path_object_split.py @@ -14,13 +14,6 @@ "Events", ), ), - ( - "https://github.com/scikit-hep/scikit-hep-testdata/raw/v0.4.33/src/skhep_testdata/data/uproot-issue121.root", - ( - "https://github.com/scikit-hep/scikit-hep-testdata/raw/v0.4.33/src/skhep_testdata/data/uproot-issue121.root", - None, - ), - ), ( "github://scikit-hep:scikit-hep-testdata@v0.4.33/src/skhep_testdata/data/uproot-issue121.root:Dir/Events", ( @@ -28,13 +21,6 @@ "Dir/Events", ), ), - ( - "github://scikit-hep:scikit-hep-testdata@v0.4.33/src/skhep_testdata/data/uproot-issue121.root", - ( - "github://scikit-hep:scikit-hep-testdata@v0.4.33/src/skhep_testdata/data/uproot-issue121.root", - None, - ), - ), ( " http://localhost:8080/dir/test.root: Test ", ( @@ -56,13 +42,6 @@ "Dir/Test", ), ), - ( - r"C:\tmp\test\dir\file.root", - ( - r"C:\tmp\test\dir\file.root", - None, - ), - ), ( "ssh://user@host:22/path/to/file.root:/object//path", ( @@ -78,10 +57,31 @@ ), ), ( - "ssh://user@host:50230/path/to/file.root", + "s3://bucket/path/to/file.root:/dir////object", ( - "ssh://user@host:50230/path/to/file.root", - None, + "s3://bucket/path/to/file.root", + "dir/object", + ), + ), + ( + "s3://bucket/path/to/file.root:", + ( + "s3://bucket/path/to/file.root", + "", + ), + ), + ( + "ssh://user@host:22/path/to/file.root:/object//path", + ( + "ssh://user@host:22/path/to/file.root", + "object/path", + ), + ), + ( + "ssh://user@host:22/path/to/file.root:/object//path:with:colon:in:path/something/", + ( + "ssh://user@host:22/path/to/file.root", + "object/path:with:colon:in:path/something", ), ), ( @@ -105,11 +105,12 @@ "Events", ), ), + # https://github.com/scikit-hep/uproot5/issues/975 ( - "00376186-543E-E311-8D30-002618943857.root", + "DAOD_PHYSLITE_2023-09-13T1230.art.rntuple.root:RNT:CollectionTree", ( - "00376186-543E-E311-8D30-002618943857.root", - None, + "DAOD_PHYSLITE_2023-09-13T1230.art.rntuple.root", + "RNT:CollectionTree", ), ), # https://github.com/scikit-hep/uproot5/issues/975 @@ -155,13 +156,6 @@ "Events/MET_pt", ), ), - ( - "/some/weird/path:with:colons/file.root", - ( - "/some/weird/path:with:colons/file.root", - None, - ), - ), ( r"C:\tmp\test\dir\my%20file.root:Dir/Test", ( @@ -178,15 +172,40 @@ def test_url_split(input_value, expected_output): assert obj == obj_expected +@pytest.mark.parametrize( + "input_value", + [ + "/some/weird/path:with:colons/file.root", + "00376186-543E-E311-8D30-002618943857.root", + " file.root", + "dir/file with spaces.root", + "ssh://user@host:50230/path/to/file.root", + r"C:\tmp\test\dir\file.root", + "github://scikit-hep:scikit-hep-testdata@v0.4.33/src/skhep_testdata/data/uproot-issue121.root", + "https://github.com/scikit-hep/scikit-hep-testdata/raw/v0.4.33/src/skhep_testdata/data/uproot-issue121.root", + "root://xcache.af.uchicago.edu:1094//root://fax.mwt2.org:1094//pnfs/uchicago.edu/atlaslocalgroupdisk/rucio/data18_13TeV/df/a4/DAOD_PHYSLITE.34858087._000001.pool.root.1", + "root://xcacheserver:2222//root://originserver:1111/path/file", + "https://xcacheserver:1111//root[s]://originserver:12222/path/file", + "roots://xcacheserver:2312//https://originserver:3122/path/file", + "http://xcacheserver:8762//https://originserver:4212/path/file", + ], +) +def test_url_no_split(input_value): + url, obj = uproot._util.file_object_path_split(input_value) + assert obj is None + assert url == input_value.strip() + + @pytest.mark.parametrize( "input_value", [ "local/file.root.zip://Events", "local/file.roo://Events", "local/file://Events", + "http://xcacheserver:8762//https://originserver:4212/path/file.root.1:CollectionTree", ], ) def test_url_split_invalid(input_value): - path, obj = uproot._util.file_object_path_split(input_value) + url, obj = uproot._util.file_object_path_split(input_value) assert obj is None - assert path == input_value + assert url == input_value