Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
mxmlnkn committed Oct 5, 2024
1 parent e2e4d7c commit 3766ec4
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 9 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,14 @@ jobs:

steps:
- uses: actions/checkout@v4
with:
# We need one tag for testing the git mount.
# The fetch-tags option of actions/checkout is currently broken, hence the explicit fetch step below:
# https://github.com/actions/checkout/issues/1781
fetch-tags: true

- name: Fetch tag for tests
run: git fetch origin refs/tags/v0.15.2:refs/tags/v0.15.2

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
Expand Down Expand Up @@ -167,6 +175,7 @@ jobs:
python3 -m pip install --upgrade-strategy eager --upgrade twine build fusepy cffi
- name: Test Startup With Only One Compression Dependency
if: false
run: |
for module in indexed_gzip indexed_zstd lzmaffi python-xz; do
python3 -m pip install --upgrade "$module"
Expand Down Expand Up @@ -216,7 +225,12 @@ jobs:
# Segfaults (139) are not allowed but other exit codes are valid!
python3 ratarmount.py tests/simple.bz2 || [ $? != 139 ]
- name: Install pip Test Dependencies
run: |
python3 -m pip install -r tests/requirements-tests.txt
- name: Unit Tests
if: false
run: |
python3 -m pip install pytest pytest-xdist
for file in core/tests/test_*.py tests/test_*.py; do
Expand Down Expand Up @@ -246,6 +260,15 @@ jobs:
done
python3 tests/tests.py
- name: Git Debug
if: ${{ !startsWith( matrix.os, 'macos' ) }}
run: |
git status
pwd
ls -la
ls -la .git
git show --stat v0.15.2
- name: Regression Tests (FUSE 3)
if: ${{ !startsWith( matrix.os, 'macos' ) }}
run: |
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ And in contrast to [tarindexer](https://github.com/devsnd/tarindexer), which als
All changes below the mountpoint will be redirected to this folder and deletions are tracked so that all changes can be applied back to the archive.
- **Remote Files and Folders:** A remote archive or whole folder structure can be mounted similar to tools like [sshfs](https://github.com/libfuse/sshfs) thanks to the [filesystem_spec](https://github.com/fsspec/filesystem_spec) project.
These can be specified with URIs as explained in the section ["Remote Files"](#remote-files).
Supported remote protocols include: FTP, SFTP, HTTP, HTTPS, SSH, Git, Github, S3, Samba, Azure Datalake, Dropbox, Google Cloud Storage, ...
Supported remote protocols include: FTP, SFTP, HTTP, HTTPS, SSH, Git, Github, S3, Samba, Azure Datalake, Dropbox, Google Cloud Storage (GCS), ... Many of these are very experimental and may be slow. Azure and GCS are not even tested.


*TAR compressions supported for random access:*
Expand Down
39 changes: 38 additions & 1 deletion core/ratarmountcore/FSSpecMountSource.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import os
import stat
import sys
import time
import urllib
from typing import Dict, IO, Iterable, Optional, Union
Expand All @@ -19,6 +20,38 @@


class FSSpecMountSource(MountSource):
"""
Generic wrapper around fsspec-based filesystems.
At least as "generic" as it gets given that many details are inconsistent between the implementations.
Note also that many implementations are rather experimental, utterly slow, or unstable.
"""
# TODO Reimplement some of the most rudimentary fsspec filesystem implementations natively instead of via fsspec.
# wc -l 'fsspec/implementations/'*.py | sort -n
# 0 fsspec/implementations/__init__.py
# 58 fsspec/implementations/data.py
# 75 fsspec/implementations/cache_mapper.py
# 124 fsspec/implementations/jupyter.py
# 124 fsspec/implementations/tar.py -> SQLiteIndexedTar
# 127 fsspec/implementations/git.py -> TODO
# 152 fsspec/implementations/dask.py
# 176 fsspec/implementations/zip.py -> ZipMountSource
# 180 fsspec/implementations/sftp.py -> fsspec/sshfs
# 213 fsspec/implementations/libarchive.py -> LibarchiveMountSource
# 232 fsspec/implementations/cache_metadata.py
# 239 fsspec/implementations/github.py
# 303 fsspec/implementations/memory.py
# 304 fsspec/implementations/arrow.py
# 372 fsspec/implementations/dirfs.py -> FolderMountSource + chdir
# 395 fsspec/implementations/ftp.py
# 416 fsspec/implementations/smb.py
# 467 fsspec/implementations/dbfs.py
# 471 fsspec/implementations/local.py -> FolderMountSource
# 484 fsspec/implementations/webhdfs.py
# 872 fsspec/implementations/http.py
# 929 fsspec/implementations/cached.py
# 1173 fsspec/implementations/reference.py
# Git is probably the most obvious candidate because it is the most interesting and the most bare-bones implementation.

def __init__(self, urlOrOpenFile, **options) -> None:
"""
urlOrOpenFile : Take a URL or an already opened fsspec Filesystem object.
Expand Down Expand Up @@ -179,7 +212,6 @@ def _getFileInfoHTTP(self, path: str) -> Optional[FileInfo]:

@overrides(MountSource)
def getFileInfo(self, path: str, fileVersion: int = 0) -> Optional[FileInfo]:
print("[getFileInfo]", path, "->", self._getPath(path), "exists:", self.exists(path))
if self._isHTTP:
return self._getFileInfoHTTP(path)

Expand All @@ -204,6 +236,11 @@ def getFileInfo(self, path: str, fileVersion: int = 0) -> Optional[FileInfo]:
# asyncssh.sftp.SFTPNoSuchFile: No such file
return self.rootFileInfo.clone()

# ftp://$user:$password@$host:8021/ -> times out!?
# ftp://$user:$password@$host:8021/tests -> works fine!?
# -> Cannot reproduce this anymore. May have been pyftpdlib problem.
if not self.fileSystem.lexists(path):
return None
return FSSpecMountSource._convertToFileInfo(self.fileSystem.info(path), path)

@overrides(MountSource)
Expand Down
8 changes: 7 additions & 1 deletion core/ratarmountcore/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Disable pylint errors. See https://github.com/fsspec/filesystem_spec/issues/1678

import os
import sys
import traceback

from typing import IO, Optional, Union
Expand Down Expand Up @@ -131,7 +132,12 @@ def openMountSource(fileOrPath: Union[str, IO[bytes]], **options) -> MountSource
if printDebug >= 3:
print(f"[Info] Try to open with {name}")

openFile = fsspec.open(fileOrPath)
# Suppress the warning about the (default!) encoding not being supported for Python < 3.9.
fsspecOptions = {}
if sys.version_info < (3, 9) and protocol == 'ftp':
fsspecOptions['encoding'] = None

openFile = fsspec.open(fileOrPath, **fsspecOptions)
assert isinstance(openFile, fsspec.core.OpenFile)

if printDebug >= 3:
Expand Down
12 changes: 6 additions & 6 deletions tests/runtests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1844,7 +1844,7 @@ checkURLProtocolFTP()

checkURLProtocolSSH()
{
local pid fingerprint
local pid fingerprint publicKey
# rm -f ssh_host_key; ssh-keygen -q -N "" -C "" -t ed25519 -f ssh_host_key
cat <<EOF > ssh_host_key
-----BEGIN OPENSSH PRIVATE KEY-----
Expand All @@ -1860,11 +1860,13 @@ EOF
fingerprint=$( ssh-keyscan -H -p 8022 127.0.0.1 2>/dev/null )
'grep' -q -F "$fingerprint" ~/.ssh/known_hosts || echo "$fingerprint" >> ~/.ssh/known_hosts

[[ -f ~/.ssh/id_ed25519 ]] || ssh-keygen -q -N "" -t ed25519 -f ~/.ssh/id_ed25519
publicKey=$( cat ~/.ssh/id_ed25519.pub )
'grep' -q -F "$publicKey" ssh_user_ca || echo "$publicKey" >> ssh_user_ca

python3 tests/start-asyncssh-server.py &
pid=$!
sleep 1
[[ -f ~/.ssh/id_ed25519 ]] || ssh-keygen -q -N "" -t ed25519 -f ~/.ssh/id_ed25519
cat ~/.ssh/id_ed25519.pub >> ssh_user_ca

checkFileInTAR 'ssh://127.0.0.1:8022/tests/single-file.tar' bar d3b07384d113edec49eaa6238ad5ff00 ||
returnError "$LINENO" 'Failed to read from HTTP server'
Expand Down Expand Up @@ -2039,9 +2041,7 @@ checkRemoteSupport()
# https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
checkURLProtocolFile
checkURLProtocolSamba
#checkURLProtocolGit # TODO does not work in CI for some reason...
# Warning: Trying to open with fsspec raised an exception: 'v0.15.2'
# Error: Mount source does not exist: git://v0.15.2@tests/single-file.tar
checkURLProtocolGit # TODO does not work in CI for some reason...

#checkURLProtocolHTTP # TODO
checkURLProtocolFTP
Expand Down

0 comments on commit 3766ec4

Please sign in to comment.