Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EXP: skipmer sketching #531

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
68 changes: 33 additions & 35 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ crate-type = ["cdylib"]
pyo3 = { version = "0.23.2", features = ["extension-module", "anyhow"] }
rayon = "1.10.0"
serde = { version = "1.0.216", features = ["derive"] }
sourmash = { version = "0.17.2", features = ["branchwater"] }
#sourmash = { version = "0.17.2", features = ["branchwater"] }
sourmash = {git = "https://github.com/sourmash-bio/sourmash/", branch = "try-skipmers", features = ["branchwater"]}
serde_json = "1.0.133"
niffler = "2.4.0"
log = "0.4.22"
env_logger = { version = "0.11.5" }
simple-error = "0.3.1"
anyhow = "1.0.94"
zip = { version = "2.0", default-features = false }
zip = { version = "=2.0", default-features = false }
tempfile = "3.14"
needletail = "0.5.1"
csv = "1.3.1"
Expand Down
26 changes: 13 additions & 13 deletions src/python/sourmash_plugin_branchwater/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ def __init__(self, p):
"-m",
"--moltype",
default="DNA",
choices=["DNA", "protein", "dayhoff", "hp"],
help="molecule type (DNA, protein, dayhoff, or hp; default DNA)",
choices=["DNA", "protein", "dayhoff", "hp", "skipm1n3", "skipm2n3"],
help="molecule type: DNA, protein, dayhoff, hp, or skipmer (skipm1n3 or skipm2n3); default DNA",
)
p.add_argument(
"-c",
Expand Down Expand Up @@ -180,8 +180,8 @@ def __init__(self, p):
"-m",
"--moltype",
default="DNA",
choices=["DNA", "protein", "dayhoff", "hp"],
help="molecule type (DNA, protein, dayhoff, or hp; default DNA)",
choices=["DNA", "protein", "dayhoff", "hp", "skipm1n3", "skipm2n3"],
help="molecule type: DNA, protein, dayhoff, hp, or skipmer (skipm1n3 or skipm2n3); default DNA",
)
p.add_argument(
"-c",
Expand Down Expand Up @@ -257,8 +257,8 @@ def __init__(self, p):
"-m",
"--moltype",
default="DNA",
choices=["DNA", "protein", "dayhoff", "hp"],
help="molecule type (DNA, protein, dayhoff, or hp; default DNA)",
choices=["DNA", "protein", "dayhoff", "hp", "skipm1n3", "skipm2n3"],
help="molecule type: DNA, protein, dayhoff, hp, or skipmer (skipm1n3 or skipm2n3); default DNA",
)
p.add_argument(
"-c",
Expand Down Expand Up @@ -341,8 +341,8 @@ def __init__(self, p):
"-m",
"--moltype",
default="DNA",
choices=["DNA", "protein", "dayhoff", "hp"],
help="molecule type (DNA, protein, dayhoff, or hp; default DNA)",
choices=["DNA", "protein", "dayhoff", "hp", "skipm1n3", "skipm2n3"],
help="molecule type: DNA, protein, dayhoff, hp, or skipmer (skipm1n3 or skipm2n3); default DNA",
)
p.add_argument(
"-c",
Expand Down Expand Up @@ -443,8 +443,8 @@ def __init__(self, p):
"-m",
"--moltype",
default="DNA",
choices=["DNA", "protein", "dayhoff", "hp"],
help="molecule type (DNA, protein, dayhoff, or hp; default DNA)",
choices=["DNA", "protein", "dayhoff", "hp", "skipm1n3", "skipm2n3"],
help="molecule type: DNA, protein, dayhoff, hp, or skipmer (skipm1n3 or skipm2n3); default DNA",
)
p.add_argument(
"-c",
Expand Down Expand Up @@ -530,8 +530,8 @@ def __init__(self, p):
"-m",
"--moltype",
default="DNA",
choices=["DNA", "protein", "dayhoff", "hp"],
help="molecule type (DNA, protein, dayhoff, or hp; default DNA)",
choices=["DNA", "protein", "dayhoff", "hp", "skipm1n3", "skipm2n3"],
help="molecule type: DNA, protein, dayhoff, hp, or skipmer (skipm1n3 or skipm2n3); default DNA",
)
p.add_argument(
"-c",
Expand Down Expand Up @@ -618,7 +618,7 @@ def main(self, args):
args.param_string = ["k=31,scaled=1000,dna"]

# Check and append 'dna' if no moltype is found in a param string
moltypes = ["dna", "protein", "dayhoff", "hp"]
moltypes = ["dna", "protein", "dayhoff", "hp", "skipm1n3", "skipm2n3"]
updated_param_strings = []

for param in args.param_string:
Expand Down
45 changes: 8 additions & 37 deletions src/python/tests/sourmash_tst_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
import collections
import pprint

import pkg_resources
from pkg_resources import Requirement, resource_filename, ResolutionError
import importlib.metadata
import traceback
from io import open # pylint: disable=redefined-builtin
from io import StringIO
Expand Down Expand Up @@ -61,46 +60,18 @@ def index_siglist(
return db


def scriptpath(scriptname="sourmash"):
    """Return the directory holding *scriptname*, in dev and install layouts.

    It doesn't matter what the script name is, as long as it's some script
    present in this version of sourmash.

    Candidate directories are probed in order: the parent of this file's
    directory, an ``EGG-INFO`` directory two levels up, then each entry of
    the ``PATH`` environment variable.

    Returns the first directory that contains *scriptname*, or ``None`` when
    no candidate does.
    """
    here = os.path.dirname(__file__)
    candidates = [
        os.path.join(here, "../"),
        os.path.join(here, "../../EGG-INFO/"),
    ]

    # Split on os.pathsep rather than a literal ":" so that Windows-style
    # PATH values (";"-separated, with "C:\..." drive letters) are handled,
    # and tolerate PATH being unset instead of raising KeyError.
    search_path = os.environ.get("PATH")
    if search_path is not None:
        candidates.extend(search_path.split(os.pathsep))

    for path in candidates:
        if os.path.exists(os.path.join(path, scriptname)):
            return path

    return None


def _runscript(scriptname):
"""Find & run a script with exec (i.e. not via os.system or subprocess)."""
# NOTE(review): indentation was lost in this rendering, and this span looks
# like a diff hunk with its +/- markers stripped: the pkg_resources/exec path
# and the importlib.metadata path appear to be the "before" and "after"
# versions of the same body shown back to back -- TODO confirm against the
# actual file before relying on either path.
namespace = {"__name__": "__main__"}
namespace["sys"] = globals()["sys"]

# Resolve `scriptname` as a "console_scripts" entry point of the "sourmash"
# distribution and call it; a ResolutionError falls through to the code below.
try:
pkg_resources.load_entry_point("sourmash", "console_scripts", scriptname)()
return 0
except pkg_resources.ResolutionError:
pass

# Fallback: build the script's path from scriptpath() and exec its compiled
# source with `namespace` as globals, so it runs with __name__ == "__main__".
path = scriptpath()

scriptfile = os.path.join(path, scriptname)
if os.path.isfile(scriptfile):
# NOTE(review): this repeats the isfile() check from the line above.
if os.path.isfile(scriptfile):
exec( # pylint: disable=exec-used
compile(open(scriptfile).read(), scriptfile, "exec"), namespace
)
return 0

# Reached when no file exists at the computed script location.
return -1
# Looks up the "console_scripts" entry point named "sourmash", requires exactly
# one match, then loads and calls it.
# NOTE(review): the name is hard-coded, so `scriptname` is not consulted by
# these lines.
entry_points = importlib.metadata.entry_points(
group="console_scripts", name="sourmash"
)
assert len(entry_points) == 1
smash_cli = tuple(entry_points)[0].load()
smash_cli()
return 0


ScriptResults = collections.namedtuple("ScriptResults", ["status", "out", "err"])
Expand Down
Binary file added src/python/tests/test-data/SRR606249.skipm2n3.zip
Binary file not shown.
Binary file added src/python/tests/test-data/skipm2n3.zip
Binary file not shown.
Loading
Loading