Skip to content

Commit

Permalink
[pre-commit.ci] pre-commit autoupdate (#3290)
Browse files: browse the repository at this point in the history
<!--pre-commit.ci start-->
updates:
- [github.com/astral-sh/ruff-pre-commit: v0.5.6 → v0.5.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.6...v0.5.7)
<!--pre-commit.ci end-->

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
pre-commit-ci[bot] authored Aug 20, 2024
1 parent 38edd5b commit d3ae5da
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 94 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ repos:
- id: check-toml
- id: debug-statements
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.6
rev: v0.6.1
hooks:
- id: ruff-format
- id: ruff
Expand Down
81 changes: 42 additions & 39 deletions doc/kmers-and-minhash.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@
"def jaccard_similarity(a, b):\n",
" a = set(a)\n",
" b = set(b)\n",
" \n",
"\n",
" intersection = len(a.intersection(b))\n",
" union = len(a.union(b))\n",
" \n",
"\n",
" return intersection / union"
]
},
Expand All @@ -65,9 +65,9 @@
"def jaccard_containment(a, b):\n",
" a = set(a)\n",
" b = set(b)\n",
" \n",
"\n",
" intersection = len(a.intersection(b))\n",
" \n",
"\n",
" return intersection / len(a)"
]
},
Expand All @@ -84,9 +84,9 @@
"metadata": {},
"outputs": [],
"source": [
"a = ['ATGG', 'AACC']\n",
"b = ['ATGG', 'CACA']\n",
"c = ['ATGC', 'CACA']"
"a = [\"ATGG\", \"AACC\"]\n",
"b = [\"ATGG\", \"CACA\"]\n",
"c = [\"ATGC\", \"CACA\"]"
]
},
{
Expand Down Expand Up @@ -270,11 +270,11 @@
"def build_kmers(sequence, ksize):\n",
" kmers = []\n",
" n_kmers = len(sequence) - ksize + 1\n",
" \n",
"\n",
" for i in range(n_kmers):\n",
" kmer = sequence[i:i + ksize]\n",
" kmer = sequence[i : i + ksize]\n",
" kmers.append(kmer)\n",
" \n",
"\n",
" return kmers"
]
},
Expand Down Expand Up @@ -307,7 +307,7 @@
}
],
"source": [
"build_kmers('ATGGACCAGATATAGGGAGAGCCAGGTAGGACA', 21)"
"build_kmers(\"ATGGACCAGATATAGGGAGAGCCAGGTAGGACA\", 21)"
]
},
{
Expand All @@ -325,8 +325,8 @@
"metadata": {},
"outputs": [],
"source": [
"seq1 = 'ATGGACCAGATATAGGGAGAGCCAGGTAGGACA'\n",
"seq2 = 'ATGGACCAGATATTGGGAGAGCCGGGTAGGACA'\n",
"seq1 = \"ATGGACCAGATATAGGGAGAGCCAGGTAGGACA\"\n",
"seq2 = \"ATGGACCAGATATTGGGAGAGCCGGGTAGGACA\"\n",
"# differences: ^ ^"
]
},
Expand Down Expand Up @@ -375,13 +375,14 @@
"metadata": {},
"outputs": [],
"source": [
"import screed # a library for reading in FASTA/FASTQ\n",
"import screed # a library for reading in FASTA/FASTQ\n",
"\n",
"\n",
"def read_kmers_from_file(filename, ksize):\n",
" all_kmers = []\n",
" for record in screed.open(filename):\n",
" sequence = record.sequence\n",
" \n",
"\n",
" kmers = build_kmers(sequence, ksize)\n",
" all_kmers += kmers\n",
"\n",
Expand All @@ -394,7 +395,7 @@
"metadata": {},
"outputs": [],
"source": [
"akker_kmers = read_kmers_from_file('genomes/akkermansia.fa', 31)"
"akker_kmers = read_kmers_from_file(\"genomes/akkermansia.fa\", 31)"
]
},
{
Expand Down Expand Up @@ -444,8 +445,8 @@
"metadata": {},
"outputs": [],
"source": [
"shew1_kmers = read_kmers_from_file('genomes/shew_os185.fa', 31)\n",
"shew2_kmers = read_kmers_from_file('genomes/shew_os223.fa', 31)"
"shew1_kmers = read_kmers_from_file(\"genomes/shew_os185.fa\", 31)\n",
"shew2_kmers = read_kmers_from_file(\"genomes/shew_os223.fa\", 31)"
]
},
{
Expand All @@ -471,9 +472,9 @@
}
],
"source": [
"print('akker vs shew1', jaccard_similarity(akker_kmers, shew1_kmers))\n",
"print('akker vs shew2', jaccard_similarity(akker_kmers, shew2_kmers))\n",
"print('shew1 vs shew2', jaccard_similarity(shew1_kmers, shew2_kmers))"
"print(\"akker vs shew1\", jaccard_similarity(akker_kmers, shew1_kmers))\n",
"print(\"akker vs shew2\", jaccard_similarity(akker_kmers, shew2_kmers))\n",
"print(\"shew1 vs shew2\", jaccard_similarity(shew1_kmers, shew2_kmers))"
]
},
{
Expand All @@ -492,9 +493,9 @@
}
],
"source": [
"print('akker vs shew1', jaccard_containment(akker_kmers, shew1_kmers))\n",
"print('akker vs shew2', jaccard_containment(akker_kmers, shew2_kmers))\n",
"print('shew1 vs shew2', jaccard_containment(shew1_kmers, shew2_kmers))"
"print(\"akker vs shew1\", jaccard_containment(akker_kmers, shew1_kmers))\n",
"print(\"akker vs shew2\", jaccard_containment(akker_kmers, shew2_kmers))\n",
"print(\"shew1 vs shew2\", jaccard_containment(shew1_kmers, shew2_kmers))"
]
},
{
Expand Down Expand Up @@ -568,20 +569,22 @@
"source": [
"import mmh3\n",
"\n",
"\n",
"def hash_kmer(kmer):\n",
" # calculate the reverse complement\n",
" rc_kmer = screed.rc(kmer)\n",
" \n",
"\n",
" # determine whether original k-mer or reverse complement is lesser\n",
" if kmer < rc_kmer:\n",
" canonical_kmer = kmer\n",
" else:\n",
" canonical_kmer = rc_kmer\n",
" \n",
"\n",
" # calculate murmurhash using a hash seed of 42\n",
" hash = mmh3.hash64(canonical_kmer, 42)[0]\n",
" if hash < 0: hash += 2**64\n",
" \n",
" if hash < 0:\n",
" hash += 2**64\n",
"\n",
" # done\n",
" return hash"
]
Expand Down Expand Up @@ -610,7 +613,7 @@
}
],
"source": [
"hash_kmer('ATGGC')"
"hash_kmer(\"ATGGC\")"
]
},
{
Expand All @@ -637,7 +640,7 @@
}
],
"source": [
"hash_kmer('ATGGC')"
"hash_kmer(\"ATGGC\")"
]
},
{
Expand All @@ -664,7 +667,7 @@
}
],
"source": [
"hash_kmer('GCCAT')"
"hash_kmer(\"GCCAT\")"
]
},
{
Expand All @@ -691,7 +694,7 @@
}
],
"source": [
"hash_kmer('GCCAA')"
"hash_kmer(\"GCCAA\")"
]
},
{
Expand Down Expand Up @@ -836,7 +839,7 @@
" if hash_kmer(kmer) < keep_below:\n",
" keep.append(kmer)\n",
" # otherwise, discard\n",
" \n",
"\n",
" return keep"
]
},
Expand Down Expand Up @@ -901,8 +904,8 @@
}
],
"source": [
"print('akker vs akker, total', jaccard_similarity(akker_kmers, akker_kmers))\n",
"print('akker vs akker, sub', jaccard_similarity(akker_sub, akker_sub))"
"print(\"akker vs akker, total\", jaccard_similarity(akker_kmers, akker_kmers))\n",
"print(\"akker vs akker, sub\", jaccard_similarity(akker_sub, akker_sub))"
]
},
{
Expand All @@ -920,8 +923,8 @@
}
],
"source": [
"print('akker vs shew1, total', jaccard_similarity(akker_kmers, shew1_kmers))\n",
"print('akker vs shew1, sub', jaccard_similarity(akker_sub, shew1_sub))"
"print(\"akker vs shew1, total\", jaccard_similarity(akker_kmers, shew1_kmers))\n",
"print(\"akker vs shew1, sub\", jaccard_similarity(akker_sub, shew1_sub))"
]
},
{
Expand All @@ -939,8 +942,8 @@
}
],
"source": [
"print('shew1 vs shew2, total', jaccard_similarity(shew1_kmers, shew2_kmers))\n",
"print('shew1 vs shew2, sub', jaccard_similarity(shew1_sub, shew2_sub))"
"print(\"shew1 vs shew2, total\", jaccard_similarity(shew1_kmers, shew2_kmers))\n",
"print(\"shew1 vs shew2, sub\", jaccard_similarity(shew1_sub, shew2_sub))"
]
},
{
Expand Down
46 changes: 29 additions & 17 deletions doc/plotting-compare.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
"metadata": {},
"outputs": [],
"source": [
"matrix, labels = fig.load_matrix_and_labels('compare-demo')"
"matrix, labels = fig.load_matrix_and_labels(\"compare-demo\")"
]
},
{
Expand Down Expand Up @@ -139,8 +139,8 @@
}
],
"source": [
"print('matrix:\\n', matrix)\n",
"print('labels:', labels)"
"print(\"matrix:\\n\", matrix)\n",
"print(\"labels:\", labels)"
]
},
{
Expand Down Expand Up @@ -192,8 +192,8 @@
}
],
"source": [
"print('reordered matrix:\\n', reordered_matrix)\n",
"print('reordered labels:', reordered_labels)"
"print(\"reordered matrix:\\n\", reordered_matrix)\n",
"print(\"reordered labels:\", reordered_labels)"
]
},
{
Expand All @@ -218,8 +218,10 @@
"source": [
"import scipy.cluster.hierarchy as sch\n",
"\n",
"def plot_composite_matrix(D, labeltext, show_labels=True,\n",
" vmax=1.0, vmin=0.0, force=False):\n",
"\n",
"def plot_composite_matrix(\n",
" D, labeltext, show_labels=True, vmax=1.0, vmin=0.0, force=False\n",
"):\n",
" \"\"\"Build a composite plot showing dendrogram + distance matrix/heatmap.\n",
"\n",
" Returns a matplotlib figure.\n",
Expand All @@ -228,25 +230,34 @@
" shown on the plot.\n",
" \"\"\"\n",
" if D.max() > 1.0 or D.min() < 0.0:\n",
" error('This matrix doesn\\'t look like a distance matrix - min value {}, max value {}', D.min(), D.max())\n",
" error(\n",
" \"This matrix doesn't look like a distance matrix - min value {}, max value {}\",\n",
" D.min(),\n",
" D.max(),\n",
" )\n",
" if not force:\n",
" raise ValueError(\"not a distance matrix\")\n",
" else:\n",
" notify('force is set; scaling to [0, 1]')\n",
" notify(\"force is set; scaling to [0, 1]\")\n",
" D -= D.min()\n",
" D /= D.max()\n",
"\n",
" if show_labels:\n",
" show_indices = True\n",
" pass\n",
"\n",
" fig = pylab.figure(figsize=(11, 8))\n",
" ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])\n",
"\n",
" # plot dendrogram\n",
" Y = sch.linkage(D, method='single') # centroid\n",
" Y = sch.linkage(D, method=\"single\") # centroid\n",
"\n",
" Z1 = sch.dendrogram(Y, orientation='left', labels=labeltext,\n",
" no_labels=not show_labels, get_leaves=True)\n",
" Z1 = sch.dendrogram(\n",
" Y,\n",
" orientation=\"left\",\n",
" labels=labeltext,\n",
" no_labels=not show_labels,\n",
" get_leaves=True,\n",
" )\n",
" ax1.set_xticks([])\n",
"\n",
" xstart = 0.45\n",
Expand All @@ -256,8 +267,8 @@
" scale_xstart = xstart + width + 0.01\n",
"\n",
" # re-order labels along rows, top to bottom\n",
" idx1 = Z1['leaves']\n",
" reordered_labels = [ labeltext[i] for i in idx1 ]\n",
" idx1 = Z1[\"leaves\"]\n",
" reordered_labels = [labeltext[i] for i in idx1]\n",
"\n",
" # reorder D by the clustering in the dendrogram\n",
" D = D[idx1, :]\n",
Expand All @@ -266,8 +277,9 @@
" # show matrix\n",
" axmatrix = fig.add_axes([xstart, 0.1, width, 0.6])\n",
"\n",
" im = axmatrix.matshow(D, aspect='auto', origin='lower',\n",
" cmap=pylab.cm.YlGnBu, vmin=vmin, vmax=vmax)\n",
" im = axmatrix.matshow(\n",
" D, aspect=\"auto\", origin=\"lower\", cmap=pylab.cm.YlGnBu, vmin=vmin, vmax=vmax\n",
" )\n",
" axmatrix.set_xticks([])\n",
" axmatrix.set_yticks([])\n",
"\n",
Expand Down
6 changes: 4 additions & 2 deletions doc/sourmash-collections.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,8 @@
],
"source": [
"from IPython.display import Image\n",
"Image(filename='compare_all.mat.matrix.png') "
"\n",
"Image(filename=\"compare_all.mat.matrix.png\")"
]
},
{
Expand Down Expand Up @@ -857,7 +858,8 @@
],
"source": [
"import pandas\n",
"df = pandas.read_csv('podar-lineage.csv')\n",
"\n",
"df = pandas.read_csv(\"podar-lineage.csv\")\n",
"df"
]
},
Expand Down
Loading

0 comments on commit d3ae5da

Please sign in to comment.