Add codespell integration and fix found typos: added configuration an…

…d github workflow (#1109) Co-authored-by: Philipp A <[email protected]>
scverse · Aug 29, 2023 · 87d363e · 87d363e
1 parent 6c3c6d1
commit 87d363e
Show file tree

Hide file tree

Showing 24 changed files with 63 additions and 30 deletions.
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
@@ -0,0 +1,22 @@
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -26,3 +26,10 @@ repos:
       - id: detect-private-key
       - id: no-commit-to-branch
         args: ["--branch=main"]
+
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.5
+    hooks:
+      - id: codespell
+        additional_dependencies:
+          - tomli
diff --git a/anndata/_core/aligned_mapping.py b/anndata/_core/aligned_mapping.py
@@ -305,7 +305,7 @@ class LayersBase(AlignedMapping):
     attrname = "layers"
     axes = (0, 1)
 
-    # TODO: I thought I had a more elegant solution to overiding this...
+    # TODO: I thought I had a more elegant solution to overriding this...
     def copy(self) -> "Layers":
         d = self._actual_class(self.parent)
         for k, v in self.items():

diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py
@@ -650,7 +650,7 @@ def X(self, value: Optional[Union[np.ndarray, sparse.spmatrix]]):
 
         # If indices are both arrays, we need to modify them
         # so we don’t set values like coordinates
-        # This can occur if there are succesive views
+        # This can occur if there are successive views
         if (
             self.is_view
             and isinstance(self._oidx, np.ndarray)
@@ -667,7 +667,7 @@ def X(self, value: Optional[Union[np.ndarray, sparse.spmatrix]]):
         ):
             if not np.isscalar(value) and self.shape != value.shape:
                 # For assigning vector of values to 2d array or matrix
-                # Not neccesary for row of 2d array
+                # Not necessary for row of 2d array
                 value = value.reshape(self.shape)
             if self.isbacked:
                 if self.is_view:
@@ -1377,7 +1377,7 @@ def obs_vector(self, k: str, *, layer: Optional[str] = None) -> np.ndarray:
 
         Returns
         -------
-        A one dimensional nd array, with values for each obs in the same order
+        A one dimensional ndarray, with values for each obs in the same order
         as :attr:`obs_names`.
         """
         if layer == "X":
@@ -1409,7 +1409,7 @@ def var_vector(self, k, *, layer: Optional[str] = None) -> np.ndarray:
 
         Returns
         -------
-        A one dimensional nd array, with values for each var in the same order
+        A one dimensional ndarray, with values for each var in the same order
         as :attr:`var_names`.
         """
         if layer == "X":

diff --git a/anndata/_core/merge.py b/anndata/_core/merge.py
@@ -94,7 +94,7 @@ def not_missing(v) -> bool:
 
 
 # We need to be able to check for equality of arrays to know which are the same.
-# Unfortunatley equality of arrays is poorly defined.
+# Unfortunately equality of arrays is poorly defined.
 # * `np.array_equal` does not work for sparse arrays
 # * `np.array_equal(..., equal_nan=True)` does not work for null values at the moment
 #   (see https://github.com/numpy/numpy/issues/16377)
@@ -1042,7 +1042,7 @@ def concat(
         incrementing integer labels.
     index_unique
         Whether to make the index unique by using the keys. If provided, this
-        is the delimeter between "{orig_idx}{index_unique}{key}". When `None`,
+        is the delimiter between "{orig_idx}{index_unique}{key}". When `None`,
         the original indices are kept.
     fill_value
         When `join="outer"`, this is the value that will be used to fill the introduced

diff --git a/anndata/_io/read.py b/anndata/_io/read.py
@@ -441,7 +441,7 @@ def _read_text(
         else:
             data.append(np.array(line_list, dtype=dtype))
     # logg.msg("    read data into list of lists", t=True, v=4)
-    # transfrom to array, this takes a long time and a lot of memory
+    # transform to array, this takes a long time and a lot of memory
     # but it’s actually the same thing as np.genfromtxt does
     # - we don’t use the latter as it would involve another slicing step
     #   in the end, to separate row_names from float data, slicing takes

diff --git a/anndata/_io/specs/methods.py b/anndata/_io/specs/methods.py
@@ -350,7 +350,7 @@ def write_basic_dask_zarr(f, k, elem, _writer, dataset_kwargs=MappingProxyType({
     da.store(elem, g, lock=GLOBAL_LOCK)
 
 
-# Adding this seperately because h5py isn't serializable
+# Adding this separately because h5py isn't serializable
 # https://github.com/pydata/xarray/issues/4242
 @_REGISTRY.register_write(H5Group, DaskArray, IOSpec("array", "0.2.0"))
 def write_basic_dask_h5(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):

diff --git a/anndata/_io/specs/registry.py b/anndata/_io/specs/registry.py
@@ -276,7 +276,7 @@ def write_elem(
         if elem is None:
             return lambda *_, **__: None
 
-        # Normalize k to abosulte path
+        # Normalize k to absolute path
         if not PurePosixPath(k).is_absolute():
             k = str(PurePosixPath(store.name) / k)
 

diff --git a/anndata/compat/__init__.py b/anndata/compat/__init__.py
@@ -145,7 +145,7 @@ def _from_fixed_length_strings(value):
     """\
     Convert from fixed length strings to unicode.
 
-    For backwards compatability with older h5ad and zarr files.
+    For backwards compatibility with older h5ad and zarr files.
     """
     new_dtype = []
     for dt in value.dtype.descr:

diff --git a/anndata/experimental/_dispatch_io.py b/anndata/experimental/_dispatch_io.py
@@ -58,7 +58,7 @@ def write_dispatched(
     dataset_kwargs=MappingProxyType({}),
 ) -> None:
     """
-    Write elem to store, recusively calling callback at each sub-element.
+    Write elem to store, recursively calling callback at each sub-element.
 
     Params
     ------

diff --git a/anndata/experimental/merge.py b/anndata/experimental/merge.py
@@ -478,7 +478,7 @@ def concat_on_disk(
         incrementing integer labels.
     index_unique
         Whether to make the index unique by using the keys. If provided, this
-        is the delimeter between "{orig_idx}{index_unique}{key}". When `None`,
+        is the delimiter between "{orig_idx}{index_unique}{key}". When `None`,
         the original indices are kept.
     fill_value
         When `join="outer"`, this is the value that will be used to fill the introduced

diff --git a/anndata/experimental/multi_files/_anncollection.py b/anndata/experimental/multi_files/_anncollection.py
@@ -392,7 +392,7 @@ def X(self):
         The data matrix formed from the `.X` attributes of the underlying `adatas`,
         properly reindexed and lazily merged.
         Nothing is copied until `.X` is accessed, no real concatenation of the
-        unerlying `.X` attributes is done.
+        underlying `.X` attributes is done.
         """
         # inconsistent behavior here, _X can be changed,
         # but the other attributes can't be changed.
@@ -421,7 +421,7 @@ def layers(self):
     def obsm(self):
         """Lazy subset of multi-dimensional annotation of observations.
 
-        Points to the `.obsm` attributes of the underlying adatas ot to `.obsm` of the parent
+        Points to the `.obsm` attributes of the underlying adatas or to `.obsm` of the parent
         AnnCollection object depending on the `join_obsm` option of the AnnCollection object.
         See the docs of :class:`~anndata.experimental.AnnCollection` for details.
         Copy rules are the same as for `.layers`, i.e. everything is lazy.
@@ -437,15 +437,15 @@ def obsm(self):
     def obs(self):
         """Lazy suset of one-dimensional annotation of observations.
 
-        Points to the `.obs` attributes of the underlying adatas ot to `.obs` of the parent
+        Points to the `.obs` attributes of the underlying adatas or to `.obs` of the parent
         AnnCollection object depending on the `join_obs` option of the AnnCollection object.
         See the docs of `~anndata.experimental.AnnCollection` for details.
         Copy rules are the same as for `.layers`, i.e. everything is lazy.
 
         To get `.obs` as a DataFrame, use `.obs.df`.
         To get `.obs` as a dictionary, use `.obs.to_dict()`. You can also specify keys
         to include in the dict `.obs.to_dict(keys=['key1', 'key2'])` and if you want
-        converters to be truned off when copying to dict `.obs.to_dict(use_convert=False)`.
+        converters to be turned off when copying to dict `.obs.to_dict(use_convert=False)`.
         """
         self._lazy_init_attr("obs")
         return self._obs_view
@@ -621,7 +621,7 @@ class AnnCollection(_ConcatViewMixin, _IterateViewMixin):
         incrementing integer labels.
     index_unique
         Whether to make the index unique by using the keys. If provided, this
-        is the delimeter between "{orig_idx}{index_unique}{key}". When `None`,
+        is the delimiter between "{orig_idx}{index_unique}{key}". When `None`,
         the original indices are kept.
     convert
         You can pass a function or a Mapping of functions which will be applied

diff --git a/anndata/tests/helpers.py b/anndata/tests/helpers.py
@@ -561,7 +561,7 @@ def fmt_name(x):
     assert_equal(a.obs_names, b.obs_names, exact, elem_name=fmt_name("obs_names"))
     assert_equal(a.var_names, b.var_names, exact, elem_name=fmt_name("var_names"))
     if not exact:
-        # Reorder all elements if neccesary
+        # Reorder all elements if necessary
         idx = [slice(None), slice(None)]
         # Since it’s a pain to compare a list of pandas objects
         change_flag = False

diff --git a/anndata/tests/test_concatenate.py b/anndata/tests/test_concatenate.py
@@ -965,7 +965,7 @@ def permute_nested_values(dicts: "List[dict]", gen_val: "Callable[[int], Any]"):
     This function permutes the values of a nested mapping, for testing that out merge
     method work regardless of the values types.
 
-    Assumes the intial dictionary had integers for values.
+    Assumes the initial dictionary had integers for values.
     """
     dicts = deepcopy(dicts)
     initial_values = [

diff --git a/anndata/tests/test_helpers.py b/anndata/tests/test_helpers.py
@@ -79,7 +79,7 @@ def test_gen_awkward(shape, datashape):
 # Does this work for every warning?
 def test_report_name():
     def raise_error():
-        raise Exception("an error occured!")
+        raise Exception("an error occurred!")
 
     letters = np.array(list(ascii_letters))
     tag = "".join(np.random.permutation(letters))

diff --git a/anndata/tests/test_uns.py b/anndata/tests/test_uns.py
@@ -31,7 +31,7 @@ def test_uns_color_subset():
     assert "cat1_colors" not in v.uns
     assert "cat2_colors" not in v.uns
 
-    # Otherwise the colors should still match after reseting
+    # Otherwise the colors should still match after resetting
     cat1_colors = np.array(["red", "green", "blue", "yellow"], dtype=object)
     adata = AnnData(np.ones((5, 5)), obs=obs, uns={"cat1_colors": cat1_colors.copy()})
 

diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -10,7 +10,7 @@ I definitley recommend reading through the asv docs. Currently, this assumes the
 
 Data will need to be retrieved for these benchmarks. This can be downloaded using the script fetch_datasets.py.
 
-Note that the `h5ad` format has changed since it's inception. While the `anndata` package maintains backwards compatability, older versions of `anndata` will not be able to read files written by more recent versions. To get around this for the benchmarks, datasets have to be able to be read by all versions which can require a setup function that creates the anndata object.
+Note that the `h5ad` format has changed since its inception. While the `anndata` package maintains backwards compatibility, older versions of `anndata` will not be able to read files written by more recent versions. To get around this for the benchmarks, datasets have to be able to be read by all versions which can require a setup function that creates the anndata object.
 
 ## Usage
 
@@ -24,7 +24,7 @@ You can filter out the benchmarks which are run with the `-b {patttern}` flag.
 
 ### Accessing the benchmarks
 
-You can see what benchmarks you've alread run using `asv show`. If you don't specify a commit, it will search for the available commits. If you specify a commit it'll show you those results. For example:
+You can see what benchmarks you've already run using `asv show`. If you don't specify a commit, it will search for the available commits. If you specify a commit it'll show you those results. For example:
 
 ```bash
 $ asv show -b "views"

diff --git a/benchmarks/benchmarks/utils.py b/benchmarks/benchmarks/utils.py
@@ -115,7 +115,7 @@ def gen_adata(n_obs, n_var, attr_set):
         X = sparse.random(n_obs, n_var, density=0.1, format="csr")
         X = X.toarray()
     else:
-        # TODO: Theres probably a better way to do this
+        # TODO: There's probably a better way to do this
         X = sparse.random(n_obs, n_var, density=0, format="csr")
     adata = AnnData(X)
     if "obs,var" in attr_set:

diff --git a/docs/api.md b/docs/api.md
@@ -79,7 +79,7 @@ Writing to other formats.
 ## Experimental API
 
 ```{warning}
-API's in the experimenal module are currently in development and subject to change at any time.
+APIs in the experimental module are currently in development and subject to change at any time.
 ```
 
 Two classes for working with batched access to collections of many `AnnData` objects or `h5ad` files. In paritcular, for pytorch-based models.

diff --git a/docs/contributing.md b/docs/contributing.md
@@ -11,4 +11,4 @@ AnnData follows the development practices outlined in the [Scanpy contribution g
 ### GPU CI
 
 To test GPU specific code we have a paid self-hosted runner to run the gpu specific tests on.
-This CI runs by default on the main branch, but for PRs requires the `run-gpu-ci` label to prevent unneccesary runs.
+This CI runs by default on the main branch, but for PRs requires the `run-gpu-ci` label to prevent unnecessary runs.
diff --git a/docs/release-notes/0.6.0.md b/docs/release-notes/0.6.0.md
@@ -2,7 +2,7 @@
 
 - better support for aligned mappings (obsm, varm, layers)
   `0.6.22` {pr}`155` {smaller}`I Virshup`
-- convenience accesors {func}`~anndata.AnnData.obs_vector`, {func}`~anndata.AnnData.var_vector` for 1d arrays.
+- convenience accessors {func}`~anndata.AnnData.obs_vector`, {func}`~anndata.AnnData.var_vector` for 1d arrays.
   `0.6.21` {pr}`144` {smaller}`I Virshup`
 - compatibility with Scipy >=1.3 by removing `IndexMixin` dependency.
   `0.6.20` {pr}`151` {smaller}`P Angerer`

diff --git a/docs/release-notes/0.7.2.md b/docs/release-notes/0.7.2.md
@@ -6,7 +6,7 @@
 - Elements of `uns` can now be merged, see {pr}`350`
 - Outer joins now work for `layers` and `obsm`, see {pr}`352`
 - Fill value for outer joins can now be specified
-- Expect improvments in performance, see {issue}`303`
+- Expect improvements in performance, see {issue}`303`
 
 ```{rubric} Functionality
 ```

diff --git a/docs/release-notes/0.9.0.md b/docs/release-notes/0.9.0.md
@@ -32,7 +32,7 @@
 ```
 
 - The `AnnData` `dtype` argument no longer defaults to `float32` {pr}`854` {user}`ivirshup`
-- Previously deprecated `force_dense` arugment {meth}`AnnData.write_h5ad() <anndata.AnnData.write_h5ad>` has been removed. {pr}`855` {user}`ivirshup`
+- Previously deprecated `force_dense` argument {meth}`AnnData.write_h5ad() <anndata.AnnData.write_h5ad>` has been removed. {pr}`855` {user}`ivirshup`
 - Previously deprecated behaviour around storing adjacency matrices in `uns` has been removed {pr}`866` {user}`ivirshup`
 
 ```{rubric} Other updates

diff --git a/pyproject.toml b/pyproject.toml
@@ -143,3 +143,7 @@ select = [
 [tool.ruff.per-file-ignores]
 # E721 comparing types, but we specifically are checking that we aren't getting subtypes (views)
 "anndata/tests/test_readwrite.py" = ["E721"]
+
+[tool.codespell]
+skip = ".git,*.pdf,*.svg"
+ignore-words-list = "theis,coo,homogenous"