Skip to content

Commit

Permalink
Merge pull request #66 from dh-tech/feature/try-mypy
Browse files Browse the repository at this point in the history
Type cleanup based on errors reported by mypy
  • Loading branch information
rlskoeser authored Apr 25, 2024
2 parents 33a8d75 + b679afa commit e27fe84
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 48 deletions.
16 changes: 13 additions & 3 deletions .github/workflows/check.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: style + docs check
name: Check style + docs + types

on:
pull_request:
Expand All @@ -12,17 +12,27 @@ jobs:
working-directory: .

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python 3.10
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: 'pip'
cache-dependency-path: '**/setup.cfg'
- name: Install package with dependencies
run: pip install -e ".[dev]"
if: steps.python-cache.outputs.cache-hit != 'true'

# check code style
- name: Run black
run: black src --check --diff

# check docs
- name: Check that documentation can be built
run: tox -e docs

# check types with mypy
- name: Install mypy
run: pip install mypy
- name: Check types in python src directory; install needed types
run: mypy --install-types --non-interactive src
4 changes: 2 additions & 2 deletions .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ on:

env:
# python version used to calculate and submit code coverage
COV_PYTHON_VERSION: "3.10"
COV_PYTHON_VERSION: "3.11"

jobs:
python-unit:
runs-on: ubuntu-latest
strategy:
matrix:
python: ["3.8", "3.9", "3.10", "3.11"]
python: ["3.9", "3.10", "3.11", "3.12"]
defaults:
run:
working-directory: .
Expand Down
8 changes: 4 additions & 4 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ keywords = "dates dating uncertainty uncertain-dates unknown partially-known dig
classifiers =
Development Status :: 2 - Pre-Alpha
Programming Language :: Python :: 3
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Intended Audience :: Developers
License :: OSI Approved :: Apache Software License
Operating System :: OS Independent
Expand Down Expand Up @@ -63,7 +63,7 @@ test =
pytest-ordering
pytest-cov
docs =
sphinx<7.0.0
sphinx<7.0.0
sphinx_rtd_theme
m2r2
# pin sphinx because 7.0 is currently not compatible with rtd theme
Expand All @@ -72,15 +72,15 @@ docs =
where = src

[tox:tox]
envlist = py38, py39, py310, py311
envlist = py39, py310, py311, py312
isolated_build = True

[gh-actions]
python =
3.8: py38
3.9: py39
3.10: py310
3.11: py311
3.12: py312

[pytest]
minversion = 6.0
Expand Down
7 changes: 4 additions & 3 deletions src/undate/dateformat/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import importlib
import logging
import pkgutil
from typing import Dict
from typing import Dict, Type
from functools import lru_cache # functools.cache not available until 3.9


Expand All @@ -41,11 +41,12 @@ def to_string(self, undate) -> str:
# cache import class method to ensure we only import once
@classmethod
@lru_cache
def import_formatters(cls):
def import_formatters(cls) -> int:
"""Import all undate.dateformat formatters
so that they will be included in available formatters
even if not explicitly imported. Only import once.
Returns the count of modules imported."""

logger.debug("Loading formatters under undate.dateformat")
import undate.dateformat

Expand All @@ -65,7 +66,7 @@ def import_formatters(cls):
return import_count

@classmethod
def available_formatters(cls) -> Dict[str, "BaseDateFormat"]:
def available_formatters(cls) -> Dict[str, Type["BaseDateFormat"]]:
# ensure undate formatters are imported
cls.import_formatters()
return {c.name: c for c in cls.__subclasses__()} # type: ignore
7 changes: 6 additions & 1 deletion src/undate/dateformat/iso8601.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@ def parse(self, value: str) -> Union[Undate, UndateInterval]:
if len(parts) == 1:
return self._parse_single_date(parts[0])
elif len(parts) == 2:
return UndateInterval(*[self._parse_single_date(p) for p in parts])
# date range; parse both parts and initialize an interval
start, end = [self._parse_single_date(p) for p in parts]
return UndateInterval(start, end)
else:
# more than two parts = unexpected input
raise ValueError

def _parse_single_date(self, value: str) -> Undate:
# split single iso date into parts; convert to int or None
Expand Down
92 changes: 58 additions & 34 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ class Undate:
#: symbol for unknown digits within a date value
MISSING_DIGIT: str = "X"

earliest: Union[datetime.date, None] = None
latest: Union[datetime.date, None] = None
earliest: datetime.date
latest: datetime.date
#: A string to label a specific undate, e.g. "German Unity Date 2022" for Oct. 3, 2022.
#: Labels are not taken into account when comparing undate objects.
label: Union[str, None] = None
formatter: Union[BaseDateFormat, None] = None
formatter: BaseDateFormat
#: precision of the date (day, month, year, etc.)
precision: DatePrecision = None
precision: DatePrecision

#: known non-leap year
NON_LEAP_YEAR: int = 2022
Expand All @@ -62,7 +62,7 @@ def __init__(
label: Optional[str] = None,
):
# keep track of initial values and which values are known
self.initial_values: Dict[str, Union[int, str]] = {
self.initial_values: Dict[str, Optional[Union[int, str]]] = {
"year": year,
"month": month,
"day": day,
Expand All @@ -85,8 +85,8 @@ def __init__(
min_year = max_year = year
except ValueError:
# year is a string that can't be converted to int
min_year = int(year.replace(self.MISSING_DIGIT, "0"))
max_year = int(year.replace(self.MISSING_DIGIT, "9"))
min_year = int(str(year).replace(self.MISSING_DIGIT, "0"))
max_year = int(str(year).replace(self.MISSING_DIGIT, "9"))
else:
min_year = datetime.MINYEAR
max_year = datetime.MAXYEAR
Expand All @@ -111,7 +111,7 @@ def __init__(
except ValueError:
# if not, calculate min/max for missing digits
min_month, max_month = self._missing_digit_minmax(
month, min_month, max_month
str(month), min_month, max_month
)

# similar to month above — unknown day, but day-level granularity
Expand All @@ -128,14 +128,14 @@ def __init__(
min_day = 1
# if we know year and month (or max month), calculate exactly
if year and month:
_, max_day = monthrange(year, max_month)
_, max_day = monthrange(int(year), max_month)
elif year is None and month:
# If we have a month but no year,
# calculate based on a known non-leap year
# (better than just setting 31, but still not great)
_, max_day = monthrange(self.NON_LEAP_YEAR, max_month)
else:
max_day: int = 31
max_day = 31

# if day is partially specified, narrow min/max further
if day is not None:
Expand All @@ -146,9 +146,10 @@ def __init__(
self.earliest = datetime.date(min_year, min_month, min_day)
self.latest = datetime.date(max_year, max_month, max_day)

if not formatter:
# TODO subclass definitions not available unless they are imported where Undate() is called
formatter = BaseDateFormat.available_formatters()[self.DEFAULT_FORMAT]()
if formatter is None:
# import all subclass definitions; initialize the default
formatter_cls = BaseDateFormat.available_formatters()[self.DEFAULT_FORMAT]
formatter = formatter_cls()
self.formatter = formatter

self.label = label
Expand Down Expand Up @@ -179,9 +180,28 @@ def __repr__(self) -> str:
return "<Undate '%s' (%s)>" % (self.label, self)
return "<Undate %s>" % self

def __eq__(self, other: Union["Undate", datetime.date]) -> bool:
def _comparison_type(self, other: object) -> "Undate":
"""Common logic for type handling in comparison methods.
Converts to Undate object if possible, otherwise returns
:data:`NotImplemented` (it is returned, not raised). Currently only
supports conversion from :class:`datetime.date`
"""

# support datetime.date by converting to undate
if isinstance(other, datetime.date):
other = Undate.from_datetime_date(other)

# recommended to support comparison with arbitrary objects
if not isinstance(other, Undate):
return NotImplemented

return other

def __eq__(self, other: object) -> bool:
# Note: assumes label differences don't matter for comparing dates

other = self._comparison_type(other)

# only a day-precision fully known undate can be equal to a datetime.date
if isinstance(other, datetime.date):
return self.earliest == other and self.latest == other
Expand All @@ -194,19 +214,19 @@ def __eq__(self, other: Union["Undate", datetime.date]) -> bool:
)
# if everything looks the same, check for any unknowns in initial values
# the same unknown date should NOT be considered equal
# (but do we need a different equivalence check for this?)

# NOTE: assumes that partially known values can only be written
# in one format (i.e. X for missing digits).
# If we support other formats, will need to normalize to common
# internal format for comparison
if looks_equal and any("X" in str(val) for val in self.initial_values.values()):
return False

return looks_equal

def __lt__(self, other: Union["Undate", datetime.date]) -> bool:
# support datetime.date by converting to undate
if isinstance(other, datetime.date):
other = Undate.from_datetime_date(other)
def __lt__(self, other: object) -> bool:
other = self._comparison_type(other)

# if this date ends before the other date starts,
# return true (this date is earlier, so it is less)
Expand Down Expand Up @@ -235,7 +255,7 @@ def __lt__(self, other: Union["Undate", datetime.date]) -> bool:
# for any other case (i.e., self == other), return false
return False

def __gt__(self, other: Union["Undate", datetime.date]) -> bool:
def __gt__(self, other: object) -> bool:
# define gt ourselves so we can support > comparison with datetime.date,
# but rely on existing less than implementation.
# strictly greater than must rule out equals
Expand All @@ -244,13 +264,10 @@ def __gt__(self, other: Union["Undate", datetime.date]) -> bool:
def __le__(self, other: Union["Undate", datetime.date]) -> bool:
return self == other or self < other

def __contains__(self, other: Union["Undate", datetime.date]) -> bool:
def __contains__(self, other: object) -> bool:
# if the two dates are strictly equal, don't consider
# either one as containing the other

# support comparison with datetime by converting to undate
if isinstance(other, datetime.date):
other = Undate.from_datetime_date(other)
other = self._comparison_type(other)

if self == other:
return False
Expand Down Expand Up @@ -320,7 +337,7 @@ def duration(self) -> datetime.timedelta:

def _missing_digit_minmax(
self, value: str, min_val: int, max_val: int
) -> (int, int):
) -> tuple[int, int]:
# given a possible range, calculate min/max values for a string
# with a missing digit

Expand All @@ -336,17 +353,17 @@ def _missing_digit_minmax(
max_match = max(matches)

# split input string into a list so we can update individually
min_val = list(value)
max_val = list(value)
new_min_val = list(value)
new_max_val = list(value)
for i, digit in enumerate(value):
# replace the corresponding digit with our min and max
if digit == self.MISSING_DIGIT:
min_val[i] = min_match[i]
max_val[i] = max_match[i]
new_min_val[i] = min_match[i]
new_max_val[i] = max_match[i]

# combine the lists of digits back together and convert to int
min_val = int("".join(min_val))
max_val = int("".join(max_val))
min_val = int("".join(new_min_val))
max_val = int("".join(new_max_val))
return (min_val, max_val)


Expand All @@ -362,12 +379,15 @@ class UndateInterval:
"""

# date range between two uncertain dates
earliest: Union[Undate, None]
latest: Union[Undate, None]
label: Union[str, None]

def __init__(
self,
earliest: Union[Undate, None] = None,
latest: Union[Undate, None] = None,
label: Union[str, None] = None,
earliest: Optional[Undate] = None,
latest: Optional[Undate] = None,
label: Optional[str] = None,
):
# for now, assume takes two undate objects
self.earliest = earliest
Expand Down Expand Up @@ -395,6 +415,10 @@ def duration(self) -> datetime.timedelta:
"""
# what is the duration of this date range?

# if range is open-ended, can't calculate
if self.earliest is None or self.latest is None:
return NotImplemented

# if both years are known, subtract the earliest possible start from the latest possible end, then add one day
if self.latest.known_year and self.earliest.known_year:
return self.latest.latest - self.earliest.earliest + ONE_DAY
Expand Down
4 changes: 3 additions & 1 deletion tests/test_dateformat/edtf/test_edtf_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,6 @@ def test_transform(date_string, expected):
transformer = EDTFTransformer()
# parse the input string, then transform to undate object
parsetree = edtf_parser.parse(date_string)
assert transformer.transform(parsetree) == expected
# since the same unknown date is not considered strictly equal,
# compare object representations
assert repr(transformer.transform(parsetree)) == repr(expected)
3 changes: 3 additions & 0 deletions tests/test_undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,3 +427,6 @@ def test_duration(self):
Undate(None, 6, 7), Undate(None, 6, 6)
).duration()
assert month_noyear_duration.days == 365

# duration is not supported for open-ended intervals
assert UndateInterval(Undate(2000), None).duration() == NotImplemented

0 comments on commit e27fe84

Please sign in to comment.