Skip to content

Commit

Permalink
Merge pull request #22 from SamEdwardes/feature/python3.12
Browse files Browse the repository at this point in the history
Add support for Python 3.12
  • Loading branch information
SamEdwardes authored Oct 4, 2024
2 parents 802ec31 + 14ca369 commit fa15bd5
Show file tree
Hide file tree
Showing 9 changed files with 1,893 additions and 2,534 deletions.
14 changes: 5 additions & 9 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
pull_request:
branches: [ main ]
push:
branches:
branches:
- main
workflow_dispatch:

Expand All @@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v2
Expand All @@ -29,13 +29,9 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install -y poppler-utils tesseract-ocr libtesseract-dev
- name: Setup poetry
- name: Setup uv
run: |
python -m pip install --upgrade pip wheel setuptools
curl -sSL https://install.python-poetry.org | python3 -
- name: Install dependencies
run: |
poetry install --all-extras
curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Test with pytest
run: |
poetry run pytest
uv run --python ${{ matrix.python-version }} --all-extras pytest
13 changes: 13 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
# Changelog

## 0.3.2 (2024-10-04)

**Changes**

- Support for Python 3.9 to 3.12 and all future 3.x versions of Python ([#16](https://github.com/SamEdwardes/spacypdfreader/issues/16), [#21](https://github.com/SamEdwardes/spacypdfreader/issues/21))
- Add a local test matrix that runs the test suite against every supported Python version.
- Switch from poetry to uv for managing project dependencies and building project.
- Update dependencies.

**Fixes**

None

## 0.3.1 (2023-10-17)

**Changes**
Expand Down
10 changes: 8 additions & 2 deletions docs/contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@ Before merging changes into main the following must be completed:

- [ ] Bump the version number in `pyproject.toml` and `spacypdfreader/__init__.py`
- [ ] Format the code: `just format`
- [ ] Run pytest: `just test`
- [ ] Run pytest:

```bash
just test-matrix
just test-docs
```
- [ ] Test publishing to test PyPI: `just publish-test`
- [ ] Check the docs locally: `just preview-docs`

After merging the pull request:
Expand All @@ -17,7 +23,7 @@ After merging the pull request:

## Code style

The black code formatter should be run against all code.
The ruff code formatter should be run against all code.

```bash
just format
Expand Down
69 changes: 55 additions & 14 deletions justfile
Original file line number Diff line number Diff line change
@@ -1,25 +1,66 @@
default:
@just --list

[group('package')]
build:
rm -rf dist
uv build

[group('package')]
publish-test:
rm -rf dist
uv build
uv publish --token $(op read "op://Private/Test PyPI/Token") --publish-url https://test.pypi.org/legacy/
open https://test.pypi.org/project/spacypdfreader/

[group('package')]
publish:
rm -rf dist
uv build
uv publish --token $(op read "op://Private/PyPI/Token")
open https://pypi.org/project/spacypdfreader/

[group('lint')]
format:
# Source code
poetry run black spacypdfreader
poetry run isort spacypdfreader
# Tests
poetry run black tests
poetry run isort tests
# Sort imports
uvx ruff check --select I --fix .
# Format code
uvx ruff format .

test:
poetry run pytest
poetry run pytest --doctest-modules spacypdfreader/
[group('lint')]
lint:
uvx ruff check .

[group('tests')]
test version="3.12":
uv run --python {{version}} --all-extras pytest

[group('tests')]
test-matrix:
just test 3.9
just test 3.10
just test 3.11
just test 3.12

[group('tests')]
test-pre-release-python:
# As of 2024-10-04 the test run on Python 3.13 is failing
just test 3.13

[group('tests')]
test-gha:
gh workflow run pytest.yml --ref $(git branch --show-current)

[group('docs')]
preview-docs:
poetry run mkdocs serve
uv run mkdocs serve

[group('docs')]
publish-docs:
rm -rf site
mkdocs build
mkdocs gh-deploy
uv run mkdocs build
uv run mkdocs gh-deploy

publish:
poetry publish --build
[group('docs')]
test-docs:
uv run --python 3.12 --all-extras pytest --doctest-modules spacypdfreader/
5 changes: 3 additions & 2 deletions notebooks/spacypdfreader-demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
"outputs": [],
"source": [
"import spacy\n",
"\n",
"from spacypdfreader import pdf_reader\n",
"from spacypdfreader.parsers.pytesseract import PytesseractParser\n",
"from spacypdfreader.parsers.pdfminer import PdfminerParser"
"from spacypdfreader.parsers.pdfminer import PdfminerParser\n",
"from spacypdfreader.parsers.pytesseract import PytesseractParser"
]
},
{
Expand Down
2,479 changes: 0 additions & 2,479 deletions poetry.lock

This file was deleted.

66 changes: 39 additions & 27 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,36 +1,48 @@
[tool.poetry]
[project]
name = "spacypdfreader"
version = "0.3.1"
version = "0.3.2"
description = "A PDF to text extraction pipeline component for spaCy."
authors = ["SamEdwardes <[email protected]>"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/SamEdwardes/spaCyPDFreader"
maintainers = [
{name = "Sam Edwardes", email = "[email protected]"}
]
keywords = ["python", "spacy", "nlp", "pdf", "pdfs"]
requires-python = ">=3.9"
dependencies = [
"pdfminer-six>=20240706",
"rich>=13.9.2",
"spacy>=3.8.2",
]

[tool.poetry.dependencies]
python = ">=3.8,<3.12"
spacy = "^3.4"
rich = "^10.15.2"
"pdfminer.six" = "^20211012"
pytesseract = {version = "^0.3.8", optional = true}
pdf2image = {version = "^1.16.0", optional = true}
numpy = "^1.24.2"
Pillow = {version = "^9.4.0", optional = true}
[project.urls]
Homepage = "https://samedwardes.github.io/spacypdfreader"
Documentation = "https://samedwardes.github.io/spacypdfreader"
Repository = "https://github.com/SamEdwardes/spaCyPDFreader.git"
Issues = "https://github.com/SamEdwardes/spaCyPDFreader/issues"
Changelog = "https://github.com/SamEdwardes/spacypdfreader/blob/main/docs/changelog.md"

[tool.poetry.extras]
pytesseract = ["pytesseract", "Pillow", "pdf2image"]

[tool.poetry.group.dev.dependencies]
pytest = "^6.2.5"
en-core-web-sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl"}
mkdocs-material = "^9.1.1"
mkdocs-include-markdown-plugin = "^4.0.3"
mkdocstrings = {extras = ["python"], version = "^0.20.0"}
ipykernel = "^6.25.2"
black = "^23.9.1"
isort = "^5.12.0"
[project.optional-dependencies]
pytesseract = [
"pdf2image>=1.17.0",
"pillow>=10.4.0",
"pytesseract>=0.3.13",
]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.uv]
dev-dependencies = [
"mkdocs>=1.6.1",
"mkdocs-include-markdown-plugin>=6.2.2",
"mkdocs-material>=9.5.39",
"pytest>=8.3.3",
"en-core-web-sm",
"mkdocstrings>=0.26.1",
"mkdocstrings-python>=1.11.1",
]

[tool.uv.sources]
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
2 changes: 1 addition & 1 deletion tests/test_spacypdfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


def test_version():
assert __version__ == "0.3.1"
assert __version__ == "0.3.2"


def test_get_number_of_pages():
Expand Down
Loading

0 comments on commit fa15bd5

Please sign in to comment.