From b8489e7724790f02dcb60fe6beaad5459568e225 Mon Sep 17 00:00:00 2001 From: Philipp Heinrich Date: Fri, 18 Oct 2024 13:52:03 +0200 Subject: [PATCH 1/4] bump version --- ccc/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccc/version.py b/ccc/version.py index ceca084..cc620f7 100644 --- a/ccc/version.py +++ b/ccc/version.py @@ -4,4 +4,4 @@ """ -__version__ = "0.12.2" +__version__ = "0.12.3dev0" From 96bed534913913c282801703322b0e8812345a2e Mon Sep 17 00:00:00 2001 From: Philipp Heinrich Date: Fri, 18 Oct 2024 13:58:26 +0200 Subject: [PATCH 2/4] repair publish workflow --- .github/workflows/publish.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 140bb65..6ecfebf 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -21,10 +21,14 @@ jobs: sed -i 's/SITE=beta-install/SITE=standard/' config.mk sudo ./install-scripts/install-linux sudo ldconfig - - name: Set up Python 3.11 + - name: Set up Python 3.12 uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: '3.12' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + make install - name: Create Dist run: | make sdist From d7a6e1a8be4106a8c5c9c05806bcae67f8aed9ba Mon Sep 17 00:00:00 2001 From: Philipp Heinrich Date: Fri, 18 Oct 2024 14:13:47 +0200 Subject: [PATCH 3/4] improve github workflows --- .github/workflows/build-test.yml | 2 +- .github/workflows/publish.yml | 17 ++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index fb7445d..2cc0496 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -1,4 +1,4 @@ -name: build & test +name: Build & test using current Python versions on: [workflow_dispatch, push] diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 6ecfebf..add7b54 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,4 +1,4 @@ -name: Create Dist & Publish on PyPI +name: Create source distribution & publish on PyPI on: workflow_dispatch: @@ -12,24 +12,23 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install CWB run: | sudo apt-get install libncurses5-dev - svn co http://svn.code.sf.net/p/cwb/code/cwb/trunk cwb - cd cwb - sed -i 's/SITE=beta-install/SITE=standard/' config.mk - sudo ./install-scripts/install-linux - sudo ldconfig + wget https://kumisystems.dl.sourceforge.net/project/cwb/cwb/cwb-3.5/deb/cwb_3.5.0-1_amd64.deb + wget https://master.dl.sourceforge.net/project/cwb/cwb/cwb-3.5/deb/cwb-dev_3.5.0-1_amd64.deb + sudo apt-get install ./cwb_3.5.0-1_amd64.deb + sudo apt-get install ./cwb-dev_3.5.0-1_amd64.deb - name: Set up Python 3.12 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.12' - name: Install dependencies run: | python -m pip install --upgrade pip make install - - name: Create Dist + - name: Create source distribution run: | make sdist - name: Publish From 5e8381110e852d6cd6f79a5f5103bf9311eddac8 Mon Sep 17 00:00:00 2001 From: Philipp Heinrich Date: Fri, 18 Oct 2024 16:19:54 +0200 Subject: [PATCH 4/4] add functionality for breakdown of anchors / slots --- ccc/cwb.py | 4 ++-- ccc/version.py | 2 +- tests/test_10_dumps.py | 7 +++++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/ccc/cwb.py b/ccc/cwb.py index 29fea8e..fc1e538 100644 --- a/ccc/cwb.py +++ b/ccc/cwb.py @@ -1477,7 +1477,7 @@ def correct_anchors(self, corrections): """ self.df = correct_anchors(self.df, corrections) - def breakdown(self, p_atts=['word'], flags="", split=False): + def breakdown(self, p_atts=['word'], flags="", split=False, start='match', end='matchend'): """Frequency breakdown of match..matchend. """ @@ -1485,7 +1485,7 @@ def breakdown(self, p_atts=['word'], flags="", split=False): logger.info('creating frequency breakdown') breakdown = self.counts.dump( df_dump=self.df, - start='match', end='matchend', + start=start, end=end, p_atts=p_atts, split=split ) diff --git a/ccc/version.py b/ccc/version.py index cc620f7..d4952fa 100644 --- a/ccc/version.py +++ b/ccc/version.py @@ -4,4 +4,4 @@ """ -__version__ = "0.12.3dev0" +__version__ = "0.12.3" diff --git a/tests/test_10_dumps.py b/tests/test_10_dumps.py index 4147097..d814b95 100644 --- a/tests/test_10_dumps.py +++ b/tests/test_10_dumps.py @@ -55,6 +55,13 @@ def test_breakdown_p_att(germaparl): assert breakdown.loc['gehen']['freq'] == 224 +def test_breakdown_anchor(germaparl): + corpus = get_corpus(germaparl) + restricted = corpus.query('[pos="APPR"]@1[pos="ADJA"][pos="NN"]') + bd = restricted.breakdown(p_atts=['lemma'], start=1, end=1) + assert bd.sort_values(by='freq', ascending=False).index[0] == 'neu' + + def test_matches(germaparl): corpus = get_corpus(germaparl) dump = corpus.query('"SPD"')