From f3c637dfdb4d74724d5d0b36119a237e7d36b5de Mon Sep 17 00:00:00 2001 From: Sheean Spoel Date: Fri, 8 Mar 2024 15:27:54 +0100 Subject: [PATCH] Updated sastadev, clean-up --- mwe_query/canonicalform.py | 3 +++ mwe_query/trymwes.py | 42 +++++++++++++++++++++++--------------- requirements.txt | 6 +++--- setup.py | 2 +- 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/mwe_query/canonicalform.py b/mwe_query/canonicalform.py index 3ed0b30..054c3a1 100644 --- a/mwe_query/canonicalform.py +++ b/mwe_query/canonicalform.py @@ -394,6 +394,7 @@ def mknewnode(stree: SynTree, mwetop: int, atts: List[str], annotations: List[in newnode.attrib['maxnodecount'] = f'{len(stree)}' return newnode + def expandnonheadwordnode(nonheadwordnode, phrasenodeproperties): phraserel = gav(nonheadwordnode, 'rel') newnonheadwordnode = copy.copy(nonheadwordnode) @@ -402,6 +403,8 @@ def expandnonheadwordnode(nonheadwordnode, phrasenodeproperties): phrasenode.attrib['rel'] = phraserel phrasenode.append(newnonheadwordnode) return phrasenode + + def zullenheadclause(stree: SynTree) -> bool: if stree.tag == 'node': cat = gav(stree, 'cat') diff --git a/mwe_query/trymwes.py b/mwe_query/trymwes.py index 3962fc2..b97737b 100644 --- a/mwe_query/trymwes.py +++ b/mwe_query/trymwes.py @@ -1,17 +1,21 @@ from sastadev.alpinoparsing import parse -from lcat import expandnonheadwords -from sastadev.treebankfunctions import indextransform from lxml import etree -from canonicalform import generatequeries, expandfull +from .canonicalform import generatequeries, expandfull debug = False -geenhaankraaien = ('0geen *haan zal naar iets kraaien', - ['Daar kraait geen haan naar', 'Hier heeft geen haan naar gekraaid', - 'geen haan kraaide daarnaar', 'geen haan kraaide ernaar dat hij niet kwam', - 'geen haan kraaide er naar dat hij niet kwam', - 'er is geen haan die daar naar kraait', ] - ) +geenhaankraaien = ( + "0geen *haan zal naar iets kraaien", + [ + "Daar kraait geen haan naar", + "Hier heeft geen haan naar gekraaid", + "geen haan kraaide daarnaar", + "geen haan kraaide ernaar dat hij niet kwam", + "geen haan kraaide er naar dat hij niet kwam", + "er is geen haan die daar naar kraait", + ], +) + def select(mweutts, utt=None): if utt is None: @@ -20,6 +24,7 @@ def select(mweutts, utt=None): result = (mweutts[0], [mweutts[1][utt]]) return result + def getparses(utterances): uttparses = [] for utterance in utterances: @@ -27,26 +32,29 @@ def getparses(utterances): uttparses.append(uttparse) return uttparses + def trysomemwes(): mwe, utterances = select(geenhaankraaien) mwequeries = generatequeries(mwe) - labeledmwequeries = (('MWEQ', mwequeries[0]), ('NMQ', mwequeries[1]), ('MLQ', mwequeries[2])) + labeledmwequeries = ( + ("MWEQ", mwequeries[0]), + ("NMQ", mwequeries[1]), + ("MLQ", mwequeries[2]), + ) uttparses = getparses(utterances) for utterance, uttparse in zip(utterances, uttparses): - print(f'{utterance}:') + print(f"{utterance}:") expandeduttparse = expandfull(uttparse) if debug: etree.dump(expandeduttparse) for label, mwequery in labeledmwequeries: results = expandeduttparse.xpath(mwequery) if debug: - print('Found hits:') + print("Found hits:") for result in results: etree.dump(result) - print(f'{label}: {len(results)}') - - + print(f"{label}: {len(results)}") -if __name__ == '__main__': - trysomemwes() \ No newline at end of file +if __name__ == "__main__": + trysomemwes() diff --git a/requirements.txt b/requirements.txt index f91a84e..299f474 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ # -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: +# This file is autogenerated by pip-compile with python 3.10 +# To update, run: # # pip-compile # @@ -36,7 +36,7 @@ requests==2.31.0 # via # alpino-query # mwe-query (setup.py) -sastadev==0.1.1 +sastadev==0.1.4 # via # auchann # mwe-query (setup.py) diff --git a/setup.py b/setup.py index 77a74b4..855e9b8 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ package_data={"mwe_query": ["py.typed"]}, zip_safe=True, install_requires=[ - 'alpino-query>=2.1.8', 'requests', 'BaseXClient', 'sastadev>=0.1.1' + 'alpino-query>=2.1.8', 'requests', 'BaseXClient', 'sastadev>=0.1.4' ], entry_points={ 'console_scripts': [