-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin/develop' into mwestats
- Loading branch information
Showing
5 changed files
with
108 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# This CITATION.cff file was generated with cffinit.
# Visit https://bit.ly/cffinit to generate yours today!

cff-version: 1.2.0
title: MWE Query
message: >-
  If you use this software, please cite it using the
  metadata from this file.
type: software
authors:
  - given-names: Jan
    family-names: Odijk
    affiliation: Utrecht University
  - given-names: Martin
    family-names: Kroon
    affiliation: Utrecht University
    orcid: 'https://orcid.org/0000-0003-3059-6872'
  - name: >-
      Research Software Lab, Centre for Digital Humanities,
      Utrecht University
    website: >-
      https://cdh.uu.nl/centre-for-digital-humanities/research-software-lab/
    city: Utrecht
    country: NL
identifiers:
  - type: doi
    value: 10.5281/zenodo.10410636
repository-code: 'https://github.com/UUDigitalHumanitieslab/mwe-query'
license: BSD-3-Clause
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from sastadev.alpinoparsing import parse | ||
from lcat import expandnonheadwords | ||
from sastadev.treebankfunctions import indextransform | ||
from lxml import etree | ||
from canonicalform import generatequeries, expandfull | ||
|
||
# When True, trysomemwes() additionally dumps the expanded parse and each hit.
debug = False


# Test item: a pair of (MWE string passed to generatequeries, list of
# utterances that are parsed and queried for that MWE).
geenhaankraaien = (
    '0geen *haan zal naar iets kraaien',
    [
        'Daar kraait geen haan naar',
        'Hier heeft geen haan naar gekraaid',
        'geen haan kraaide daarnaar',
        'geen haan kraaide ernaar dat hij niet kwam',
        'geen haan kraaide er naar dat hij niet kwam',
        'er is geen haan die daar naar kraait',
    ],
)
|
||
def select(mweutts, utt=None):
    """Return an (mwe, utterances) pair, optionally narrowed to one utterance.

    :param mweutts: indexable pair whose first item is the MWE and whose
        second item is an indexable collection of utterances.
    :param utt: index of the single utterance to keep; when None the pair
        is returned unchanged (the very same object).
    :return: ``mweutts`` itself, or a new tuple of the MWE and a
        one-element list holding the selected utterance.
    """
    if utt is not None:
        mwe = mweutts[0]
        chosen = mweutts[1][utt]
        return (mwe, [chosen])
    return mweutts
|
||
def getparses(utterances):
    """Parse every utterance and return the parses in the same order.

    :param utterances: iterable of utterances, each handed to ``parse``.
    :return: list with the result of ``parse`` for each utterance.
    """
    return [parse(sentence) for sentence in utterances]
|
||
def trysomemwes():
    """Run the generated MWE queries on every test utterance and print hit counts.

    Takes the MWE and utterances from ``geenhaankraaien``, generates the
    queries for the MWE, parses all utterances, and for each expanded parse
    prints the number of XPath results per query, labeled MWEQ, NMQ and MLQ.
    With the module-level ``debug`` flag set, the expanded parse and every
    individual hit are dumped as well.
    """
    mwe, utterances = select(geenhaankraaien)
    queries = generatequeries(mwe)
    # Only the first three generated queries are used, in this fixed order.
    mweq, nmq, mlq = queries[0], queries[1], queries[2]
    labeledqueries = (('MWEQ', mweq), ('NMQ', nmq), ('MLQ', mlq))

    # All utterances are parsed up front, before any output is printed.
    parses = getparses(utterances)
    for utterance, tree in zip(utterances, parses):
        print(f'{utterance}:')
        expandedtree = expandfull(tree)
        if debug:
            etree.dump(expandedtree)
        for label, query in labeledqueries:
            hits = expandedtree.xpath(query)
            if debug:
                print('Found hits:')
                for hit in hits:
                    etree.dump(hit)
            print(f'{label}: {len(hits)}')
|
||
|
||
|
||
|
||
# Run the MWE query try-out only when this file is executed as a script.
if __name__ == '__main__':
    trysomemwes()