Skip to content

Commit

Permalink
places, concepts: GND closeMatch corrections
Browse files: browse the repository at this point in the history
Co-Authored-by: Peter Weber <[email protected]>
  • Loading branch information
rerowep committed Nov 18, 2024
1 parent 2aa1bf1 commit 5304543
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 62 deletions.
23 changes: 11 additions & 12 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions rero_mef/alembic/d8536341fc5e_delete_identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def upgrade():
ids.append(id_)
rec = agent_cls.get_record(id_)
rec.pop("identifier", None)
rec.update(data=rec, bcommit=False, reindex=True)
rec.update(data=rec, dbcommit=False, reindex=True)
if idx % 1000 == 0:
print(f" {idx} commit", end=" | ", flush=True)
db.session.commit()
Expand Down Expand Up @@ -102,7 +102,7 @@ def downgrade():
ids.append(id_)
rec = agent_cls.get_record(id_)
rec["identifier"] = f'"{url}{rec.pid}"'
rec.update(data=rec, bcommit=False, reindex=True)
rec.update(data=rec, dbcommit=False, reindex=True)
if idx % 1000 == 0:
print(f" {idx} commit", end=" | ", flush=True)
db.session.commit()
Expand Down
33 changes: 20 additions & 13 deletions rero_mef/marctojson/do_gnd_concepts.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,14 +215,8 @@ def trans_gnd_relation(self):
if value:
self.json_dict[relation] = value

def trans_gnd_classification(self):
"""Transformation classification from field 686."""
if self.logger and self.verbose:
self.logger.info("Call Function", "trans_gnd_classification")
# TODO: find classification

def trans_gnd_match(self):
"""Transformation closeMatch and exactfrom field 750."""
"""Transformation closeMatch and exactMatch from field 750."""
if self.logger and self.verbose:
self.logger.info("Call Function", "trans_gnd_match")
for field_750 in self.marc.get_fields("750"):
Expand Down Expand Up @@ -253,29 +247,42 @@ def trans_gnd_match(self):
if authorized_ap := build_string_from_field(
field=field_750, subfields=subfields, tag_grouping=tag_grouping
):
match = {
match_data = {
"authorized_access_point": authorized_ap,
"source": "GND",
}
identified_by = []
other_source = None
for subfield_0 in field_750.get_subfields("0"):
if subfield_0.startswith("http"):
match.setdefault("identifiedBy", []).append(
identified_by.insert(
0,
{
"type": "uri",
"value": subfield_0,
}
},
)
if other_source:
identified_by[0]["source"] = other_source
else:
source, id_ = get_source_and_id(subfield_0)
if source:
match.setdefault("identifiedBy", []).append(
insert_pos = -1
if source != "GND":
other_source = source
match_data["source"] = other_source
insert_pos = 0
identified_by.insert(
insert_pos,
{
"source": source,
"type": "bf:Nbn",
"value": id_,
}
},
)
self.json_dict.setdefault(match_type, []).append(match)
if identified_by:
match_data["identifiedBy"] = identified_by
self.json_dict.setdefault(match_type, []).append(match_data)

def trans_gnd_note(self):
"""Transformation notes from field.
Expand Down
25 changes: 19 additions & 6 deletions rero_mef/marctojson/do_gnd_places.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,29 +270,42 @@ def trans_gnd_match(self):
if authorized_ap := build_string_from_field(
field=field_751, subfields=subfields, tag_grouping=tag_grouping
):
match = {
match_data = {
"authorized_access_point": authorized_ap,
"source": "GND",
}
identified_by = []
other_source = None
for subfield_0 in field_751.get_subfields("0"):
if subfield_0.startswith("http"):
match.setdefault("identifiedBy", []).append(
identified_by.insert(
0,
{
"type": "uri",
"value": subfield_0,
}
},
)
if other_source:
identified_by[0]["source"] = other_source
else:
source, id_ = get_source_and_id(subfield_0)
if source:
match.setdefault("identifiedBy", []).append(
insert_pos = -1
if source != "GND":
other_source = source
match_data["source"] = other_source
insert_pos = 0
identified_by.insert(
insert_pos,
{
"source": source,
"type": "bf:Nbn",
"value": id_,
}
},
)
self.json_dict.setdefault(match_type, []).append(match)
if identified_by:
match_data["identifiedBy"] = identified_by
self.json_dict.setdefault(match_type, []).append(match_data)

def trans_gnd_note(self):
"""Transformation notes from field.
Expand Down
61 changes: 46 additions & 15 deletions tests/unit/concepts/examples/xml_minimal_record.xml
Original file line number Diff line number Diff line change
@@ -1,24 +1,55 @@

<record>
<leader>00589nx a2200193 45 </leader>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">027630501</subfield>
<subfield code="9">sudoc</subfield>
<datafield tag="670" ind1=" " ind2=" ">
<subfield code="a">
Grand Larousse universel (art. : Livre)
</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">frBN001940328</subfield>
<datafield tag="675" ind1=" " ind2=" ">
<subfield code="a">
Laval RVM (en ligne), 2004-11-23
</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">frBN000000089</subfield>
<datafield tag="680" ind1=" " ind2=" ">
<subfield code="a">
Mers profondément engagées dans la masse des continents
</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">FRBNF118620892</subfield>
<subfield code="z">FRBNF11862089</subfield>
<datafield tag="667" ind1=" " ind2=" ">
<subfield code="a">Note interne</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">http://viaf.org/viaf/124265140</subfield>
<subfield code="2">VIAF</subfield>
<subfield code="C">VIAF</subfield>
<subfield code="d">20200302</subfield>
<datafield tag="260" ind1=" " ind2="9">
<subfield code="a">
Voir le descripteur Opposition (science politique)
</subfield>
</datafield>
<datafield tag="260" ind1=" " ind2="9">
<subfield code="a">
Combiner un des descripteurs Mouvements contestataires
</subfield>
</datafield>
<datafield tag="260" ind1=" " ind2=" ">
<subfield code="a">
Voir les vedettes : Mouvements contestataires ; Opposition
</subfield>
</datafield>
<datafield tag="260" ind1=" " ind2=" ">
<subfield code="a">
Voir les vedettes du type : Antifascisme ; Mouvements
</subfield>
</datafield>
<datafield tag="260" ind1=" " ind2=" ">
<subfield code="a">
Voir aux mouvements d'opposition particuliers, par ex. : Combat
</subfield>
</datafield>
<datafield tag="360" ind1=" " ind2=" ">
<subfield code="a">
Voir aussi aux mers et océans particuliers
</subfield>
</datafield>
<datafield tag="016" ind1=" " ind2=" ">
<subfield code="9">VF3, NC3, NC30</subfield>
</datafield>
</record>
23 changes: 14 additions & 9 deletions tests/unit/concepts/test_concepts_gnd_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,33 +174,38 @@ def test_gnd_close_match():
"closeMatch": [
{
"authorized_access_point": "Atlases",
"source": "GND",
"source": "DLC",
"identifiedBy": [
{
"source": "GND",
"type": "bf:Nbn",
"value": "(DE-101)1134384173",
"source": "DLC",
"type": "uri",
"value": "http://id.loc.gov/authorities/subjects/sh85009231",
},
{
"source": "DLC",
"type": "bf:Nbn",
"value": "sh85009231",
},
{
"type": "uri",
"value": "http://id.loc.gov/authorities/subjects/sh85009231",
"source": "GND",
"type": "bf:Nbn",
"value": "(DE-101)1134384173",
},
],
}
],
"exactMatch": [
{
"authorized_access_point": "Atlas",
"source": "GND",
"source": "DNLM",
"identifiedBy": [
{"source": "GND", "type": "bf:Nbn", "value": "(DE-101)125348144X"},
{
"source": "DNLM",
"type": "uri",
"value": "http://id.nlm.nih.gov/mesh/D020466",
},
{"source": "DNLM", "type": "bf:Nbn", "value": "D020466"},
{"type": "uri", "value": "http://id.nlm.nih.gov/mesh/D020466"},
{"source": "GND", "type": "bf:Nbn", "value": "(DE-101)125348144X"},
],
},
],
Expand Down
10 changes: 5 additions & 5 deletions tests/unit/places/test_places_gnd_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,17 +153,17 @@ def test_gnd_close_match():
"exactMatch": [
{
"authorized_access_point": "Venedig",
"source": "GND",
"source": "ZBW",
"identifiedBy": [
{
"source": "GND",
"source": "ZBW",
"type": "bf:Nbn",
"value": "(DE-101)997977663",
"value": "091419204",
},
{
"source": "ZBW",
"source": "GND",
"type": "bf:Nbn",
"value": "091419204",
"value": "(DE-101)997977663",
},
],
}
Expand Down

0 comments on commit 5304543

Please sign in to comment.