Skip to content

Commit

Permalink
For #2175 validate go id
Browse files Browse the repository at this point in the history
  • Loading branch information
mugitty committed Nov 10, 2023
1 parent 625ee02 commit ba4eded
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 2 deletions.
9 changes: 8 additions & 1 deletion ontobio/io/gafparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,15 @@ def to_association(gaf_line: List[str], report=None, group="unknown", dataset="u
qualifiers = [association.Curie.from_str(curie_util.contract_uri(relations.lookup_label(q), strict=False)[0]) for q in qualifiers]

object = association.Term(association.Curie.from_str(gaf_line[4]), taxon)
if isinstance(object, association.Error):
if isinstance(object, association.Error) or isinstance(object.id, association.Error):
report.error(source_line, Report.INVALID_SYMBOL, gaf_line[4], "Problem parsing GO Term", taxon=gaf_line[TAXON_INDEX], rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

# Check GO Term namespace and identifier
go_term = object.id
if go_term.namespace != "GO" or go_term.identity.isnumeric == False:
report.error(source_line, Report.INVALID_SYMBOL, gaf_line[4], "Namespace should be \"GO\" and identity a numeric value greater than \"0\"", taxon=gaf_line[TAXON_INDEX], rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

# References
references = [association.Curie.from_str(e) for e in gaf_line[5].split("|") if e]
Expand Down
10 changes: 9 additions & 1 deletion ontobio/io/gpadparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,10 @@ def from_1_2(gpad_line: List[str], report=None, group="unknown", dataset="unknow
if go_term.is_error():
report.error(source_line, Report.INVALID_SYMBOL, gpad_line[3], "Problem parsing GO Term", taxon=str(taxon), rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

if go_term.namespace != "GO" or go_term.identity.isnumeric == False:
report.error(source_line, Report.INVALID_SYMBOL, gpad_line[3], "Namespace should be \"GO\" and identity a numeric value greater than \"0\"", taxon=str(taxon), rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

object = association.Term(go_term, taxon)

Expand Down Expand Up @@ -449,7 +453,11 @@ def from_2_0(gpad_line: List[str], report=None, group="unknown", dataset="unknow
if go_term.is_error():
report.error(source_line, Report.INVALID_SYMBOL, gpad_line[ONTOLOGY_CLASS_INDEX], "Problem parsing GO Term", taxon=str(taxon), rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)


if go_term.namespace != "GO" or go_term.identity.isnumeric == False:
report.error(source_line, Report.INVALID_SYMBOL, gpad_line[ONTOLOGY_CLASS_INDEX], "Namespace should be \"GO\" and identity a numeric value greater than \"0\"", taxon=str(taxon), rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

object = association.Term(go_term, taxon)

evidence_type = association.Curie.from_str(gpad_line[EVIDENCE_INDEX])
Expand Down
6 changes: 6 additions & 0 deletions tests/test_gafparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,12 @@ def test_bad_date():
assoc_result = p.parse_line("PomBase\tSPAC25B8.17\typf1\t\tGO:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\tTODAY\tPomBase\tfoo(X:1)")
assert assoc_result.skipped == True
assert assoc_result.associations == []

def test_bad_go_id():
    """A GAF line whose term column is not in the GO namespace is skipped.

    The fifth column carries ``INVALID:0000007``; the parser is expected to
    mark the line as skipped and return no associations.
    """
    gaf_line = "PomBase\tSPAC25B8.17\typf1\t\tINVALID:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20231110\tPomBase\tfoo(X:1)"

    outcome = GafParser().parse_line(gaf_line)

    assert outcome.skipped == True
    assert outcome.associations == []

def test_bad_taxon():
p = GafParser()
Expand Down
44 changes: 44 additions & 0 deletions tests/test_gpad_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,27 @@ def test_parse_interacting_taxon():
]
result = to_association(list(vals), report=report, version="1.2")
assert result.associations[0].interacting_taxon == Curie(namespace="NCBITaxon", identity="5678")

def test_parse_go_id_1_2():
    """GPAD 1.2: a row whose fourth column is not a GO term is rejected.

    The row supplies ``UBERON:1234`` in the fourth column. The result must be
    flagged as skipped, carry exactly one ERROR-level report message, and
    contain no associations.
    """
    gpad_fields = [
        "MGI",
        "MGI:1918911",
        "enables",
        "UBERON:1234",
        "MGI:MGI:2156816|GO_REF:0000015",
        "ECO:0000307",
        "",
        "",
        "20100209",
        "MGI",
        "",
        "creation-date=2020-09-17|modification-date=2020-09-17|contributor-id=http://orcid.org/0000-0003-2689-5511"
    ]
    parse_report = assocparser.Report(group="unknown", dataset="unknown")

    # Hand the parser its own copy so the fixture list stays untouched.
    outcome = to_association(gpad_fields.copy(), report=parse_report, version="1.2")

    assert outcome.skipped == 1
    error_messages = [msg for msg in outcome.report.messages if msg["level"] == "ERROR"]
    assert len(error_messages) == 1
    assert len(outcome.associations) == 0


def test_duplicate_key_annot_properties():
Expand Down Expand Up @@ -189,6 +210,29 @@ def test_parse_2_0():
# Test annotation property retrieval
contributors = result.associations[0].annotation_property_values(property_key="contributor-id")
assert set(contributors) == {"http://orcid.org/0000-0003-2689-5511"}


def test_parse_go_id_2_0():
    """GPAD 2.0: a row carrying a non-GO ontology ID is rejected.

    The row supplies ``UBERON:5678`` where a GO term is presumably expected.
    The result must be flagged as skipped, carry exactly one ERROR-level
    report message, and contain no associations.
    """
    gpad_fields = [
        "MGI:MGI:1918911",
        "",
        "RO:0002327",
        "UBERON:5678",
        "MGI:MGI:2156816|GO_REF:0000015",
        "ECO:0000307",
        "",
        "",
        "2020-09-17",
        "MGI",
        "",
        "creation-date=2020-09-17|modification-date=2020-09-17|contributor-id=http://orcid.org/0000-0003-2689-5511"
    ]
    parse_report = assocparser.Report(group="unknown", dataset="unknown")

    # Hand the parser its own copy so the fixture list stays untouched.
    outcome = to_association(gpad_fields.copy(), report=parse_report, version="2.0")

    assert outcome.skipped == 1
    error_messages = [msg for msg in outcome.report.messages if msg["level"] == "ERROR"]
    assert len(error_messages) == 1
    assert len(outcome.associations) == 0


def test_aspect_fill_for_obsolete_terms():
Expand Down

0 comments on commit ba4eded

Please sign in to comment.