Skip to content

Commit

Permalink
fixes paper id and authors list handling
Browse files: browse the repository at this point in the history
  • Loading branch information
WolfgangFahl committed Mar 22, 2023
1 parent b3dd56f commit 0a85e11
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 1 deletion.
2 changes: 2 additions & 0 deletions ceurws/ceur_ws.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,8 @@ def __init__(self):
primaryKey="id",
entityPluralName="papers",
config=CEURWS.CONFIG,
handleInvalidListTypes=True,
listSeparator=",",
name=self.__class__.__name__)


Expand Down
6 changes: 6 additions & 0 deletions ceurws/papertocparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,20 @@ def parsePapers(self):
paper_records=[]
toc=self.soup.find(attrs={"class": "CEURTOC"})
if toc:
index=0
paper_ids=[]
for paper_li in toc.findAll('li'):
index+=1
paper_record = self.scrape.parseWithScrapeDescription(paper_li, self.scrapeDescr)
paper_record["vol_number"]=self.number
href=paper_li.find('a', href=True)
if href:
paper_record["pdf_name"]=href.attrs["href"]
if "id" in paper_li.attrs:
paper_id=paper_li.attrs["id"]
if paper_id in paper_ids:
paper_id=f"{paper_id}-duplicate-{index}"
paper_ids.append(paper_id)
paper_record["id"]=f"Vol-{self.number}/{paper_id}"
paper_records.append(paper_record)
pass
Expand Down
2 changes: 1 addition & 1 deletion tests/test_papertocparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_volExamples(self):
"""
tests parsing of volume examples
"""
vol_examples = [(83,12),(1,15),(3264,10),(3343,7)]
vol_examples = [(2376,35),(2379,8),(1,15),(83,12),(3264,10),(3343,7)]
counter=Counter()
debug=self.debug
debug=True
Expand Down

0 comments on commit 0a85e11

Please sign in to comment.