Skip to content

Commit

Permalink
pybibupdate functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
wirhabenzeit committed Jan 10, 2023
1 parent 963a2a9 commit a469f93
Show file tree
Hide file tree
Showing 13 changed files with 822 additions and 340 deletions.
83 changes: 0 additions & 83 deletions bibliography.bib

This file was deleted.

66 changes: 28 additions & 38 deletions examples/example.bib
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,17 @@ @article{MR0026286
DOI = "10.1002/j.1538-7305.1948.tb01338.x",
URL = "https://doi.org/10.1002/j.1538-7305.1948.tb01338.x"
}

@article{10.1109/TIT.2006.885507,
AUTHOR = "Candes, Emmanuel J. and Tao, Terence",
TITLE = "Near-optimal signal recovery from random projections: universal encoding strategies?",
JOURNAL = "IEEE Trans. Inform. Theory",
FJOURNAL = "Institute of Electrical and Electronics Engineers. Transactions on Information Theory",
VOLUME = "52",
YEAR = "2006",
NUMBER = "12",
PAGES = "5406--5425",
ISSN = "0018-9448",
MRCLASS = "94A12 (41A25 94A13)",
MRNUMBER = "2300700",
MRREVIEWER = "L. L. Campbell",
DOI = "10.1109/TIT.2006.885507",
URL = "https://doi.org/10.1109/TIT.2006.885507"
title = "{Near}-optimal signal recovery from random projections: {Universal} encoding strategies?",
author = "Candes, Emmanuel J. and Tao, Terence",
year = "2006",
doi = "10.1109/TIT.2006.885507",
url = "https://doi.org/10.1109/TIT.2006.885507",
journal = "IEEE Trans. Inf. Theory",
volume = "52",
number = "12",
pages = "5406--5425"
}

@unpublished{math/0211159,
Expand All @@ -40,42 +36,36 @@ @unpublished{math/0211159
}

@article{PMID:271968,
title = "{DNA} sequencing with chain-terminating inhibitors.",
author = "Sanger, F. and Nicklen, S. and Coulson, A. R.",
year = "1977",
doi = "10.1073/pnas.74.12.5463",
url = "https://doi.org/10.1073/pnas.74.12.5463",
year = "1977",
publisher = "Proceedings of the National Academy of Sciences",
journal = "Proc. Natl. Acad. Sci. U.S.A.",
volume = "74",
number = "12",
pages = "5463--5467",
title = "{DNA} sequencing with chain-terminating inhibitors",
journal = "Proceedings of the National Academy of Sciences",
PMID = "271968"
pmid = "PMID:271968"
}

@inproceedings{10.1109/CVPR.2016.90,
title = "{Deep} residual learning for image recognition",
author = "He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian",
doi = "10.1109/cvpr.2016.90",
url = "https://doi.org/10.1109/cvpr.2016.90",
year = "2016",
publisher = "{IEEE}",
title = "{Deep} {Residual} {Learning} for {Image} {Recognition}",
booktitle = "2016 {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})"
doi = "10.1109/CVPR.2016.90",
url = "https://doi.org/10.1109/CVPR.2016.90",
publisher = "IEEE Computer Society",
booktitle = "{Proc} {IEEE} {Comput} {Soc} {Conf} {Comput} {Vision} {Pattern} {Recognit}"
}

@article{10.4310/ATMP.1998.v2.n2.a1,
AUTHOR = "Maldacena, Juan",
TITLE = "The large {$N$} limit of superconformal field theories and supergravity",
JOURNAL = "Adv. Theor. Math. Phys.",
FJOURNAL = "Advances in Theoretical and Mathematical Physics",
VOLUME = "2",
YEAR = "1998",
NUMBER = "2",
PAGES = "231--252",
ISSN = "1095-0761",
MRCLASS = "81T30 (81T60 83E30)",
MRNUMBER = "1633016",
MRREVIEWER = "Douglas J. Smith",
DOI = "10.4310/ATMP.1998.v2.n2.a1",
URL = "https://doi.org/10.4310/ATMP.1998.v2.n2.a1"
title = "{The} large {N} {Limit} of superconformal field theories and supergravity",
author = "Maldacena, Juan",
year = "1998",
doi = "10.4310/ATMP.1998.v2.n2.a1",
url = "https://doi.org/10.4310/ATMP.1998.v2.n2.a1",
journal = "Adv. Theor. Math. Phys.",
volume = "2",
number = "2",
pages = "231--252"
}
Binary file modified examples/example_biber.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion examples/example_biber.tex
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

\usepackage[backend=biber, datamodel=pmid-mr-arxiv,url=false]{biblatex}
\usepackage{hyperref}
\addbibresource{example.bib}
\addbibresource{example.bib}

\makeatletter
\DeclareFieldFormat{eprint}{\href{https://arxiv.org/\abx@arxivpath/#1}{\nolinkurl{arXiv:#1}}}
Expand Down
Binary file modified examples/example_bibtex.pdf
Binary file not shown.
8 changes: 4 additions & 4 deletions examples/example_bibtex.tex
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
\usepackage{hyperref}
\bibliographystyle{plain}

\begin{document}

\nocite{MR0026286,10.1109/TIT.2006.885507,math/0211159,PMID:271968,10.1109/CVPR.2016.90,10.4310/ATMP.1998.v2.n2.a1}
\begin{document}
\nocite{MR0026286,10.1109/TIT.2006.885507,math/0211159,PMID:271968,10.1109/CVPR.2016.90,10.4310/ATMP.1998.v2.n2.a1}

\bibliography{example}

\end{document}
\end{document}
1 change: 1 addition & 0 deletions pybibget.egg-info/entry_points.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[console_scripts]
pybibget = pybibget:pybibget
pybibparse = pybibget:pybibparse
pybibupdate = pybibget:pybibupdate
77 changes: 47 additions & 30 deletions pybibget/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,51 +3,54 @@
import re
import sys
import logging as log
from enum import Enum
import pybtex.database
import pybibget.bibentry as bibentry
log.getLogger('asyncio').setLevel(log.WARNING)
from pybibget.bibentry import Bibget
from pybtex.database import parse_string

def add_optional_args(parser):
    """
    Registers the optional switches shared by the command line tools on parser

    Adds -v/--verbose, -d/--debug and --skip-doi-msc, each as a store_true flag.
    """
    shared_switches = (
        (('-v', '--verbose'), 'verbose output'),
        (('-d', '--debug'), 'debug output'),
        (('--skip-doi-msc',), 'skip MathSciNet lookup for DOIs'),
    )
    for option_names, description in shared_switches:
        parser.add_argument(*option_names, action='store_true', help=description)


def pybibget() -> None:
def pybibget():
"""
Reads citation keys from command line and calls get_citations()
"""
parser = argparse.ArgumentParser(prog='pybibget', description='Command line utility to automatically retrieve BibTeX citations from MathSciNet, arXiv and PubMed')

parser.add_argument('keys', type=str, metavar='citekeys', nargs='*', help='MathSciNet (MRxxxxx), arXiv (xxxx.xxxxx), PubMed (PMID:xxxxxxxx) or DOI (10.xxx/xxxxx) citation keys (separated by spaces)')
parser.add_argument('-v', '--verbose', action='store_true', help='verbose output')
parser.add_argument('-d', '--debug', action='store_true', help='debug output')
parser.add_argument('-f', action='store', dest='file_name', help='Append output to file (default: write output to stdout)')

parser.add_argument('-w', action='store', dest='file_name', help='Append output to file (default: write output to stdout)')
add_optional_args(parser)
args = parser.parse_args()
kwargs = {'file': args.file_name}
kwargs = {'file': args.file_name }
if args.debug:
kwargs['verbose'] = log.DEBUG
elif args.verbose:
kwargs['verbose'] = log.INFO
if not args.keys:
parser.print_help()
exit(1)
asyncio.run(get_citations(args.keys, **kwargs))

get_citations(args.keys, **kwargs)


def pybibparse():
"""
Reads latex file name from the command line, parses the .blg file and calls get_citations()
"""
parser = argparse.ArgumentParser(prog='pybibget', description='Command line utility to automatically retrieve missing BibTeX citations from MathSciNet, arXiv and PubMed')

parser.add_argument('file_name', type=str, metavar='file_name', nargs=1, help='base file name of main tex file (without .tex extension)')
parser.add_argument('-v', '--verbose', action='store_true', help='verbose output')
parser.add_argument('-d', '--debug', action='store_true', help='debug output')
parser.add_argument('-w', '--write', metavar="file_name", action='store', nargs='?', const=" ", type=str, help='Append output to .bib file (default: write output to stdout). The .bib file is automatically detected from the .blg file. If no .bib file is found, the .bib file has to be specified explicitly via "-w file_name.bib".')

parser = argparse.ArgumentParser(prog='pybibget', description='Command line utility to automatically retrieve BibTeX citations from MathSciNet, arXiv and PubMed')
parser.add_argument('file_name', type=str, metavar='tex_file(.tex)', nargs=1, help='LaTeX file to be parsed for missing citations')
parser.add_argument('-w', action='store', dest='write', metavar="output.bib", nargs='?', const=" ", help='Append output to file (default: write output to stdout). A bib file name can be specified via "-w file_name.bib" but usually the .bib file is found automatically.')
add_optional_args(parser)
args = parser.parse_args()
if not args.file_name:
parser.print_help()
sys.exit()
if args.file_name[0].endswith(".tex"):
base_file_name = args.file_name[0][:-4]
else:
base_file_name = args.file_name[0]

base_file_name = args.file_name[0]
with open(base_file_name+".blg") as file:
blg_file = file.read()
missing_cites = re.findall(r"I didn't find a database entry for '([A-Za-z0-9\.\-_ :\/]*)'", blg_file) \
Expand All @@ -68,26 +71,39 @@ def pybibparse():
print("No .bib file found. Please specify the .bib file via '-w file_name.bib'")
sys.exit()
kwargs['file'] = bib_file_names[0] if args.write == " " else args.write
asyncio.run(get_citations(missing_cites, **kwargs))
get_citations(missing_cites, **kwargs)
else:
print("No missing citations found. Make sure that biber/bibtex is run successfully before running pybibget.")

def pybibupdate():
    """
    Reads a .bib file name from the command line, updates its entries and writes them back

    The ".bib" extension is appended to the given name when it is missing. The file is
    parsed with parse_string(), its entries are passed to Bibget.update_all() and the
    returned bibliography is written to the same file, replacing its previous content.
    """
    # Report this console script under its own name in usage/help output
    # (it is installed as "pybibupdate", not "pybibget").
    parser = argparse.ArgumentParser(prog='pybibupdate', description='Command line utility to update BibTeX citations from MathSciNet and Scopus')
    parser.add_argument('file_name', type=str, metavar='bib_file(.bib)', help='bib file to be parsed for citations')
    args = parser.parse_args()
    # file_name is a required positional, so this only triggers for an explicitly
    # empty argument; argparse itself rejects a missing one.
    if not args.file_name:
        parser.print_help()
        sys.exit()
    if not args.file_name.endswith(".bib"):
        args.file_name += ".bib"
    with open(args.file_name) as file:
        bib_file = file.read()
    bibliography = parse_string(bib_file, 'bibtex').entries

    log.basicConfig(format="%(levelname)s: %(message)s", level=log.WARNING)

    bibget = Bibget(mathscinet=True)
    updated_bibliography = asyncio.run(bibget.update_all(bibliography))
    # The input file is reopened for writing only after the update has finished,
    # so a failing update leaves the original file untouched.
    with open(args.file_name, 'w') as file:
        file.write(updated_bibliography.to_string('bibtex'))
    print(f"Wrote the updated bibliography to {args.file_name}.")

async def get_citations(keys, verbose=log.WARN, file=None):
def get_citations(keys, verbose=log.WARNING, file=None):
"""
Retrieves BibTeX entries for given citation keys and writes them to file or stdout
"""
log.basicConfig(format="%(levelname)s: %(message)s", level=verbose)

bibentries = await asyncio.gather(*[bibentry.getbibentry(key) for key in keys],return_exceptions=True)
bib_data = pybtex.database.BibliographyData()
for entry_key in bibentries:
if type(entry_key) in [ValueError, TypeError]:
log.error(entry_key)
else:
entry,key = entry_key
bib_data.entries[key] = entry

bibget = Bibget(mathscinet=True)
bib_data = asyncio.run(bibget.citations(keys))
number_of_entries = len(bib_data.entries)
bib_data = bib_data.to_string('bibtex')
if file:
Expand All @@ -96,6 +112,7 @@ async def get_citations(keys, verbose=log.WARN, file=None):
print(f"Successfully appended {number_of_entries} BibTeX entries to {file}.")
else:
print("\n"+bib_data)
return number_of_entries


if __name__ == '__main__':
Expand Down
Binary file modified pybibget/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
Binary file modified pybibget/__pycache__/bibentry.cpython-39.pyc
Binary file not shown.
Loading

0 comments on commit a469f93

Please sign in to comment.