diff --git a/README.md b/README.md index 8a99bf3..9572967 100644 --- a/README.md +++ b/README.md @@ -58,10 +58,10 @@ $ pip install pybibget URL = "https://doi.org/10.1109/TIT.2006.885507" } -@article{math/0211159, +@unpublished{math/0211159, author = "Perelman, Grisha", title = "{The} entropy formula for the {Ricci} flow and its geometric applications", - journal = "preprint", + note = "Preprint", year = "2002", eprint = "math/0211159", archiveprefix = "arXiv" @@ -114,7 +114,6 @@ With the option `-f filename` the result can be *appended* to any given file dir % pybibget MR0026286 10.1109/TIT.2006.885507 math/0211159 PMID:271968 10.1109/CVPR.2016.90 hep-th/9711200 -f bibliography.bib Succesfully appended 6 BibTeX entries to bibliography.bib ``` -the result can be *appended* to a given `.bib` file. ### TeX File Parsing diff --git a/dist/.DS_Store b/dist/.DS_Store deleted file mode 100644 index 3a3f18a..0000000 Binary files a/dist/.DS_Store and /dev/null differ diff --git a/dist/pybibget-0.0.1-py3-none-any.whl b/dist/pybibget-0.0.1-py3-none-any.whl index 3d7cc7f..b853f33 100644 Binary files a/dist/pybibget-0.0.1-py3-none-any.whl and b/dist/pybibget-0.0.1-py3-none-any.whl differ diff --git a/dist/pybibget-0.0.1.tar.gz b/dist/pybibget-0.0.1.tar.gz index 0838fa4..4409db1 100644 Binary files a/dist/pybibget-0.0.1.tar.gz and b/dist/pybibget-0.0.1.tar.gz differ diff --git a/dist/pybibget-0.0.1/.DS_Store b/dist/pybibget-0.0.1/.DS_Store deleted file mode 100644 index aa337f9..0000000 Binary files a/dist/pybibget-0.0.1/.DS_Store and /dev/null differ diff --git a/dist/pybibget-0.0.1/LICENSE b/dist/pybibget-0.0.1/LICENSE deleted file mode 100644 index 1e376aa..0000000 --- a/dist/pybibget-0.0.1/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2023 wirhabenzeit - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, 
including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/dist/pybibget-0.0.1/PKG-INFO b/dist/pybibget-0.0.1/PKG-INFO deleted file mode 100644 index 48b2871..0000000 --- a/dist/pybibget-0.0.1/PKG-INFO +++ /dev/null @@ -1,5 +0,0 @@ -Metadata-Version: 2.1 -Name: pybibget -Version: 0.0.1 -Requires-Python: >=3.6 -License-File: LICENSE diff --git a/dist/pybibget-0.0.1/README.md b/dist/pybibget-0.0.1/README.md deleted file mode 100644 index cfec149..0000000 --- a/dist/pybibget-0.0.1/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# pybibget -Python Module to automatically retrieve BibTeX citations from MathSciNet, arXiv and Pubmed diff --git a/dist/pybibget-0.0.1/pybibget.egg-info/PKG-INFO b/dist/pybibget-0.0.1/pybibget.egg-info/PKG-INFO deleted file mode 100644 index 48b2871..0000000 --- a/dist/pybibget-0.0.1/pybibget.egg-info/PKG-INFO +++ /dev/null @@ -1,5 +0,0 @@ -Metadata-Version: 2.1 -Name: pybibget -Version: 0.0.1 -Requires-Python: >=3.6 -License-File: LICENSE diff --git a/dist/pybibget-0.0.1/pybibget.egg-info/SOURCES.txt b/dist/pybibget-0.0.1/pybibget.egg-info/SOURCES.txt deleted file mode 100644 index 3bb9a86..0000000 --- a/dist/pybibget-0.0.1/pybibget.egg-info/SOURCES.txt +++ 
/dev/null @@ -1,13 +0,0 @@ -LICENSE -README.md -pyproject.toml -setup.cfg -setup.py -pybibget/bibentry.py -pybibget/pybibget.py -pybibget.egg-info/PKG-INFO -pybibget.egg-info/SOURCES.txt -pybibget.egg-info/dependency_links.txt -pybibget.egg-info/entry_points.txt -pybibget.egg-info/requires.txt -pybibget.egg-info/top_level.txt \ No newline at end of file diff --git a/dist/pybibget-0.0.1/pybibget.egg-info/dependency_links.txt b/dist/pybibget-0.0.1/pybibget.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/dist/pybibget-0.0.1/pybibget.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/dist/pybibget-0.0.1/pybibget.egg-info/entry_points.txt b/dist/pybibget-0.0.1/pybibget.egg-info/entry_points.txt deleted file mode 100644 index 2acd312..0000000 --- a/dist/pybibget-0.0.1/pybibget.egg-info/entry_points.txt +++ /dev/null @@ -1,2 +0,0 @@ -[console_scripts] -pybibget = pybibget.pybibget:pybibget diff --git a/dist/pybibget-0.0.1/pybibget.egg-info/requires.txt b/dist/pybibget-0.0.1/pybibget.egg-info/requires.txt deleted file mode 100644 index 3f4498a..0000000 --- a/dist/pybibget-0.0.1/pybibget.egg-info/requires.txt +++ /dev/null @@ -1,4 +0,0 @@ -requests -pybtex -lxml -pylatexenc diff --git a/dist/pybibget-0.0.1/pybibget.egg-info/top_level.txt b/dist/pybibget-0.0.1/pybibget.egg-info/top_level.txt deleted file mode 100644 index afc7bad..0000000 --- a/dist/pybibget-0.0.1/pybibget.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -pybibget diff --git a/dist/pybibget-0.0.1/pybibget/bibentry.py b/dist/pybibget-0.0.1/pybibget/bibentry.py deleted file mode 100644 index 75c8e5e..0000000 --- a/dist/pybibget-0.0.1/pybibget/bibentry.py +++ /dev/null @@ -1,110 +0,0 @@ -from lxml import html, etree -import requests,re -from pybtex.database import Entry, Person, parse_string, parse_bytes -from pylatexenc.latexencode import unicode_to_latex -from pylatexenc.latex2text import LatexNodes2Text -atom_namespace = 
'http://www.w3.org/2005/Atom' -arxiv_namespace = 'http://arxiv.org/schemas/atom' -re_mathscinet = r'MR\d{4,10}' -re_pubmed = r'PMID:\d{4,10}' -re_doi = r'10\.\d{4,9}\/[-._;()/:A-Za-z0-9]+' -re_arxiv_old = r'\b[a-zA-Z\-\.]{2,10}\/\d{7}(?:v\d)?\b' -re_arxiv_new = r'\b\d{4}\.\d{4,5}(?:v\d)?\b' - - -def getbibentry(id,verbose=False): - if re.match(re_mathscinet,id): - if verbose: - print(f"Looking for MathSciNet key {id}...",end=" ") - return get_mathscinet_bibentry(id=id,verbose=verbose) - elif re.match(re_pubmed,id): - if verbose: - print(f"Looking for PubMed key {id}...",end=" ") - elif re.match(re_arxiv_old, id) or re.match(re_arxiv_new, id): - if verbose: - print(f"Looking for arXiv key {id}...",end=" ") - return get_arxiv_bibentry(id,verbose=verbose) - elif re.match("",id): - try: - if verbose: - print(f"Looking for {id} on MathSciNet...",end=" ") - return get_mathscinet_bibentry(doi=id,verbose=verbose) - except ValueError: - if verbose: - print(f"Looking for {id} on doi.org...",end=" ") - return get_doi_bibentry(id,verbose=verbose) - else: - raise ValueError(f"Invalid citation key {id}!\n") - -def get_mathscinet_bibentry(id=None,doi=None,verbose=False): - if id: - url = "https://mathscinet.ams.org/mathscinet/search/publications.html?fmt=bibtex&pg1=MR&s1=" + id[2:] - elif doi: - url = "https://mathscinet.ams.org/mathscinet/search/publications.html?fmt=bibtex&pg1=DOI&s1=" + doi - page = requests.get(url) - tree = html.fromstring(page.content) - if bibstrings := tree.xpath('//pre/text()'): - if verbose: - print("Success!") - bibstr=bibstrings[0] - else: - raise ValueError(f"Not found! 
Please check the citation key and whether you have access to MathSciNet.") - entries = parse_string(bibstr,'bibtex').entries - univ_id= list(entries.keys())[0] - return entries[univ_id] - -def get_doi_bibentry(id,verbose=False): - url = "https://doi.org/" + id - headers = { 'Accept': 'application/x-bibtex; charset=utf-8' } - page = requests.get(url,headers=headers) - entries = parse_bytes(page.content,'bibtex').entries - if len(entries) == 1: - if verbose: - print("Success!") - return sanitize_entry(list(entries.values())[0]) - else: - raise ValueError(f"Not found! Please check the citation key and whether you have access to doi.org.") - -def get_arxiv_bibentry(id,verbose=False): - url = "http://export.arxiv.org/api/query?id_list=" + id - page = requests.get(url) - tree = etree.fromstring(page.content) - if doi := tree.xpath("//a:entry/b:doi",namespaces={ 'a': atom_namespace,'b': arxiv_namespace }): - if verbose: - print("Detected DOI in arXiv record...",end=" ") - bibentry = getbibentry(doi[0].text,verbose=verbose) - elif title := tree.xpath("//a:entry/a:title",namespaces={ 'a': atom_namespace }): - if verbose: - print("Success!") - fields = [("TITLE", title[0].text)] - if journal := tree.xpath("//a:entry/a:journal",namespaces={ 'a': atom_namespace }): - fields += [("JOURNAL",journal[0].text)] - else: - fields += [("JOURNAL","preprint")] - fields += [("YEAR", tree.xpath("//a:entry/a:published",namespaces={ 'a':atom_namespace })[0].text[:4])] - bibentry = Entry("article", fields=fields) - bibentry.persons["AUTHOR"] = [Person(author.text) for author in tree.xpath("//a:entry/a:author/a:name",namespaces={ 'a': atom_namespace })] - bibentry = sanitize_entry(bibentry) - else: - raise ValueError(f"Could not find citation for {id}. 
Please check the citation key and whether you have access to arXiv.") - bibentry.fields["EPRINT"] = id - bibentry.fields["ARCHIVEPREFIX"] = "arXiv" - return bibentry - - -def sanitize_entry(entry): - entry.fields["TITLE"] = sanitize_string(entry.fields["TITLE"],title=True) - for author in entry.persons["AUTHOR"]: - author.first_names = [sanitize_string(name) for name in author.first_names] - author.last_names = [sanitize_string(name) for name in author.last_names] - if "month" in entry.fields: - entry.fields.pop("month") - return entry - -def sanitize_string(string,title=False): - string = string.replace("\n", "").replace("\t", "") - string = LatexNodes2Text().latex_to_text(string) - string=unicode_to_latex(string) - if title: - string = re.sub(r'\b([A-Z].*?)\b',r'{\1}',string) - return string \ No newline at end of file diff --git a/dist/pybibget-0.0.1/pybibget/pybibget.py b/dist/pybibget-0.0.1/pybibget/pybibget.py deleted file mode 100644 index 97f2f17..0000000 --- a/dist/pybibget-0.0.1/pybibget/pybibget.py +++ /dev/null @@ -1,25 +0,0 @@ -import argparse, re -import bibentry -import pybtex.database - -def pybibget(): - parser = argparse.ArgumentParser(prog ='pybibget',description ='Command line utility to automatically retrieve BibTeX citations from MathSciNet, arXiv and PubMed') - - parser.add_argument('keys', type = str, metavar ='citekeys', nargs='*',help ='MathSciNet (MRxxxxx), arXiv (2301.xxxxx) or PubMed (PMID:xxxxxxxx) citation keys (separated by spaces)') - parser.add_argument('-v','--verbose',action='store_true',help='verbose output' ) - - args = parser.parse_args() - - if not args.keys: - parser.print_help() - exit(1) - bib_data = pybtex.database.BibliographyData() - for key in args.keys: - try: - bib_data.entries[key] = bibentry.getbibentry(key,verbose=args.verbose) - except ValueError as e: - print(e) - print(bib_data.to_string('bibtex')) - -if __name__ == '__main__': - pybibget() \ No newline at end of file diff --git 
a/dist/pybibget-0.0.1/pyproject.toml b/dist/pybibget-0.0.1/pyproject.toml deleted file mode 100644 index 5f8f5e3..0000000 --- a/dist/pybibget-0.0.1/pyproject.toml +++ /dev/null @@ -1,3 +0,0 @@ -[build-system] -build-backend = "setuptools.build_meta" -requires = ["setuptools", "wheel"] \ No newline at end of file diff --git a/dist/pybibget-0.0.1/setup.cfg b/dist/pybibget-0.0.1/setup.cfg deleted file mode 100644 index 065c1d8..0000000 --- a/dist/pybibget-0.0.1/setup.cfg +++ /dev/null @@ -1,23 +0,0 @@ -[metadata] -name = pybibget -version = 0.0.1 - -[options] -include_package_data = True -packages = - pybibget -install_requires = - requests - pybtex - lxml - pylatexenc -python_requires = >=3.6 - -[options.entry_points] -console_scripts = - pybibget = pybibget.pybibget:pybibget - -[egg_info] -tag_build = -tag_date = 0 - diff --git a/dist/pybibget-0.0.1/setup.py b/dist/pybibget-0.0.1/setup.py deleted file mode 100644 index 6a9e5ae..0000000 --- a/dist/pybibget-0.0.1/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup() \ No newline at end of file diff --git a/pybibget.egg-info/PKG-INFO b/pybibget.egg-info/PKG-INFO index 48b2871..cea80f8 100644 --- a/pybibget.egg-info/PKG-INFO +++ b/pybibget.egg-info/PKG-INFO @@ -1,5 +1,186 @@ Metadata-Version: 2.1 Name: pybibget Version: 0.0.1 +Summary: Command line utility to automatically retrieve BibTeX citations from MathSciNet, arXiv, PubMed and doi.org +Home-page: https://github.com/wirhabenzeit/pybibget +Author: Dominik Schröder +Author-email: dschroeder@ethz.ch +License: MIT License +Keywords: BibTeX,MathSciNet,PubMed,DOI,arXiv,bibliography,command-line,citation +Classifier: Programming Language :: Python :: 3 +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent Requires-Python: >=3.6 +Description-Content-Type: text/markdown License-File: LICENSE + +# pybibget + +Command line utility to automatically retrieve BibTeX citations from MathSciNet, arXiv, PubMed 
and doi.org + +## Installation + +```bash +$ pip install pybibget +``` + +## Usage + +### Citation Keys + +`pybibget` provides a command line interface to obtain BibTeX entries from citation keys of the form +| Citation key | Format | +|----------------------|-------------------------------| +| MR0026286 | MathSciNet (requires subscription) | +| 1512.03385 | arXiv identifier (new format) | +| hep-th/9711200 | arXiv identifier (old format) | +| PMID:271968 | PubMed | +| 10.1109/CVPR.2016.90 | DOI | + +`pybibget key1 key2 ...` prints the BibTeX entries to `stdout`: +```console +% pybibget MR0026286 10.1109/TIT.2006.885507 math/0211159 PMID:271968 10.1109/CVPR.2016.90 hep-th/9711200 + +@article{MR0026286, + AUTHOR = "Shannon, C. E.", + TITLE = "A mathematical theory of communication", + JOURNAL = "Bell System Tech. J.", + FJOURNAL = "The Bell System Technical Journal", + VOLUME = "27", + YEAR = "1948", + PAGES = "379--423, 623--656", + ISSN = "0005-8580", + MRCLASS = "60.0X", + MRNUMBER = "26286", + MRREVIEWER = "J. L. Doob", + DOI = "10.1002/j.1538-7305.1948.tb01338.x", + URL = "https://doi.org/10.1002/j.1538-7305.1948.tb01338.x" +} + +@article{10.1109/TIT.2006.885507, + AUTHOR = "Candes, Emmanuel J. and Tao, Terence", + TITLE = "Near-optimal signal recovery from random projections: universal encoding strategies?", + JOURNAL = "IEEE Trans. Inform. Theory", + FJOURNAL = "Institute of Electrical and Electronics Engineers. Transactions on Information Theory", + VOLUME = "52", + YEAR = "2006", + NUMBER = "12", + PAGES = "5406--5425", + ISSN = "0018-9448", + MRCLASS = "94A12 (41A25 94A13)", + MRNUMBER = "2300700", + MRREVIEWER = "L. L. 
Campbell", + DOI = "10.1109/TIT.2006.885507", + URL = "https://doi.org/10.1109/TIT.2006.885507" +} + +@unpublished{math/0211159, + author = "Perelman, Grisha", + title = "{The} entropy formula for the {Ricci} flow and its geometric applications", + note = "Preprint", + year = "2002", + eprint = "math/0211159", + archiveprefix = "arXiv" +} + +@article{PMID:271968, + author = "Sanger, F. and Nicklen, S. and Coulson, A. R.", + doi = "10.1073/pnas.74.12.5463", + url = "https://doi.org/10.1073/pnas.74.12.5463", + year = "1977", + publisher = "Proceedings of the National Academy of Sciences", + volume = "74", + number = "12", + pages = "5463--5467", + title = "{DNA} sequencing with chain-terminating inhibitors", + journal = "Proceedings of the National Academy of Sciences", + PMID = "271968" +} + +@inproceedings{10.1109/CVPR.2016.90, + author = "He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian", + doi = "10.1109/cvpr.2016.90", + url = "https://doi.org/10.1109/cvpr.2016.90", + year = "2016", + publisher = "{IEEE}", + title = "{Deep} {Residual} {Learning} for {Image} {Recognition}", + booktitle = "2016 {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})" +} + +@article{hep-th/9711200, + AUTHOR = "Phan, Trung V. and Doan, Anh", + TITLE = "A curious use of extra dimension in classical mechanics: geometrization of potential", + JOURNAL = "J. Geom. 
Graph.", + FJOURNAL = "Journal for Geometry and Graphics", + VOLUME = "25", + YEAR = "2021", + NUMBER = "2", + PAGES = "265--270", + ISSN = "1433-8157", + MRCLASS = "70B05", + MRNUMBER = "4394144", + DOI = "10.1023/a:1026654312961", + URL = "https://doi.org/10.1023/a:1026654312961", + eprint = "hep-th/9711200", + archiveprefix = "arXiv" +} +``` +With the option `-f filename` the result can be *appended* to any given file directly: +```console +% pybibget MR0026286 10.1109/TIT.2006.885507 math/0211159 PMID:271968 10.1109/CVPR.2016.90 hep-th/9711200 -f bibliography.bib +Succesfully appended 6 BibTeX entries to bibliography.bib +``` + +### TeX File Parsing + +`pybibparse` automatically parses missing citations from the `biber` or `bibtex` log for a given `TeX` file +```console +% pybibparse example + +@article{math/0211159, + author = "Perelman, Grisha", + title = "{The} entropy formula for the {Ricci} flow and its geometric applications", + journal = "preprint", + year = "2002", + eprint = "math/0211159", + archiveprefix = "arXiv" +} + +@article{PMID:271968, + author = "Sanger, F. and Nicklen, S. and Coulson, A. R.", + doi = "10.1073/pnas.74.12.5463", + url = "https://doi.org/10.1073/pnas.74.12.5463", + year = "1977", + publisher = "Proceedings of the National Academy of Sciences", + volume = "74", + number = "12", + pages = "5463--5467", + title = "{DNA} sequencing with chain-terminating inhibitors", + journal = "Proceedings of the National Academy of Sciences", + PMID = "271968" +} +``` + +With the option `-w [file_name]` the obtained citations are automatically appended to the `.bib` file. `[file_name]` is optional if the `.bib` file has been specified in the `TeX` file. 
+```console +% pybibparse example -w +Succesfully appended 2 BibTeX entries to bibliography.bib +``` + +## Data Sources + +### MathSciNet +Directly accesses [MathSciNet](https://mathscinet.ams.org/mathscinet/index.html) and uses the provided citation unmodified + +### DOI +First searches for the DOI on [MathSciNet](https://mathscinet.ams.org/mathscinet/index.html). If successful, uses the MathSciNet strategy, otherwise uses the citation from [doi.org](https://doi.org) with the following modifications: +- Author names and title are converted to TeX form (special characters like `ö` are converted to `"{o}`) +- Capital words in the title are surrounded by `{...}` to ensure capitalization +- Publication month data is removed + +### PubMed +Searches for the DOI on [PubMed](https://pubmed.ncbi.nlm.nih.gov), then uses the DOI strategy and appends `pmid = [PMID]` to the resulting citation. + +### arXiv +Uses DOI strategy if metadata contains `doi`. +Otherwise creates an `unpublished` bib-entry with `note = "Preprint"` or `note = [Journal Metadata]` (if provided). In any case appends `eprint = [arXiv identifier]` to the citation. 
diff --git a/pybibget.egg-info/requires.txt b/pybibget.egg-info/requires.txt index 3f4498a..1a18274 100644 --- a/pybibget.egg-info/requires.txt +++ b/pybibget.egg-info/requires.txt @@ -1,4 +1,4 @@ -requests -pybtex -lxml -pylatexenc +requests>=2.28.1 +pybtex>=0.24.0 +lxml>=4.9.2 +pylatexenc>=1.3 diff --git a/setup.cfg b/setup.cfg index 1469d43..da2e38b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,20 +1,31 @@ [metadata] name = pybibget version = 0.0.1 +author = Dominik Schröder +author_email = dschroeder@ethz.ch +url = https://github.com/wirhabenzeit/pybibget +description = Command line utility to automatically retrieve BibTeX citations from MathSciNet, arXiv, PubMed and doi.org +long_description = file: README.md +long_description_content_type = text/markdown +keywords = BibTeX, MathSciNet, PubMed, DOI, arXiv, bibliography, command-line, citation +license = MIT License +classifiers = + Programming Language :: Python :: 3 + License :: OSI Approved :: MIT License + Operating System :: OS Independent [options] packages = find: zip_safe = True include_package_data = True install_requires = - requests - pybtex - lxml - pylatexenc + requests >= 2.28.1 + pybtex >= 0.24.0 + lxml >= 4.9.2 + pylatexenc >= 1.3 python_requires = >=3.6 - [options.entry_points] console_scripts = pybibget = pybibget:pybibget - pybibparse = pybibget:pybibparse \ No newline at end of file + pybibparse = pybibget:pybibparse