Skip to content

Commit

Permalink
OA: add CDS Token
Browse files Browse the repository at this point in the history
  • Loading branch information
ErnestaP committed Apr 25, 2024
1 parent 3d1359b commit 0354801
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 8 deletions.
9 changes: 4 additions & 5 deletions dags/open_access/open_access.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import os
from functools import reduce

import open_access.constants as constants
Expand All @@ -18,7 +19,7 @@ def oa_dag():
@task(executor_config=kubernetes_executor_config)
def fetch_data_task(query, **kwargs):
year = kwargs["params"].get("year")
cds_token = None # os.environ.get("CDS_TOKEN")
cds_token = os.environ.get("CDS_TOKEN")
if not cds_token:
logging.warning("cds token is not set!")
base_query = (
Expand All @@ -27,10 +28,8 @@ def fetch_data_task(query, **kwargs):
+ r"not+980:BookChapter"
)
type_of_query = [*query][0]
url = utils.get_url(
query=f"{base_query}+{query[type_of_query]}", cds_token=cds_token
)
data = utils.request_again_if_failed(url)
url = utils.get_url(query=f"{base_query}+{query[type_of_query]}")
data = utils.request_again_if_failed(url=url, cds_token=cds_token)
total = utils.get_total_results_count(data.text)
if type_of_query == "gold":
total = utils.get_gold_access_count(total, url)
Expand Down
8 changes: 5 additions & 3 deletions dags/open_access/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
from open_access.parsers import get_golden_access_records_ids


def request_again_if_failed(url):
def request_again_if_failed(url, cds_token=None):
if cds_token:
header = {"Authorization": f"token {cds_token}"}
response = requests.get(url, header)
response = requests.get(url)
count = 1

Expand Down Expand Up @@ -53,11 +56,10 @@ def get_gold_access_count(total, url):
return records_ids_count


def get_url(query, current_collection="Published+Articles"):
    """Build the CDS search URL for ``query``.

    Args:
        query: Search expression placed in the ``p`` parameter. It is
            inserted verbatim; no escaping is applied here.
        current_collection: Value of the ``cc`` parameter, also inserted
            verbatim.

    Returns:
        The ``https://cds.cern.ch/search`` URL as a string, requesting
        ``of=xm`` output with ``rg=100``.
    """
    # The CDS token is deliberately not part of the URL: the caller passes
    # it to request_again_if_failed(url=..., cds_token=...) instead, so no
    # "&apikey=" suffix is appended here.
    return (
        rf"https://cds.cern.ch/search?ln=en&cc={current_collection}&p={query}"
        + r"&action_search=Search&op1=a&m1=a&p1=&f1=&c="
        + r"Published+Articles&c=&sf=&so=d&rm=&rg=100&sc=0&of=xm"
    )

0 comments on commit 0354801

Please sign in to comment.