Skip to content

Commit

Permalink
oai: harvesting
Browse files Browse the repository at this point in the history
* Fixes connection problems for ApiHarvestConfig.
* Disables oai harvesting tasks. Harvesting should be done in a crontab job instead.
* Fixes api blueprint configuration.
  • Loading branch information
rerowep committed Dec 17, 2020
1 parent 9816a09 commit da0b169
Show file tree
Hide file tree
Showing 10 changed files with 69 additions and 59 deletions.
20 changes: 7 additions & 13 deletions .github/workflows/continuous-integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
dependencies: ['locked', 'deploy']
dependencies: ['dev', 'deploy']
steps:
- name: Cancel Previous Runs
uses: styfle/[email protected]
with:
access_token: ${{ github.token }}
# - name: Cancel Previous Runs
# uses: styfle/[email protected]
# with:
# access_token: ${{ github.token }}

- uses: actions/checkout@v2

Expand Down Expand Up @@ -42,22 +42,16 @@ jobs:
~/.npm
key: ${{ runner.os }}-pip-venv-${{ hashFiles('**/poetry.lock') }}

- name: Bootstrap locked
if: ${{ matrix.dependencies == 'locked' }}
- name: Bootstrap dev
if: ${{ matrix.dependencies == 'dev' }}
run: |
pip install "poetry<1.1.0"
poetry run bootstrap --ci
- name: Bootstrap deploy
if: ${{ matrix.dependencies == 'deploy' }}
run: |
poetry run bootstrap --ci --deploy E2E=yes
- name: Update pip and coveralls
run: |
poetry run pip install --upgrade pip
poetry run pip install --upgrade coveralls
- name: Run Test
run: poetry run run-tests

Expand Down
28 changes: 14 additions & 14 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions rero_ebooks/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ class Meta:
"""Search only on documents index."""

index = 'ebooks'
doc_types = None
fields = ('*', )
facets = {}

default_filter = None


class Ebook(Record):
Expand Down
16 changes: 15 additions & 1 deletion rero_ebooks/apiharvester/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from flask import current_app
from invenio_db import db
from invenio_oaiserver.models import OAISet
from sqlalchemy.exc import OperationalError

from .errors import ApiHarvesterConfigNotFound
from .models import ApiHarvestConfig
Expand Down Expand Up @@ -99,7 +100,20 @@ def get_apiharvest_object(name):
:param name: The name of the ApiHarvestConfig object.
:return: The ApiHarvestConfig object.
"""
obj = ApiHarvestConfig.query.filter_by(name=name).first()
get_config_error_count = 0
get_config_ok = False
while not get_config_ok and get_config_error_count < 5:
try:
obj = ApiHarvestConfig.query.filter_by(name=name).first()
get_config_ok = True
except OperationalError:
get_config_error_count += 1
msg = 'ApiHarvestConfig OperationalError: {count} {name}'.format(
count=get_config_error_count,
name=name
)
current_app.logger.error(msg)

if not obj:
raise ApiHarvesterConfigNotFound(
'Unable to find ApiHarvesterConfig obj with name %s.'
Expand Down
33 changes: 13 additions & 20 deletions rero_ebooks/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@

from datetime import timedelta

from invenio_indexer.api import RecordIndexer
from invenio_search import RecordsSearch


def _(x):
"""Identity function used to trigger string extraction."""
Expand Down Expand Up @@ -137,16 +134,16 @@ def _(x):
# 'schedule': timedelta(minutes=60),
# 'kwargs': dict(name='NJ')
# },
'Apiharvester-NJ': {
'task': 'rero_ebooks.apiharvester.tasks.harvest_records',
'schedule': timedelta(minutes=60),
'kwargs': dict(name='NJ'),
},
'Apiharvester-VS': {
'task': 'rero_ebooks.apiharvester.tasks.harvest_records',
'schedule': timedelta(minutes=60),
'kwargs': dict(name='VS')
},
# 'Apiharvester-NJ': {
# 'task': 'rero_ebooks.apiharvester.tasks.harvest_records',
# 'schedule': timedelta(minutes=60),
# 'kwargs': dict(name='NJ'),
# },
# 'Apiharvester-VS': {
# 'task': 'rero_ebooks.apiharvester.tasks.harvest_records',
# 'schedule': timedelta(minutes=60),
# 'kwargs': dict(name='VS')
# },
}
CELERY_BROKER_HEARTBEAT = 0

Expand All @@ -163,11 +160,6 @@ def _(x):
#: Hostname used in URLs for local JSONSchemas.
JSONSCHEMAS_HOST = 'ebooks.rero.ch'

# PIDStore
# ========
#: PID field name.
PIDSTORE_RECID_FIELD = 'pid'

# Flask configuration
# ===================
# See details on
Expand Down Expand Up @@ -260,8 +252,9 @@ def _(x):
pid_type='ebook',
pid_minter='ebook',
pid_fetcher='ebook',
search_class=RecordsSearch,
indexer_class=RecordIndexer,
search_class="rero_ebooks.api:EbooksSearch",
indexer_class="invenio_indexer.api:RecordIndexer",
record_class="rero_ebooks.api:Ebook",
search_index=None,
search_type=None,
record_serializers={
Expand Down
7 changes: 2 additions & 5 deletions rero_ebooks/fetchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,21 @@

from collections import namedtuple

from flask import current_app

from .providers import EbookPidProvider

FetchedPID = namedtuple('FetchedPID', ['provider', 'pid_type', 'pid_value'])
"""A pid fetcher."""


def ebook_pid_fetcher(record_uuid, data):
def ebook_pid_fetcher(record_uuid, data, pid_key='pid'):
"""Fetch a ebook's identifiers.
:param record_uuid: The record UUID.
:param data: The record metadata.
:returns: A :data:`invenio_pidstore.fetchers.FetchedPID` instance.
"""
pid_field = current_app.config['PIDSTORE_RECID_FIELD']
return FetchedPID(
provider=EbookPidProvider,
pid_type=EbookPidProvider.pid_type,
pid_value=str(data[pid_field]),
pid_value=str(data[pid_key]),
)
9 changes: 4 additions & 5 deletions rero_ebooks/minters.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

"""Ebooks minters."""

from flask import current_app
from invenio_oaiserver.minters import oaiid_minter

from .providers import EbookPidProvider
Expand All @@ -46,7 +45,8 @@ def build_ebook_pid(data, source):
return source + '-' + pid_value


def ebook_pid_minter(record_uuid, data, source):
def ebook_pid_minter(record_uuid, data, source, pid_key='pid',
object_type='rec'):
"""Mint record identifiers.
This is a minter specific for ebooks.
Expand All @@ -66,11 +66,10 @@ def ebook_pid_minter(record_uuid, data, source):
:param data: The record metadata.
:returns: A fresh `invenio_pidstore.models.PersistentIdentifier` instance.
"""
pid_field = current_app.config['PIDSTORE_RECID_FIELD']
assert pid_field not in data
assert pid_key not in data
pid_value = build_ebook_pid(data, source)
provider = EbookPidProvider.create(
object_type='rec', pid_value=pid_value, object_uuid=record_uuid)
data[pid_field] = pid_value
data[pid_key] = pid_value
oaiid_minter(record_uuid, data)
return provider.pid
5 changes: 5 additions & 0 deletions rero_ebooks/theme/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
static_folder='static',
)

api_blueprint = Blueprint(
'api_rero_ebooks',
__name__
)


@blueprint.route('/')
def index():
Expand Down
2 changes: 1 addition & 1 deletion rero_ebooks/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@

from __future__ import absolute_import, print_function

__version__ = '0.4.0'
__version__ = '0.5.0'
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ def run(self):
'invenio_base.blueprints': [
'rero_ebooks = rero_ebooks.theme.views:blueprint',
],
'invenio_base.api_blueprints': [
'api_rero_ebooks = rero_ebooks.theme.views:api_blueprint',
],
'invenio_config.module': [
'rero_ebooks = rero_ebooks.config',
],
Expand Down

0 comments on commit da0b169

Please sign in to comment.