-
Notifications
You must be signed in to change notification settings - Fork 16
/
Dockerfile-index
42 lines (35 loc) · 1.16 KB
/
Dockerfile-index
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# arxiv/search-index
#
# Runtime for bulk indexing of a large number of records. This is not used in
# version 0.1, but is available for exploratory work in Kubernetes.
#
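# Expects the path to a file containing newline-delimited, versionless arXiv
# IDs. The path is passed as the container argument and is appended to the
# entrypoint's ``-l`` flag.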
#
# Example (local stack):
#
# $ mkdir /tmp/to_index
# $ cp arxiv_id_dump.txt /tmp/to_index
# $ docker run -it --network=arxivsearch_es_stack \
# > -v /tmp/to_index:/to_index \
# > -e ELASTICSEARCH_SERVICE_HOST=elasticsearch \
# > arxiv/search-index /to_index/arxiv_id_dump.txt
#
# See also ELASTICSEARCH_* and METADATA_ENDPOINT parameters, below.
# Build on the pinned search application image.
FROM arxiv/search:0.5.5

# Make scripts in the application directory available on the PATH.
ENV PATH="/opt/arxiv:${PATH}"

WORKDIR /opt/arxiv/

# Locale settings.
ENV LC_ALL=en_US.utf8 \
    LANG=en_US.utf8

# Application defaults.
# NOTE(review): LOGLEVEL=40 presumably maps to Python's logging.ERROR, and
# FLASK_DEBUG=1 enables Flask debug mode -- confirm both are intended for this
# runtime.
ENV LOGLEVEL=40 \
    FLASK_DEBUG=1 \
    FLASK_APP=/opt/arxiv/app.py

# Elasticsearch connection defaults; override with ``-e`` at ``docker run``.
# NOTE(review): ELASTICSEARCH_PASSWORD is a placeholder default that is baked
# into the image. Always override it at runtime for any real deployment, and
# never replace it here with a real credential.
ENV ELASTICSEARCH_SERVICE_HOST=127.0.0.1 \
    ELASTICSEARCH_SERVICE_PORT=9200 \
    ELASTICSEARCH_SERVICE_PORT_9200_PROTO=http \
    ELASTICSEARCH_USER=elastic \
    ELASTICSEARCH_PASSWORD=changeme

# Metadata endpoint defaults.
ENV METADATA_ENDPOINT=https://arxiv.org/docmeta_bulk/ \
    METADATA_VERIFY_CERT=True

# COPY (rather than ADD) for a plain local file. Placed after the static
# configuration above so that edits to the script do not invalidate those
# layers.
COPY bulk_index.py /opt/arxiv/

# Mount point for the file(s) listing the arXiv IDs to index.
VOLUME /to_index

# The container argument (path to the ID list) is appended after ``-l``.
ENTRYPOINT ["pipenv", "run", "python3.6", "bulk_index.py", "-l"]