forked from LeoFCardoso/pdf2pdfocr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
58 lines (46 loc) · 1.18 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# pdf2pdfocr
#
# Dockerfile version 5.0
#
# Base image.
# NOTE(review): Ubuntu 19.10 is a non-LTS release that has reached end of life;
# confirm its package archives are still reachable, or move to a supported
# release after checking that every package installed below (e.g. cuneiform)
# is available there.
FROM ubuntu:19.10
# MAINTAINER is deprecated; authorship is recorded as an OCI image label instead.
LABEL org.opencontainers.image.authors="Leonardo F. Cardoso <[email protected]>"
# Create the unprivileged "docker" account, then make a home directory for it
# by hand and hand ownership of that directory to the new user.
RUN useradd docker && \
    mkdir /home/docker && \
    chown docker:docker /home/docker
# Software dependencies [Start]
# Install the OCR/PDF toolchain and the Python runtime from the distro archive.
# Packages are listed one per line in alphabetical order, each followed by
# " \" so that line continuation can never glue two package names together.
# The package index is refreshed and removed inside this same layer so that it
# is never stored in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        cuneiform \
        file \
        ghostscript \
        imagemagick \
        locales \
        poppler-utils \
        python3 \
        python3-pip \
        python3-setuptools \
        qpdf \
        tesseract-ocr \
        tesseract-ocr-eng \
        tesseract-ocr-osd \
        tesseract-ocr-por \
    && rm -rf /var/lib/apt/lists/*
# Alternative to the per-language packages above: tesseract-ocr-all
# Delete ImageMagick's stock policy file so that its restrictions (the one
# blocking PDF processing in particular) no longer apply.
RUN ["rm", "/etc/ImageMagick-6/policy.xml"]
# Software dependencies [End]
# Python 3 and deps [Start]
# Python libraries installed with pip. --no-cache-dir stops pip from leaving
# its download/build cache behind in the image layers.
RUN pip3 install --no-cache-dir --upgrade Pillow reportlab \
    && pip3 install --no-cache-dir --upgrade lxml beautifulsoup4 \
    && pip3 install --no-cache-dir --upgrade wheel
# NOTE(review): PyPDF2 is fetched from the tip of a branch archive, so two
# builds may get different code; consider pinning a release or a commit.
RUN pip3 install --no-cache-dir --upgrade https://github.com/mstamy2/PyPDF2/archive/master.zip
# Python 3 and deps [End]
# Smoke test: ask tesseract to list its languages; a non-zero exit status
# aborts the build at this step.
RUN tesseract --list-langs
# Clean
# Removes leftover temporary files from the final filesystem view.
# NOTE: files created by earlier layers still occupy space in those layers;
# deleting them here does not make the image smaller.
RUN rm -rf /tmp/* /var/tmp/*
# Install application
# The whole build context is copied into /opt/install; anything not excluded by
# a .dockerignore file ends up in the image.
COPY . /opt/install
WORKDIR /opt/install
# Run the project's install script. No USER instruction has been issued yet, so
# this step runs as the base image's default user.
# NOTE(review): install_command is not visible here — confirm what it installs
# and whether it relies on the current working directory.
RUN /opt/install/install_command
# UTF-8 locale for the running container. Written in key=value form; the
# space-separated "ENV key value" syntax is legacy.
ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8
# Drop privileges: from here on, including the entrypoint at run time, commands
# run as the unprivileged "docker" user, starting in its home directory.
USER docker
WORKDIR /home/docker
# Exec-form entrypoint: the wrapper script is started directly (no intermediate
# shell) and receives the arguments given to `docker run`.
ENTRYPOINT ["/opt/install/docker-wrapper.sh"]
#