-
Notifications
You must be signed in to change notification settings - Fork 14
/
Dockerfile.gpu
237 lines (201 loc) · 9.37 KB
/
Dockerfile.gpu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# Base image: CUDA 9.0 + cuDNN 7 runtime on CentOS 7.
FROM nvidia/cuda:9.0-cudnn7-runtime-centos7

ENV BUILDER_VERSION=1.0

# MAINTAINER is deprecated; the same information is recorded as a label.
LABEL maintainer="Subin Modeel <[email protected]>"

# CUDA toolkit location and search paths.
ENV CUDA_HOME="/usr/local/cuda" \
    CUDA_PATH="/usr/local/cuda"
ENV PATH="/usr/local/cuda/bin${PATH:+:${PATH}}"
# No trailing ';' after the closing quote: Docker would append it to the value.
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs/:$LD_LIBRARY_PATH

# CUDA version metadata. Each ENV below reads the variable set by the previous
# instruction, so they must stay separate instructions.
ENV CUDA_VERSION=9.0.176
LABEL com.nvidia.cuda.version="${CUDA_VERSION}"
ENV NVIDIA_CUDA_VERSION=$CUDA_VERSION
ENV CUDA_PKG_VERSION=$CUDA_VERSION-1

# nvidia-docker 1.0
LABEL com.nvidia.volumes.needed="nvidia_driver"
# Register the driver library directories with the dynamic linker configuration.
RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
    echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
# NOTE(review): this assignment replaces the LD_LIBRARY_PATH built above (the CUDA
# lib64 and stubs entries are dropped from here on) - confirm that is intended.
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility
# All following build steps run as root.
USER root

# BUILDER_VERSION is already set to 1.0 right after FROM, so it is not repeated here.

# OpenShift / S2I builder metadata.
LABEL io.k8s.description="S2I builder for Tensorflow binaries." \
      io.k8s.display-name="Tensorflow BUILD" \
      io.openshift.expose-services="8080:http" \
      io.openshift.tags="builder,python,tf-build" \
      io.openshift.s2i.scripts-url="image:///usr/libexec/s2i"

## taken/adapted from jupyter dockerfiles
# Not essential, but wise to set the lang
# Note: Users with other languages should set this in their derivative image
ENV LANGUAGE=en_US.UTF-8 \
    LANG=en_US.UTF-8 \
    LC_ALL="" \
    PYTHONIOENCODING=UTF-8

# Runtime user, Python and tini settings read by later instructions.
ENV NB_USER=default \
    NB_UID=1001 \
    NB_PYTHON_VER=2.7 \
    PYTHON_BIN_PATH=/usr/bin/python \
    TINI_VERSION=v0.16.1

## Bazel
ENV PYTHON_LIB_PATH=/usr/lib64/python2.7/site-packages
# Prepend /usr/local/lib to the library search path. The original line ended
# with ';' after the closing quote, which Docker kept as part of the value and
# thereby corrupted the last path entry.
ENV LD_LIBRARY_PATH="/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
ENV BAZELRC=/root/.bazelrc \
    BAZEL_VERSION=0.15.0 \
    CURL_VERSION=7.54.1
################################################
## Tensorflow ./configure options for Bazel
################################################
ENV PYTHON_BIN_PATH=/usr/bin/python \
    CC_OPT_FLAGS="-march=native"
# Optional TensorFlow features: 1 = enabled, 0 = disabled.
# NOTE(review): TF_NEED_CUDA is 0 although this is the GPU Dockerfile - confirm
# whether it is meant to be overridden at build time.
ENV TF_NEED_JEMALLOC=1 \
    TF_NEED_GCP=0 \
    TF_NEED_VERBS=0 \
    TF_NEED_HDFS=0 \
    TF_ENABLE_XLA=0 \
    TF_NEED_OPENCL=0 \
    TF_NEED_CUDA=0 \
    TF_NEED_MPI=0 \
    TF_NEED_GDR=0 \
    TF_NEED_S3=0
ENV TENSORBOARD_LOG_DIR=/workspace
ENV PATH=/usr/local/bin:$PATH
# ENV never expands shell globs, so the previous value
# (/usr/lib/jvm/java-1.8.0-openjdk-1.8.0*) was stored with a literal '*'.
# Use the version-independent path instead; on CentOS 7 this should be the
# symlink maintained by java-1.8.0-openjdk-devel, which this file installs
# in a later RUN - TODO confirm on the built image.
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
# Space-separated list of build by-products and caches (glob patterns included).
# A later RUN step passes it unquoted to `rm -fr`, so the shell splits it into
# individual paths and expands the '*' patterns at that point.
# Continuation lines are deliberately not indented: leading whitespace would
# become part of the quoted value.
ENV TENSORFLOW_BUILD_FILES="/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/host/ \
/root/.cache/bazel/_bazel_root/install/ \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/external/org_tensorflow/tensorflow/core/ \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/external/org_tensorflow/tensorflow/c/ \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/external/org_tensorflow/tensorflow/cc/ \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/external/org_tensorflow/tensorflow/stream_executor/ \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/tensorflow_serving/batching \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/tensorflow_serving/servables \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/tensorflow_serving/core \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/tensorflow_serving/apis \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/tensorflow_serving/test_util \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/tensorflow_serving/config \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/tensorflow_serving/resources \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/tensorflow_serving/sources \
/root/.cache/bazel/_bazel_root/*/execroot/serving/bazel-out/local-opt/bin/tensorflow_serving/util \
/usr/local/lib/bazel \
/usr/local/bin/bazel \
/root/.cache/pip/ \
/serving/.git \
/serving/tf_models/syntaxnet"
# Entrypoint script and build helper tooling from the build context.
# COPY replaces ADD: these are plain local paths, so ADD's archive-extraction
# and URL-fetch behaviour is not wanted.
COPY entrypoint /entrypoint
COPY build_tools /build_tools

# System layer: shell prompt, runtime user, OS packages, CUDA toolkit, pip,
# tini (signature-verified) and the root .bazelrc.
RUN echo 'PS1="\u@\h:\w\\$ \[$(tput sgr0)\]"' >> /root/.bashrc \
    && chmod +x /entrypoint \
    # Create the account with UID 1001 explicitly. Without -u, useradd picks the
    # next free UID, which need not match the 1001 used by the chown calls below
    # and by the final USER instruction.
    && useradd -u 1001 -m -d /home/default default \
    && yum install -y epel-release \
    && yum install -y tar tree which git curl wget java-headless bzip2 gnupg2 sqlite3 python-pip protobuf-compiler \
    && yum install dkms -y \
    && yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm \
    && yum install -y https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-repo-rhel7-8.0.61-1.x86_64.rpm \
    && yum install -y cuda-9.0.176-1 \
    # The unversioned get-pip.py no longer runs on Python 2.7; use the script
    # published for 2.7, fail on HTTP errors (-f), and remove it afterwards.
    && curl -fsSL "https://bootstrap.pypa.io/pip/2.7/get-pip.py" -o /tmp/get-pip.py \
    && python /tmp/get-pip.py \
    && rm -f /tmp/get-pip.py \
    # Expose the stub libcuda under its .so.1 name as well.
    && ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 \
    && mkdir -p /opt/app-root/ \
    && chown -R 1001:1001 /opt/app-root \
    && chgrp -R root /opt \
    && chmod -R a+rwx /opt \
    # NOTE(review): this makes /etc/passwd world-writable; presumably so the
    # entrypoint can add an entry for the runtime UID - confirm whether a
    # narrower mode would be enough.
    && chmod a+rw /etc/passwd \
    && yum install -y gcc gcc-c++ glibc-devel openssl-devel \
    && yum install -y kernel-devel make automake autoconf swig zip unzip libtool binutils \
    && yum install -y freetype-devel libpng12-devel zlib-devel giflib-devel zeromq3-devel \
    && yum install -y which libxml2 libxml2-devel libxslt libxslt-devel gzip python-devel \
    && yum install -y java-1.8.0-openjdk java-1.8.0-openjdk-devel patch gdb file pciutils cmake \
    # tini: download the binary and its detached signature, then verify before
    # installing. The SKS keyserver pool has been shut down, so the key is
    # fetched from keyserver.ubuntu.com by full fingerprint (ends in 0527A9B7).
    && wget -q https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini -P /tmp \
    && wget -q https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini.asc -P /tmp \
    && gpg --batch --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 595E85A6B1B4779EA4DAAEC70B588DFF0527A9B7 \
    && gpg --batch --verify /tmp/tini.asc /tmp/tini \
    && mv /tmp/tini /usr/local/bin/tini \
    && chmod +x /usr/local/bin/tini \
    # Re-apply ownership and group permissions on /opt/app-root after the
    # package installs above.
    && chown -R 1001:1001 /opt/app-root \
    && chgrp -R root /opt/app-root \
    && chmod -R ug+rwx /opt/app-root \
    # Bazel defaults for running inside `docker build`: batch mode and
    # standalone (non-sandboxed) spawn/genrule strategies.
    && echo "startup --batch" >>/root/.bazelrc \
    && echo "build --spawn_strategy=standalone --genrule_strategy=standalone" >>/root/.bazelrc
# The .bazelrc settings follow:
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/ci_build/install/.bazelrc
# Running bazel inside a `docker build` command causes trouble, cf.:
# https://github.com/bazelbuild/bazel/issues/134
# The easiest solution is to set up a bazelrc file that forces --batch.
# Similarly, we need to work around sandboxing issues:
# https://github.com/bazelbuild/bazel/issues/418
#
# The serving repo is 700MB+ in size.
# Instead of downloading it with
#   git clone --recurse-submodules https://github.com/tensorflow/serving
# a cloned repo is found under the serving folder.
# bazel-$BAZEL_VERSION-installer-linux-x86_64.sh is 200MB+ in size;
# the downloaded file is available under the bazel folder.
# No yum commands in the RUN step below.
# Python was removed here to fix a numpy issue.
# Build-tool layer: Python build dependencies, the Bazel installer, nghttp2 and
# a curl rebuilt against it, followed by cache cleanup and permission setup for
# the runtime user.
RUN mkdir -p /tf/tools \
    && pip install --upgrade pip \
    && pip install -U setuptools \
    && pip install enum34 futures mock six pixiedust pillow \
    && pip install 'protobuf>=3.0.0a3' \
    #&& pip install -i https://testpypi.python.org/simple --pre grpcio pyyaml \
    && pip install grpcio_tools tensorflow-serving-api \
    # Bazel: download the release installer, run it, then invoke bazel once.
    && cd /tf/tools \
    && wget -q https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh \
    && ls -l /tf/tools \
    && chmod +x bazel-$BAZEL_VERSION-installer-linux-x86_64.sh \
    && ./bazel-$BAZEL_VERSION-installer-linux-x86_64.sh \
    && bazel \
    # nghttp2, built from source and installed with the default prefix.
    # NOTE(review): the clone is not pinned to a tag or commit, so the result
    # depends on the upstream default branch at build time - consider pinning.
    && git clone https://github.com/tatsuhiro-t/nghttp2.git \
    && cd nghttp2 \
    && autoreconf -i \
    && automake \
    && autoconf \
    && ./configure \
    && make \
    && make install \
    && echo '/usr/local/lib' > /etc/ld.so.conf.d/custom-libs.conf \
    && ldconfig \
    # curl built against the nghttp2 installed above. The tarball is fetched
    # over HTTPS; it was plain HTTP before, for source that is compiled and
    # installed as root.
    && cd /tf/tools \
    && wget https://curl.se/download/curl-$CURL_VERSION.tar.bz2 \
    && tar -xvjf curl-$CURL_VERSION.tar.bz2 \
    && cd curl-$CURL_VERSION \
    && ./configure --with-nghttp2=/usr/local --with-ssl \
    && make \
    && make install \
    && ldconfig \
    # Create an empty stropts.h if the header is missing.
    && touch /usr/include/stropts.h \
    # $TENSORFLOW_BUILD_FILES is left unquoted on purpose so the shell splits it
    # into paths and expands its globs.
    # NOTE(review): the list contains /usr/local/bin/bazel and
    # /usr/local/lib/bazel - confirm that removing them here is intended.
    && rm -fr $TENSORFLOW_BUILD_FILES \
    && rm -rf /root/.npm \
    && rm -rf /root/.cache \
    && rm -rf /root/.config \
    && rm -rf /root/.local \
    && rm -rf /root/tmp \
    && rm -f /tf/tools/curl-$CURL_VERSION.tar.bz2 \
    # Put the runtime user in the root group and hand over the workspace and
    # home directories to NB_UID with group root.
    && usermod -g root $NB_USER \
    && mkdir -p /workspace \
    && chown $NB_UID:root /workspace \
    && chmod 1777 /workspace \
    && mkdir -p /home/$NB_USER \
    && chown -R $NB_UID:root /home/$NB_USER \
    && chmod g+rwX,o+rX -R /home/$NB_USER
# Install the S2I scripts at the location announced by the
# io.openshift.s2i.scripts-url label.
COPY ./s2i/bin/ /usr/libexec/s2i

# NO CLEANUP
# Do not add the commands below:
# && yum erase -y gcc gcc-c++ glibc-devel \
# && yum clean all -y \

# 6006: TensorBoard; 8080: HTTP service announced by the
# io.openshift.expose-services label.
EXPOSE 6006 8080

ENV HOME=/home/$NB_USER

# Drop root: run as the numeric UID 1001 (the same value as NB_UID).
USER 1001

# Start in a directory the runtime user can write to, so that tools which
# create files and directories in the current directory do not fail.
WORKDIR /workspace

ENTRYPOINT ["/entrypoint"]

# TODO: Set the default CMD for the image
CMD ["/usr/libexec/s2i/usage"]