Skip to content

Commit

Permalink
Bringing in image directory from deployment repo
Browse files Browse the repository at this point in the history
  • Loading branch information
shaneknapp committed Sep 9, 2024
2 parents b1f06fb + 1e82b97 commit 6000a56
Show file tree
Hide file tree
Showing 9 changed files with 330 additions and 0 deletions.
139 changes: 139 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# =============================================================================
# Base stage: timezone/locale, the notebook user, apt packages, R, RStudio
# Server and Shiny Server. Everything installed here lives in system paths;
# the srv-r, srv-conda and final stages are all built FROM this stage.
FROM buildpack-deps:jammy-scm AS base

# Set up common env variables
ENV TZ=America/Los_Angeles
RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone

ENV LC_ALL=en_US.UTF-8 \
    LANG=en_US.UTF-8 \
    LANGUAGE=en_US.UTF-8
ENV DEBIAN_FRONTEND=noninteractive
ENV NB_USER=jovyan \
    NB_UID=1000
# These are used by the python, R, and final stages
ENV CONDA_DIR=/srv/conda \
    R_LIBS_USER=/srv/r

# Generate the en_US.UTF-8 locale referenced by LC_ALL/LANG/LANGUAGE above.
# The apt lists are removed in the same layer so they do not persist in it.
RUN apt-get -qq update --yes && \
    apt-get -qq install --yes locales && \
    echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \
    locale-gen && \
    rm -rf /var/lib/apt/lists/*

# Create the notebook user with the UID declared in NB_UID, rather than
# relying on adduser happening to hand out the first free UID.
RUN adduser --disabled-password --gecos "Default Jupyter user" --uid ${NB_UID} ${NB_USER}

# Install all apt packages
# Only lines starting with '#' are filtered out of apt.txt; blank lines vanish
# through shell word splitting of the command substitution.
COPY apt.txt /tmp/apt.txt
RUN apt-get -qq update --yes && \
    apt-get -qq install --yes --no-install-recommends \
        $(grep -v '^#' /tmp/apt.txt) && \
    apt-get -qq clean && \
    rm -rf /var/lib/apt/lists/*

# Install R.
# These packages must be installed into the base stage since they are in system
# paths rather than /srv.
# Pre-built R packages from rspm are built against system libs in jammy.
ENV R_VERSION=4.4.1-1.2204.0 \
    LITTLER_VERSION=0.3.19-1.2204.0
# The CRAN signing key is dropped into trusted.gpg.d directly; no keyserver
# round trip (apt-key is deprecated) is needed for apt to trust the repo.
RUN echo "deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" > /etc/apt/sources.list.d/cran.list && \
    curl --silent --location --fail \
        --output /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc \
        https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc && \
    apt-get update -qq --yes > /dev/null && \
    apt-get install --yes -qq \
        r-base-core=${R_VERSION} \
        r-base-dev=${R_VERSION} \
        littler=${LITTLER_VERSION} > /dev/null && \
    rm -rf /var/lib/apt/lists/*

# RStudio Server. apt-get resolves the .deb's dependencies, so the package
# lists are refreshed and removed again within this same layer.
ENV RSTUDIO_URL=https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2024.04.2-764-amd64.deb
RUN curl --silent --location --fail --output /tmp/rstudio.deb ${RSTUDIO_URL} && \
    apt-get -qq update --yes && \
    apt-get install --no-install-recommends --yes /tmp/rstudio.deb && \
    rm -rf /tmp/rstudio.deb /var/lib/apt/lists/*

# Shiny Server, installed the same way as RStudio Server.
ENV SHINY_SERVER_URL=https://download3.rstudio.org/ubuntu-18.04/x86_64/shiny-server-1.5.20.1002-amd64.deb
RUN curl --silent --location --fail --output /tmp/shiny.deb ${SHINY_SERVER_URL} && \
    apt-get -qq update --yes && \
    apt-get install --no-install-recommends --yes /tmp/shiny.deb && \
    rm -rf /tmp/shiny.deb /var/lib/apt/lists/*

# Set CRAN mirror to rspm before we install anything
COPY Rprofile.site /usr/lib/R/etc/Rprofile.site
# RStudio needs its own config
COPY rsession.conf /etc/rstudio/rsession.conf

# R_LIBS_USER is set by default in /etc/R/Renviron, which RStudio loads.
# We comment out the default, and append our own value - so it picks up
# the packages we install. Without this, RStudio doesn't see the packages
# that R does.
# Stolen from https://github.com/jupyterhub/repo2docker/blob/6a07a48b2df48168685bb0f993d2a12bd86e23bf/repo2docker/buildpacks/r.py
# TZ is also written to Renviron to try to fight
# https://community.rstudio.com/t/timedatectl-had-status-1/72060, which shows
# up sometimes when installing packages that want the TZ: timedatectl expects
# systemd running, which isn't true in our containers.
RUN sed -i -e '/^R_LIBS_USER=/s/^/#/' /etc/R/Renviron && \
    echo "R_LIBS_USER=${R_LIBS_USER}" >> /etc/R/Renviron && \
    echo "TZ=${TZ}" >> /etc/R/Renviron

# =============================================================================
# This stage exists to build /srv/r.
FROM base AS srv-r

# Create user owned R libs dir
# This lets users temporarily install packages
RUN install -d -o ${NB_USER} -g ${NB_USER} ${R_LIBS_USER}

# Install R libraries as our user
USER ${NB_USER}

RUN mkdir -p /tmp/r-packages

# Our install.R needs devtools which needs install2.r which needs docopt.
# install2.r is not reproducible, but our install.R script is.
# --error makes install2.r halt instead of warning, so a failed devtools
# install fails this layer rather than surfacing later in install.R.
RUN Rscript -e "install.packages('docopt')"
RUN /usr/lib/R/site-library/littler/examples/install2.r --error devtools

# Install all our base R packages
# The scripts are copied only now so that editing them does not invalidate the
# cached docopt/devtools layers above. install.R is run through littler ("r",
# the interpreter named in its shebang) so the build does not depend on the
# file's executable bit surviving COPY.
COPY class-libs.R /tmp/class-libs.R
COPY install.R /tmp/install.R
RUN r /tmp/install.R && rm -rf /tmp/downloaded_packages

# =============================================================================
# This stage exists to build /srv/conda.
FROM base AS srv-conda

# Runs as root: the script installs into ${CONDA_DIR} and then chowns it to
# ${NB_USER}. It is invoked through bash explicitly so the build does not
# depend on the file's executable bit surviving COPY.
COPY install-mambaforge.bash /tmp/install-mambaforge.bash
RUN bash /tmp/install-mambaforge.bash

# Install conda environment as our user
USER ${NB_USER}

ENV PATH=${CONDA_DIR}/bin:$PATH

# environment.yml pulls in infra-requirements.txt through a relative
# "-r infra-requirements.txt" entry, so both files go into the same directory.
COPY infra-requirements.txt /tmp/infra-requirements.txt
COPY environment.yml /tmp/environment.yml

RUN mamba env update -p ${CONDA_DIR} -f /tmp/environment.yml && \
    mamba clean -afy

# =============================================================================
# This stage consumes base and imports /srv/r and /srv/conda.
FROM base AS final
# Both trees were built as ${NB_USER} in their own stages. --chown states that
# ownership explicitly instead of relying on the builder preserving it, since
# users must be able to install packages into them at runtime.
COPY --from=srv-r --chown=${NB_USER}:${NB_USER} /srv/r /srv/r
COPY --from=srv-conda --chown=${NB_USER}:${NB_USER} /srv/conda /srv/conda

# Install IR kernelspec. Requires python and R.
ENV PATH=${CONDA_DIR}/bin:${PATH}:${R_LIBS_USER}/bin
RUN R -e "IRkernel::installspec(user = FALSE, prefix='${CONDA_DIR}')"

# clear out /tmp
# NOTE(review): files written to /tmp by earlier layers still occupy space in
# those layers; this only hides them from the final filesystem.
USER root
#COPY postBuild /tmp/postBuild
#RUN chmod +x /tmp/postBuild
#RUN /tmp/postBuild
RUN rm -rf /tmp/*


USER ${NB_USER}
WORKDIR /home/${NB_USER}

EXPOSE 8888

# tini (installed from apt.txt) is the entry point; no CMD is defined here, so
# the command to run must be supplied when the container is started.
ENTRYPOINT ["tini", "--"]
23 changes: 23 additions & 0 deletions Rprofile.site
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Site-wide R startup options. Everything is computed inside local() so no
# helper variables are left behind in the session.
local({
  # Use RStudio's CRAN mirror to get binary packages. "latest" just means it
  # has all available versions; specific versions can still be requested via
  # devtools::install_version.
  cran_mirror <- "https://packagemanager.rstudio.com/all/__linux__/jammy/latest"

  # RStudio sets the HTTPUserAgent option, which makes the Posit Public Package
  # Manager serve binaries rather than source. For the RStudio terminal,
  # Jupyter terminal, Jupyter R notebook, or anything else to get the same
  # (faster) installs, HTTPUserAgent has to be set by hand.
  # The RStudio version is faked because `RStudio.Version()$version` is only
  # available when running inside RStudio.
  r_details <- paste(getRversion(), R.version$platform, R.version$arch, R.version$os)
  user_agent <- sprintf("RStudio Server (%s); R (%s)", "2024.04.2.764", r_details)

  options(
    repos = c(CRAN = cran_mirror),
    HTTPUserAgent = user_agent
  )
})
37 changes: 37 additions & 0 deletions apt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# System packages installed with apt in the Dockerfile's base stage.
# One package name per line. The Dockerfile strips only lines that START with
# '#', so do not put a trailing comment after a package name.

# our entry point
tini

# useful utilities
tar
less
vim

# for nbconvert, knitr, etc.
texlive-xetex
texlive-fonts-recommended
texlive-plain-generic
# provides FandolSong-Regular.otf for issue #2714
texlive-lang-chinese

# for notebook-as-pdf
libx11-xcb1
libxtst6
libxrandr2
libasound2
libpangocairo-1.0-0
libatk1.0-0
libatk-bridge2.0-0
libgtk-3-0
libnss3
libxss1
fonts-noto-color-emoji

# for rstudio
# (the three commented-out names below are currently not installed from here)
#psmisc
#sudo
#libapparmor1
lsb-release
libclang-dev

# Used by littler for IRkernel::installspec
libzmq5
20 changes: 20 additions & 0 deletions class-libs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env Rscript

# Install pinned versions of R packages that are not installed yet.
#
# Arguments:
#   class_name: label used only in the progress messages.
#   class_libs: character vector of alternating package names and versions,
#               e.g. c("shiny", "1.7.4", "rmarkdown", "2.22"). May be empty.
#
# Packages that are already installed are skipped regardless of their version.
# Stops with an error if a package that needs installing has no version paired
# with it (odd-length class_libs).
# Returns (invisibly) the final "Done installing ..." message.
class_libs_install_version <- function(class_name, class_libs) {
  print(paste("Installing packages for", class_name))

  # Indices of the package names: 1, 3, 5, ... Built from seq_len() so that an
  # empty class_libs yields no iterations; seq(1, 0, 2) would raise
  # "wrong sign in 'by' argument".
  name_positions <- seq_len(ceiling(length(class_libs) / 2)) * 2 - 1

  for (i in name_positions) {
    # Re-read on every iteration: an earlier install may have pulled this
    # package in as a dependency.
    installed_packages <- rownames(utils::installed.packages())
    package_name <- class_libs[i]
    version <- class_libs[i + 1]
    # Only install packages if they haven't already been installed!
    # devtools doesn't do that by default
    if (!package_name %in% installed_packages) {
      if (is.na(version)) {
        stop("No version given for package ", package_name,
             "; class_libs must alternate package names and versions")
      }
      print(paste("Installing", package_name, version))
      devtools::install_version(package_name, version, quiet=TRUE)
    } else {
      # FIXME: This ignores version incompatibilities :'(
      print(paste("Not installing", package_name, " as it is already installed"))
    }
  }
  print(paste("Done installing packages for", class_name))
}
21 changes: 21 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Conda environment applied to ${CONDA_DIR} by `mamba env update` in the
# Dockerfile's srv-conda stage.
dependencies:
  # NOTE(review): the trailing comment on the next line reports a failure with
  # py3.11 while the pin itself selects 3.11 - confirm which one is current.
  - python=3.11.* # mamba installation with py3.11 fails: No module named 'conda.cliconda'
  - pip=23.1.*
  - nodejs=18.*
  # bug w/notebook and traitlets: https://github.com/jupyter/notebook/issues/7048
  - traitlets=5.9.*

  - jupyter-server-proxy==4.2.0
  - jupyter-rsession-proxy==2.2.0

  - syncthing==1.23.5
  - pyppeteer==1.0.2

  # for nbconvert
  - pandoc==3.1.3

  # for jupyter-tree-download
  - zip==3.0
  # Entries nested under "pip:" are installed with pip rather than conda. They
  # must stay indented beneath this key, otherwise they are read as conda specs.
  - pip:
      - -r infra-requirements.txt
      - jupyter-shiny-proxy==1.1
29 changes: 29 additions & 0 deletions infra-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# WARNING: Original source at scripts/infra-packages/requirements.txt
# PLEASE DO NOT EDIT ELSEWHERE
# After editing scripts/infra-packages/requirements.txt, please run
# scripts/infra-packages/sync.bash.

# This file pins versions of notebook-related python packages we want
# across all hubs. This makes sure we don't need to upgrade them
# everywhere one by one.

# FIXME: Freeze this to get exact versions of all dependencies
notebook==7.0.7
jupyterlab==4.0.11
nbgitpuller==1.2.1
jupyter-resource-usage==1.0.1
# Matches version in images/hub/Dockerfile
jupyterhub==4.1.6
appmode==0.8.0
ipywidgets==8.0.7
jupyter-tree-download==1.0.1
git-credential-helpers==0.2
# Measure popularity of different packages in our hubs
# https://discourse.jupyter.org/t/request-for-implementation-instrument-libraries-actively-used-by-users-on-a-jupyterhub/7994?u=yuvipanda
# NOTE(review): "add-error-handling" looks like a branch name rather than a tag
# or commit, so what gets installed may change between builds - confirm.
git+https://github.com/shaneknapp/python-popularity-contest.git@add-error-handling
# RISE is useful for presentations - see https://github.com/berkeley-dsep-infra/datahub/issues/2527
RISE==5.7.1
# syncthing for dropbox-like functionality
jupyter-syncthing-proxy==1.0.3
# jupyter archival tool for easy user downloads
jupyter-archive==3.4.0
41 changes: 41 additions & 0 deletions install-mambaforge.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/bash

#
# Download and install a pinned version of mambaforge.
#
# Required environment variables:
#   CONDA_DIR  prefix to install into
#   NB_USER    user (and group) that will own CONDA_DIR afterwards
#

set -euxo pipefail

# Fail early with a clear message. With either variable empty, the history
# truncation and the recursive chown below would act on unintended paths.
: "${CONDA_DIR:?CONDA_DIR must be set to the conda install prefix}"
: "${NB_USER:?NB_USER must be set to the user that should own CONDA_DIR}"

cd "$(dirname "$0")"
MAMBAFORGE_VERSION=24.3.0-0

URL="https://github.com/conda-forge/miniforge/releases/download/${MAMBAFORGE_VERSION}/Mambaforge-${MAMBAFORGE_VERSION}-Linux-x86_64.sh"
INSTALLER_PATH=/tmp/mambaforge-installer.sh

wget --quiet "${URL}" -O "${INSTALLER_PATH}"
chmod +x "${INSTALLER_PATH}"

# -b: batch (non-interactive) install, -p: install prefix
bash "${INSTALLER_PATH}" -b -p "${CONDA_DIR}"
export PATH="${CONDA_DIR}/bin:$PATH"

# Do not attempt to auto update conda or dependencies
conda config --system --set auto_update_conda false
conda config --system --set show_channel_urls true

# Empty the conda history file, which seems to result in some effective pinning
# of packages in the initial env, which we don't intend. This file must not be
# removed.
: > "${CONDA_DIR}/conda-meta/history"

# Clean things out!
conda clean --all -f -y

# Remove the big installer so we don't increase docker image size too much
rm "${INSTALLER_PATH}"

# Remove the pip cache created as part of installing mambaforge
if [ -n "${HOME:-}" ]; then
    rm -rf "${HOME}/.cache"
fi

chown -R "${NB_USER}:${NB_USER}" "${CONDA_DIR}"

conda list -n root
18 changes: 18 additions & 0 deletions install.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env r

# Installs the image's base set of version-pinned R packages.
# devtools is not installed here: the helper sourced below calls
# devtools::install_version, so devtools must already be available.

source("/tmp/class-libs.R")

# R packages to be installed that aren't from apt
# Combination of informal requests & rocker image suggestions
# Some of these were already in datahub image
pinned_versions <- c(
  BiocManager = "1.30.21",
  IRkernel    = "1.3.2",
  rmarkdown   = "2.22",
  shiny       = "1.7.4"
)

# The helper expects a flat vector alternating name, version, name, version...
# rbind() stacks names over versions; as.vector() then reads the matrix
# column by column, which interleaves them.
base_r_packages <- as.vector(rbind(names(pinned_versions), unname(pinned_versions)))

class_libs_install_version("Base packages", base_r_packages)
2 changes: 2 additions & 0 deletions rsession.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# RStudio session settings; the Dockerfile copies this file to
# /etc/rstudio/rsession.conf.
# Use binary packages! This is the same repository URL that Rprofile.site sets
# as the CRAN repo for plain R sessions.
r-cran-repos=https://packagemanager.rstudio.com/all/__linux__/jammy/latest

0 comments on commit 6000a56

Please sign in to comment.