Skip to content

Commit

Permalink
base_images: create non-root java base image
Browse files Browse the repository at this point in the history
  • Loading branch information
alafanechere committed Dec 19, 2024
1 parent f0d6f9f commit cb23c63
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 20 deletions.
4 changes: 3 additions & 1 deletion airbyte-ci/connectors/base_images/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ RUN mkdir -p 755 /usr/share/nltk_data
### Example for `airbyte/java-connector-base`:
```dockerfile
FROM docker.io/amazoncorretto:21-al2023@sha256:5454cb606e803fce56861fdbc9eab365eaa2ab4f357ceb8c1d56f4f8c8a7bc33
RUN sh -c set -o xtrace && yum update -y --security && yum install -y tar openssl findutils && yum clean all
RUN sh -c set -o xtrace && yum install -y shadow-utils tar openssl findutils && yum update -y --security && yum clean all && rm -rf /var/cache/yum && groupadd --gid 1000 airbyte && useradd --uid 1000 --gid airbyte --shell /bin/bash --create-home airbyte && mkdir /secrets && mkdir /config && mkdir --mode 755 /airbyte && mkdir --mode 755 /custom_cache && chown -R airbyte:airbyte /airbyte && chown -R airbyte:airbyte /custom_cache && chown -R airbyte:airbyte /secrets && chown -R airbyte:airbyte /config && chown -R airbyte:airbyte /usr/share/pki/ca-trust-source && chown -R airbyte:airbyte /etc/pki/ca-trust && chown -R airbyte:airbyte /tmp
ENV AIRBYTE_SPEC_CMD=/airbyte/javabase.sh --spec
ENV AIRBYTE_CHECK_CMD=/airbyte/javabase.sh --check
ENV AIRBYTE_DISCOVER_CMD=/airbyte/javabase.sh --discover
Expand Down Expand Up @@ -77,6 +77,8 @@ ENV AIRBYTE_ENTRYPOINT=/airbyte/base.sh

| Version | Published | Docker Image Address | Changelog |
|---------|-----------|--------------|-----------|
| 2.0.0-rc.2 || docker.io/airbyte/java-connector-base:2.0.0-rc.2@sha256:e5543b3de4c38e9ef45dba886bad5ee319b0d7bfe921f310c788f1d4466e25eb | Fine tune permissions and reproduce platform java base implementation |
| 2.0.0-rc.1 || docker.io/airbyte/java-connector-base:2.0.0-rc.1@sha256:484b929684b9e4f60d06cde171ee0b8238802cb434403293fcede81c1e73c537 | Make the java base image non root |
| 1.0.0 || docker.io/airbyte/java-connector-base:1.0.0@sha256:be86e5684e1e6d9280512d3d8071b47153698fe08ad990949c8eeff02803201a | Create a base image for our java connectors based on Amazon Corretto. |
| 1.0.0-rc.4 || docker.io/airbyte/java-connector-base:1.0.0-rc.4@sha256:be86e5684e1e6d9280512d3d8071b47153698fe08ad990949c8eeff02803201a | Bundle yum calls in a single RUN |
| 1.0.0-rc.3 || docker.io/airbyte/java-connector-base:1.0.0-rc.3@sha256:be86e5684e1e6d9280512d3d8071b47153698fe08ad990949c8eeff02803201a | |
Expand Down
59 changes: 43 additions & 16 deletions airbyte-ci/connectors/base_images/base_images/java/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@


class AirbyteJavaConnectorBaseImage(bases.AirbyteConnectorBaseImage):
# TODO: remove this once we want to build the base image with the airbyte user.
USER: Final[str] = "root"

root_image: Final[published_image.PublishedImage] = AMAZON_CORRETTO_21_AL_2023
repository: Final[str] = "airbyte/java-connector-base"

Expand All @@ -42,33 +39,52 @@ def get_container(self, platform: dagger.Platform) -> dagger.Container:
"""

return (
# TODO: Call this when we want to build the base image with the airbyte user
# self.get_base_container(platform)
self.dagger_client.container(platform=platform)
.from_(self.root_image.address)
# Bundle RUN commands together to reduce the number of layers.
.with_exec(
sh_dash_c(
[
# Update first, but in the same .with_exec step as the package installation.
# Otherwise, we risk caching stale package URLs.
"yum update -y --security",
# Shadow-utils is required to add a user with a specific UID and GID.
# tar is required to untar java connector binary distributions.
# openssl is required because we need to ssh and scp sometimes.
# findutils is required for xargs, which is shipped as part of findutils.
f"yum install -y tar openssl findutils",
f"yum install -y shadow-utils tar openssl findutils",
# Apply security updates in the same .with_exec step as the package installation.
# Otherwise, we risk caching stale package URLs.
"yum update -y --security",
# Remove any dangly bits.
"yum clean all",
# Remove the yum cache to reduce the image size.
"rm -rf /var/cache/yum",
# Create the group 'airbyte' with the GID 1000
f"groupadd --gid {self.USER_ID} {self.USER}",
# Create the user 'airbyte' with the UID 1000
f"useradd --uid {self.USER_ID} --gid {self.USER} --shell /bin/bash --create-home {self.USER}",
# Create mount point for secrets and configs
"mkdir /secrets",
"mkdir /config",
# Create the cache airbyte directories and set the right permissions
f"mkdir --mode 755 {self.AIRBYTE_DIR_PATH}",
f"mkdir --mode 755 {self.CACHE_DIR_PATH}",
# Change the owner of the airbyte directory to the user 'airbyte'
f"chown -R {self.USER}:{self.USER} {self.AIRBYTE_DIR_PATH}",
f"chown -R {self.USER}:{self.USER} {self.CACHE_DIR_PATH}",
f"chown -R {self.USER}:{self.USER} /secrets",
f"chown -R {self.USER}:{self.USER} /config",
f"chown -R {self.USER}:{self.USER} /usr/share/pki/ca-trust-source",
f"chown -R {self.USER}:{self.USER} /etc/pki/ca-trust",
f"chown -R {self.USER}:{self.USER} /tmp",
]
)
)
.with_workdir("/airbyte")
.with_workdir(self.AIRBYTE_DIR_PATH)
# Copy the datadog java agent jar from the internet.
.with_file("dd-java-agent.jar", self.dagger_client.http(self.DD_AGENT_JAR_URL))
.with_file("dd-java-agent.jar", self.dagger_client.http(self.DD_AGENT_JAR_URL), owner=self.USER)
# Copy base.sh from the git repo.
.with_file("base.sh", self.dagger_client.http(self.BASE_SCRIPT_URL))
.with_file("base.sh", self.dagger_client.http(self.BASE_SCRIPT_URL), owner=self.USER)
# Copy javabase.sh from the git repo.
.with_file("javabase.sh", self.dagger_client.http(self.JAVA_BASE_SCRIPT_URL))
.with_file("javabase.sh", self.dagger_client.http(self.JAVA_BASE_SCRIPT_URL), owner=self.USER)
# Set a bunch of env variables used by base.sh.
.with_env_variable("AIRBYTE_SPEC_CMD", "/airbyte/javabase.sh --spec")
.with_env_variable("AIRBYTE_CHECK_CMD", "/airbyte/javabase.sh --check")
Expand All @@ -77,6 +93,7 @@ def get_container(self, platform: dagger.Platform) -> dagger.Container:
.with_env_variable("AIRBYTE_WRITE_CMD", "/airbyte/javabase.sh --write")
.with_env_variable("AIRBYTE_ENTRYPOINT", "/airbyte/base.sh")
.with_entrypoint(["/airbyte/base.sh"])
.with_user(self.USER)
)

async def run_sanity_checks(self, platform: dagger.Platform):
Expand All @@ -87,9 +104,19 @@ async def run_sanity_checks(self, platform: dagger.Platform):
Args:
platform (dagger.Platform): The platform on which the sanity checks should run.
"""
container = self.get_container(platform)
await base_sanity_checks.check_user_can_read_dir(container, self.USER, self.AIRBYTE_DIR_PATH)
await base_sanity_checks.check_user_can_write_dir(container, self.USER, self.AIRBYTE_DIR_PATH)
container = await self.get_container(platform)
for expected_rw_dir in [
self.AIRBYTE_DIR_PATH,
self.CACHE_DIR_PATH,
"/tmp",
"/secrets",
"/config",
"/usr/share/pki/ca-trust-source",
"/etc/pki/ca-trust",
]:
await base_sanity_checks.check_user_can_write_dir(container, self.USER, expected_rw_dir)
await base_sanity_checks.check_user_can_read_dir(container, self.USER, expected_rw_dir)
await base_sanity_checks.check_user_uid_guid(container, self.USER, self.USER_ID, self.USER_ID)
await base_sanity_checks.check_file_exists(container, "/airbyte/dd-java-agent.jar")
await base_sanity_checks.check_file_exists(container, "/airbyte/base.sh")
await base_sanity_checks.check_file_exists(container, "/airbyte/javabase.sh")
Expand Down
27 changes: 25 additions & 2 deletions airbyte-ci/connectors/base_images/base_images/sanity_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ async def check_user_can_read_dir(container: dagger.Container, user: str, dir_pa
try:
await container.with_exec(["touch", f"{dir_path}/foo.txt"]).with_user(user).with_exec(["cat", f"{dir_path}/foo.txt"])
except dagger.ExecError:
raise errors.SanityCheckError(f"{dir_path} is not readable by the {user}.")
raise errors.SanityCheckError(f"{dir_path} is not readable by {user}.")


async def check_user_cant_write_dir(container: dagger.Container, user: str, dir_path: str):
Expand All @@ -161,7 +161,7 @@ async def check_user_cant_write_dir(container: dagger.Container, user: str, dir_
await container.with_user(user).with_exec(["touch", f"{dir_path}/foo.txt"])
except dagger.ExecError:
return
raise errors.SanityCheckError(f"{dir_path} is writable by the {user}.")
raise errors.SanityCheckError(f"{dir_path} is writable by {user}.")


async def check_user_can_write_dir(container: dagger.Container, user: str, dir_path: str):
Expand Down Expand Up @@ -195,3 +195,26 @@ async def check_file_exists(container: dagger.Container, file_path: str):
await container.with_exec(["test", "-f", file_path])
except dagger.ExecError:
raise errors.SanityCheckError(f"{file_path} does not exist.")


async def check_user_uid_guid(container: dagger.Container, user: str, expected_uid: int, expected_gid: int):
    """Check that the given user has the expected user id and group id.

    The ids are read by running `id -u` and `id -g` in the container as `user`.
    The user id is verified before the group id is queried.

    Args:
        container (dagger.Container): The container on which the sanity checks should run.
        user (str): The user to impersonate.
        expected_uid (int): The expected user id.
        expected_gid (int): The expected group id.

    Raises:
        errors.SanityCheckError: Raised if the `id` command fails, if its output is not an integer,
            or if the user does not have the expected user id or group id.
    """
    checks = (
        ("user", "-u", expected_uid),
        ("group", "-g", expected_gid),
    )
    for id_kind, id_flag, expected_id in checks:
        try:
            raw_id = (await container.with_user(user).with_exec(["id", id_flag]).stdout()).strip()
        except dagger.ExecError as e:
            # Keep the original exec failure attached as the cause for easier debugging.
            raise errors.SanityCheckError(e) from e
        try:
            actual_id = int(raw_id)
        except ValueError as e:
            # Non-numeric output is still a failed sanity check, not an unrelated crash.
            raise errors.SanityCheckError(f"Unexpected {id_kind} id: {raw_id}") from e
        if actual_id != expected_id:
            raise errors.SanityCheckError(f"Unexpected {id_kind} id: {raw_id}")
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
[
{
"version": "2.0.0-rc.2",
"changelog_entry": "Fine tune permissions and reproduce platform java base implementation",
"dockerfile_example": "FROM docker.io/amazoncorretto:21-al2023@sha256:5454cb606e803fce56861fdbc9eab365eaa2ab4f357ceb8c1d56f4f8c8a7bc33\nRUN sh -c set -o xtrace && yum install -y shadow-utils tar openssl findutils && yum update -y --security && yum clean all && rm -rf /var/cache/yum && groupadd --gid 1000 airbyte && useradd --uid 1000 --gid airbyte --shell /bin/bash --create-home airbyte && mkdir /secrets && mkdir /config && mkdir --mode 755 /airbyte && mkdir --mode 755 /custom_cache && chown -R airbyte:airbyte /airbyte && chown -R airbyte:airbyte /custom_cache && chown -R airbyte:airbyte /secrets && chown -R airbyte:airbyte /config && chown -R airbyte:airbyte /usr/share/pki/ca-trust-source && chown -R airbyte:airbyte /etc/pki/ca-trust && chown -R airbyte:airbyte /tmp\nENV AIRBYTE_SPEC_CMD=/airbyte/javabase.sh --spec\nENV AIRBYTE_CHECK_CMD=/airbyte/javabase.sh --check\nENV AIRBYTE_DISCOVER_CMD=/airbyte/javabase.sh --discover\nENV AIRBYTE_READ_CMD=/airbyte/javabase.sh --read\nENV AIRBYTE_WRITE_CMD=/airbyte/javabase.sh --write\nENV AIRBYTE_ENTRYPOINT=/airbyte/base.sh"
},
{
"version": "2.0.0-rc.1",
"changelog_entry": " Make the java base image non root",
"dockerfile_example": "FROM docker.io/amazoncorretto:21-al2023@sha256:5454cb606e803fce56861fdbc9eab365eaa2ab4f357ceb8c1d56f4f8c8a7bc33\nRUN sh -c set -o xtrace && yum update -y --security && yum install -y /usr/sbin/adduser tar openssl findutils && yum clean all && adduser --base-dir /airbyte --uid 1000 --user-group --system airbyte && mkdir --mode 755 /airbyte && mkdir --mode 755 /custom_cache && chown -R airbyte:airbyte /airbyte\nENV AIRBYTE_SPEC_CMD=/airbyte/javabase.sh --spec\nENV AIRBYTE_CHECK_CMD=/airbyte/javabase.sh --check\nENV AIRBYTE_DISCOVER_CMD=/airbyte/javabase.sh --discover\nENV AIRBYTE_READ_CMD=/airbyte/javabase.sh --read\nENV AIRBYTE_WRITE_CMD=/airbyte/javabase.sh --write\nENV AIRBYTE_ENTRYPOINT=/airbyte/base.sh"
},
{
"version": "1.0.0",
"changelog_entry": "Create a base image for our java connectors based on Amazon Corretto.",
Expand Down
2 changes: 1 addition & 1 deletion airbyte-ci/connectors/base_images/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "airbyte-connectors-base-images"
version = "1.4.0"
version = "1.5.0"
description = "This package is used to generate and publish the base images for Airbyte Connectors."
authors = ["Augustin Lafanechere <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit cb23c63

Please sign in to comment.