diff --git a/airbyte-ci/connectors/base_images/README.md b/airbyte-ci/connectors/base_images/README.md index 8b6bf9b40237..db92daea2a51 100644 --- a/airbyte-ci/connectors/base_images/README.md +++ b/airbyte-ci/connectors/base_images/README.md @@ -42,7 +42,7 @@ RUN mkdir -p 755 /usr/share/nltk_data ### Example for `airbyte/java-connector-base`: ```dockerfile FROM docker.io/amazoncorretto:21-al2023@sha256:5454cb606e803fce56861fdbc9eab365eaa2ab4f357ceb8c1d56f4f8c8a7bc33 -RUN sh -c set -o xtrace && yum update -y --security && yum install -y tar openssl findutils && yum clean all +RUN sh -c set -o xtrace && yum install -y shadow-utils tar openssl findutils && yum update -y --security && yum clean all && rm -rf /var/cache/yum && groupadd --gid 1000 airbyte && useradd --uid 1000 --gid airbyte --shell /bin/bash --create-home airbyte && mkdir /secrets && mkdir /config && mkdir --mode 755 /airbyte && mkdir --mode 755 /custom_cache && chown -R airbyte:airbyte /airbyte && chown -R airbyte:airbyte /custom_cache && chown -R airbyte:airbyte /secrets && chown -R airbyte:airbyte /config && chown -R airbyte:airbyte /usr/share/pki/ca-trust-source && chown -R airbyte:airbyte /etc/pki/ca-trust && chown -R airbyte:airbyte /tmp ENV AIRBYTE_SPEC_CMD=/airbyte/javabase.sh --spec ENV AIRBYTE_CHECK_CMD=/airbyte/javabase.sh --check ENV AIRBYTE_DISCOVER_CMD=/airbyte/javabase.sh --discover @@ -77,6 +77,8 @@ ENV AIRBYTE_ENTRYPOINT=/airbyte/base.sh | Version | Published | Docker Image Address | Changelog | |---------|-----------|--------------|-----------| +| 2.0.0-rc.2 | ✅| docker.io/airbyte/java-connector-base:2.0.0-rc.2@sha256:e5543b3de4c38e9ef45dba886bad5ee319b0d7bfe921f310c788f1d4466e25eb | Fine tune permissions and reproduce platform java base implementation | +| 2.0.0-rc.1 | ✅| docker.io/airbyte/java-connector-base:2.0.0-rc.1@sha256:484b929684b9e4f60d06cde171ee0b8238802cb434403293fcede81c1e73c537 | Make the java base image non root | | 1.0.0 | ✅| 
docker.io/airbyte/java-connector-base:1.0.0@sha256:be86e5684e1e6d9280512d3d8071b47153698fe08ad990949c8eeff02803201a | Create a base image for our java connectors based on Amazon Corretto. | | 1.0.0-rc.4 | ✅| docker.io/airbyte/java-connector-base:1.0.0-rc.4@sha256:be86e5684e1e6d9280512d3d8071b47153698fe08ad990949c8eeff02803201a | Bundle yum calls in a single RUN | | 1.0.0-rc.3 | ✅| docker.io/airbyte/java-connector-base:1.0.0-rc.3@sha256:be86e5684e1e6d9280512d3d8071b47153698fe08ad990949c8eeff02803201a | | diff --git a/airbyte-ci/connectors/base_images/base_images/java/bases.py b/airbyte-ci/connectors/base_images/base_images/java/bases.py index 72a376e3aae8..3f8545f82278 100644 --- a/airbyte-ci/connectors/base_images/base_images/java/bases.py +++ b/airbyte-ci/connectors/base_images/base_images/java/bases.py @@ -15,9 +15,6 @@ class AirbyteJavaConnectorBaseImage(bases.AirbyteConnectorBaseImage): - # TODO: remove this once we want to build the base image with the airbyte user. - USER: Final[str] = "root" - root_image: Final[published_image.PublishedImage] = AMAZON_CORRETTO_21_AL_2023 repository: Final[str] = "airbyte/java-connector-base" @@ -42,33 +39,52 @@ def get_container(self, platform: dagger.Platform) -> dagger.Container: """ return ( - # TODO: Call this when we want to build the base image with the airbyte user - # self.get_base_container(platform) self.dagger_client.container(platform=platform) .from_(self.root_image.address) # Bundle RUN commands together to reduce the number of layers. .with_exec( sh_dash_c( [ - # Update first, but in the same .with_exec step as the package installation. - # Otherwise, we risk caching stale package URLs. - "yum update -y --security", + # Shadow-utils is required to add a user with a specific UID and GID. # tar is equired to untar java connector binary distributions. # openssl is required because we need to ssh and scp sometimes. # findutils is required for xargs, which is shipped as part of findutils. 
- f"yum install -y tar openssl findutils", + f"yum install -y shadow-utils tar openssl findutils", + # Apply security updates in the same .with_exec step as the package installation. + # Otherwise, we risk caching stale package URLs. + "yum update -y --security", # Remove any dangly bits. "yum clean all", + # Remove the yum cache to reduce the image size. + "rm -rf /var/cache/yum", + # Create the group 'airbyte' with the GID 1000 + f"groupadd --gid {self.USER_ID} {self.USER}", + # Create the user 'airbyte' with the UID 1000 + f"useradd --uid {self.USER_ID} --gid {self.USER} --shell /bin/bash --create-home {self.USER}", + # Create mount point for secrets and configs + "mkdir /secrets", + "mkdir /config", + # Create the airbyte and cache directories and set the right permissions + f"mkdir --mode 755 {self.AIRBYTE_DIR_PATH}", + f"mkdir --mode 755 {self.CACHE_DIR_PATH}", + # Give the user 'airbyte' ownership of every directory it must be able to write to + f"chown -R {self.USER}:{self.USER} {self.AIRBYTE_DIR_PATH}", + f"chown -R {self.USER}:{self.USER} {self.CACHE_DIR_PATH}", + f"chown -R {self.USER}:{self.USER} /secrets", + f"chown -R {self.USER}:{self.USER} /config", + f"chown -R {self.USER}:{self.USER} /usr/share/pki/ca-trust-source", + f"chown -R {self.USER}:{self.USER} /etc/pki/ca-trust", + f"chown -R {self.USER}:{self.USER} /tmp", ] ) ) - .with_workdir("/airbyte") + .with_workdir(self.AIRBYTE_DIR_PATH) # Copy the datadog java agent jar from the internet. - .with_file("dd-java-agent.jar", self.dagger_client.http(self.DD_AGENT_JAR_URL)) + .with_file("dd-java-agent.jar", self.dagger_client.http(self.DD_AGENT_JAR_URL), owner=self.USER) # Copy base.sh from the git repo. - .with_file("base.sh", self.dagger_client.http(self.BASE_SCRIPT_URL)) + .with_file("base.sh", self.dagger_client.http(self.BASE_SCRIPT_URL), owner=self.USER) # Copy javabase.sh from the git repo. 
- .with_file("javabase.sh", self.dagger_client.http(self.JAVA_BASE_SCRIPT_URL)) + .with_file("javabase.sh", self.dagger_client.http(self.JAVA_BASE_SCRIPT_URL), owner=self.USER) # Set a bunch of env variables used by base.sh. .with_env_variable("AIRBYTE_SPEC_CMD", "/airbyte/javabase.sh --spec") .with_env_variable("AIRBYTE_CHECK_CMD", "/airbyte/javabase.sh --check") @@ -77,6 +93,7 @@ def get_container(self, platform: dagger.Platform) -> dagger.Container: .with_env_variable("AIRBYTE_WRITE_CMD", "/airbyte/javabase.sh --write") .with_env_variable("AIRBYTE_ENTRYPOINT", "/airbyte/base.sh") .with_entrypoint(["/airbyte/base.sh"]) + .with_user(self.USER) ) async def run_sanity_checks(self, platform: dagger.Platform): @@ -87,9 +104,19 @@ async def run_sanity_checks(self, platform: dagger.Platform): Args: platform (dagger.Platform): The platform on which the sanity checks should run. """ - container = self.get_container(platform) - await base_sanity_checks.check_user_can_read_dir(container, self.USER, self.AIRBYTE_DIR_PATH) - await base_sanity_checks.check_user_can_write_dir(container, self.USER, self.AIRBYTE_DIR_PATH) + container = await self.get_container(platform) + for expected_rw_dir in [ + self.AIRBYTE_DIR_PATH, + self.CACHE_DIR_PATH, + "/tmp", + "/secrets", + "/config", + "/usr/share/pki/ca-trust-source", + "/etc/pki/ca-trust", + ]: + await base_sanity_checks.check_user_can_write_dir(container, self.USER, expected_rw_dir) + await base_sanity_checks.check_user_can_read_dir(container, self.USER, expected_rw_dir) + await base_sanity_checks.check_user_uid_guid(container, self.USER, self.USER_ID, self.USER_ID) await base_sanity_checks.check_file_exists(container, "/airbyte/dd-java-agent.jar") await base_sanity_checks.check_file_exists(container, "/airbyte/base.sh") await base_sanity_checks.check_file_exists(container, "/airbyte/javabase.sh") diff --git a/airbyte-ci/connectors/base_images/base_images/sanity_checks.py 
b/airbyte-ci/connectors/base_images/base_images/sanity_checks.py index 3ea3b4e9b310..c57864e265e1 100644 --- a/airbyte-ci/connectors/base_images/base_images/sanity_checks.py +++ b/airbyte-ci/connectors/base_images/base_images/sanity_checks.py @@ -143,7 +143,7 @@ async def check_user_can_read_dir(container: dagger.Container, user: str, dir_pa try: await container.with_exec(["touch", f"{dir_path}/foo.txt"]).with_user(user).with_exec(["cat", f"{dir_path}/foo.txt"]) except dagger.ExecError: - raise errors.SanityCheckError(f"{dir_path} is not readable by the {user}.") + raise errors.SanityCheckError(f"{dir_path} is not readable by {user}.") async def check_user_cant_write_dir(container: dagger.Container, user: str, dir_path: str): @@ -161,7 +161,7 @@ async def check_user_cant_write_dir(container: dagger.Container, user: str, dir_ await container.with_user(user).with_exec(["touch", f"{dir_path}/foo.txt"]) except dagger.ExecError: return - raise errors.SanityCheckError(f"{dir_path} is writable by the {user}.") + raise errors.SanityCheckError(f"{dir_path} is writable by {user}.") async def check_user_can_write_dir(container: dagger.Container, user: str, dir_path: str): @@ -195,3 +195,26 @@ async def check_file_exists(container: dagger.Container, file_path: str): await container.with_exec(["test", "-f", file_path]) except dagger.ExecError: raise errors.SanityCheckError(f"{file_path} does not exist.") + + +async def check_user_uid_guid(container: dagger.Container, user: str, expected_uid: int, expected_gid: int): + """Check that the given user has the expected user id and group id. + + Args: + container (dagger.Container): The container on which the sanity checks should run. + user (str): The user to impersonate. + expected_uid (int): The expected user id. + expected_gid (int): The expected group id. + + Raises: + errors.SanityCheckError: Raised if the user does not have the expected user id or group id. 
+ """ + try: + user_id = (await container.with_user(user).with_exec(["id", "-u"]).stdout()).strip() + if int(user_id) != expected_uid: + raise errors.SanityCheckError(f"Unexpected user id: {user_id}") + group_id = (await container.with_user(user).with_exec(["id", "-g"]).stdout()).strip() + if int(group_id) != expected_gid: + raise errors.SanityCheckError(f"Unexpected group id: {group_id}") + except dagger.ExecError as e: + raise errors.SanityCheckError(e) diff --git a/airbyte-ci/connectors/base_images/generated/changelogs/airbyte_java_connector_base.json b/airbyte-ci/connectors/base_images/generated/changelogs/airbyte_java_connector_base.json index ca9ab3d5008a..c1ff69f90b68 100644 --- a/airbyte-ci/connectors/base_images/generated/changelogs/airbyte_java_connector_base.json +++ b/airbyte-ci/connectors/base_images/generated/changelogs/airbyte_java_connector_base.json @@ -1,4 +1,14 @@ [ + { + "version": "2.0.0-rc.2", + "changelog_entry": "Fine tune permissions and reproduce platform java base implementation", + "dockerfile_example": "FROM docker.io/amazoncorretto:21-al2023@sha256:5454cb606e803fce56861fdbc9eab365eaa2ab4f357ceb8c1d56f4f8c8a7bc33\nRUN sh -c set -o xtrace && yum install -y shadow-utils tar openssl findutils && yum update -y --security && yum clean all && rm -rf /var/cache/yum && groupadd --gid 1000 airbyte && useradd --uid 1000 --gid airbyte --shell /bin/bash --create-home airbyte && mkdir /secrets && mkdir /config && mkdir --mode 755 /airbyte && mkdir --mode 755 /custom_cache && chown -R airbyte:airbyte /airbyte && chown -R airbyte:airbyte /custom_cache && chown -R airbyte:airbyte /secrets && chown -R airbyte:airbyte /config && chown -R airbyte:airbyte /usr/share/pki/ca-trust-source && chown -R airbyte:airbyte /etc/pki/ca-trust && chown -R airbyte:airbyte /tmp\nENV AIRBYTE_SPEC_CMD=/airbyte/javabase.sh --spec\nENV AIRBYTE_CHECK_CMD=/airbyte/javabase.sh --check\nENV AIRBYTE_DISCOVER_CMD=/airbyte/javabase.sh --discover\nENV 
AIRBYTE_READ_CMD=/airbyte/javabase.sh --read\nENV AIRBYTE_WRITE_CMD=/airbyte/javabase.sh --write\nENV AIRBYTE_ENTRYPOINT=/airbyte/base.sh" + }, + { + "version": "2.0.0-rc.1", + "changelog_entry": "Make the java base image non root", + "dockerfile_example": "FROM docker.io/amazoncorretto:21-al2023@sha256:5454cb606e803fce56861fdbc9eab365eaa2ab4f357ceb8c1d56f4f8c8a7bc33\nRUN sh -c set -o xtrace && yum update -y --security && yum install -y /usr/sbin/adduser tar openssl findutils && yum clean all && adduser --base-dir /airbyte --uid 1000 --user-group --system airbyte && mkdir --mode 755 /airbyte && mkdir --mode 755 /custom_cache && chown -R airbyte:airbyte /airbyte\nENV AIRBYTE_SPEC_CMD=/airbyte/javabase.sh --spec\nENV AIRBYTE_CHECK_CMD=/airbyte/javabase.sh --check\nENV AIRBYTE_DISCOVER_CMD=/airbyte/javabase.sh --discover\nENV AIRBYTE_READ_CMD=/airbyte/javabase.sh --read\nENV AIRBYTE_WRITE_CMD=/airbyte/javabase.sh --write\nENV AIRBYTE_ENTRYPOINT=/airbyte/base.sh" + }, { "version": "1.0.0", "changelog_entry": "Create a base image for our java connectors based on Amazon Corretto.", diff --git a/airbyte-ci/connectors/base_images/pyproject.toml b/airbyte-ci/connectors/base_images/pyproject.toml index 6c4e41f34fea..98b8f45e9b36 100644 --- a/airbyte-ci/connectors/base_images/pyproject.toml +++ b/airbyte-ci/connectors/base_images/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "airbyte-connectors-base-images" -version = "1.4.0" +version = "1.5.0" description = "This package is used to generate and publish the base images for Airbyte Connectors." authors = ["Augustin Lafanechere "] readme = "README.md"