Skip to content

Commit

Permalink
Add arm remote builder with ubuntu image
Browse files Browse the repository at this point in the history
Signed-off-by: Joonas Rautiola <[email protected]>
  • Loading branch information
joinemm committed Mar 8, 2024
1 parent 0763884 commit d9aa5b5
Show file tree
Hide file tree
Showing 10 changed files with 481 additions and 16 deletions.
3 changes: 3 additions & 0 deletions ssh-keys.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ flokli:
- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPTVTXOutUZZjXLB0lUSgeKcSY/8mxKkC0ingGK1whD2
hrosten:
- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHFuB+uEjhoSdakwiKLD3TbNpbjnlXerEfZQbtRgvdSz
jrautiola:
- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGlFqSQFoSSuAS1IjmWBFXie329I5Aqf71QhVOnLTBG+ joonas@x1
- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIB3h/Aj66ndKFtqpQ8H53tE9KbbO0obThC0qbQQKFQRr joonas@zeus
71 changes: 71 additions & 0 deletions terraform/arm-builder.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# SPDX-FileCopyrightText: 2022-2024 TII (SSRC) and the Ghaf contributors
# SPDX-License-Identifier: Apache-2.0

locals {
  # Number of aarch64 builder VMs for the active environment
  # (priv/dev/prod); the per-environment values live in main.tf.
  arm_num_builders = local.opts[local.conf].num_builders_aarch64
}

# Aarch64 builder VMs (ubuntu + nix, see modules/arm-builder-vm).
module "arm_builder_vm" {
  source = "./modules/arm-builder-vm"

  count = local.arm_num_builders

  resource_group_name         = azurerm_resource_group.infra.name
  location                    = azurerm_resource_group.infra.location
  virtual_machine_name        = "ghaf-builder-aarch64-${count.index}-${local.ws}"
  virtual_machine_size        = local.opts[local.conf].vm_size_builder_aarch64
  virtual_machine_osdisk_size = local.opts[local.conf].osdisk_size_builder

  # cloud-init: create the 'remote-build' user the jenkins controller
  # logs in as, and write the environment file for the rclone-http
  # service (the storage account name of the binary cache).
  virtual_machine_custom_data = join("\n", ["#cloud-config", yamlencode({
    users = [{
      name = "remote-build"
      ssh_authorized_keys = [
        # Direct reference instead of an interpolation-only "${...}"
        # string, which terraform deprecates.
        data.azurerm_key_vault_secret.ssh_remote_build_pub.value
      ]
    }]
    write_files = [
      {
        content = "AZURE_STORAGE_ACCOUNT_NAME=${data.azurerm_storage_account.binary_cache.name}",
        "path"  = "/var/lib/rclone-http/env"
      }
    ],
  })])

  subnet_id = azurerm_subnet.builders.id
}

# Allow inbound SSH from the jenkins subnet (only):
# attach each builder's NIC to its dedicated network security group.
resource "azurerm_network_interface_security_group_association" "arm_builder_vm" {
  count = local.arm_num_builders

  network_interface_id      = module.arm_builder_vm[count.index].virtual_machine_network_interface_id
  network_security_group_id = azurerm_network_security_group.arm_builder_vm[count.index].id
}

# Per-builder security group; its only inbound rule allows SSH from the
# jenkins subnet.
resource "azurerm_network_security_group" "arm_builder_vm" {
  count = local.arm_num_builders

  name                = "arm-builder-vm-${count.index}"
  resource_group_name = azurerm_resource_group.infra.name
  location            = azurerm_resource_group.infra.location

  security_rule {
    name              = "AllowSSHFromJenkins"
    priority          = 400
    direction         = "Inbound"
    access            = "Allow"
    protocol          = "Tcp"
    source_port_range = "*"
    # The provider expects a list of strings here; "22" avoids the
    # implicit number-to-string conversion of a bare 22.
    destination_port_ranges    = ["22"]
    source_address_prefix      = azurerm_subnet.jenkins.address_prefixes[0]
    destination_address_prefix = "*"
  }
}

# Allow the VMs to read from the binary cache bucket:
# grant each builder VM's managed identity read access
# ("Storage Blob Data Reader") on the binary cache storage container.
resource "azurerm_role_assignment" "arm_builder_access_binary_cache" {
  count                = local.arm_num_builders
  scope                = data.azurerm_storage_container.binary_cache_1.resource_manager_id
  role_definition_name = "Storage Blob Data Reader"
  principal_id         = module.arm_builder_vm[count.index].virtual_machine_identity_principal_id
}
2 changes: 1 addition & 1 deletion terraform/binary-cache.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ module "binary_cache_vm" {

virtual_machine_custom_data = join("\n", ["#cloud-config", yamlencode({
users = [
for user in toset(["bmg", "flokli", "hrosten"]) : {
for user in toset(["bmg", "flokli", "hrosten", "jrautiola"]) : {
name = user
sudo = "ALL=(ALL) NOPASSWD:ALL"
ssh_authorized_keys = local.ssh_keys[user]
Expand Down
6 changes: 3 additions & 3 deletions terraform/builder.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ module "builder_image" {
}

locals {
num_builders = local.opts[local.conf].num_builders
num_builders = local.opts[local.conf].num_builders_x86
}

module "builder_vm" {
Expand All @@ -25,8 +25,8 @@ module "builder_vm" {

resource_group_name = azurerm_resource_group.infra.name
location = azurerm_resource_group.infra.location
virtual_machine_name = "ghaf-builder-${count.index}-${local.ws}"
virtual_machine_size = local.opts[local.conf].vm_size_builder
virtual_machine_name = "ghaf-builder-x86-${count.index}-${local.ws}"
virtual_machine_size = local.opts[local.conf].vm_size_builder_x86
virtual_machine_osdisk_size = local.opts[local.conf].osdisk_size_builder
virtual_machine_source_image = module.builder_image.image_id

Expand Down
16 changes: 10 additions & 6 deletions terraform/jenkins-controller.tf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ module "jenkins_controller_vm" {

virtual_machine_custom_data = join("\n", ["#cloud-config", yamlencode({
users = [
for user in toset(["bmg", "flokli", "hrosten"]) : {
for user in toset(["bmg", "flokli", "hrosten", "jrautiola"]) : {
name = user
sudo = "ALL=(ALL) NOPASSWD:ALL"
ssh_authorized_keys = local.ssh_keys[user]
Expand All @@ -53,15 +53,19 @@ module "jenkins_controller_vm" {
# rather than having to recreate the VM whenever the list of builders is
# changed.
{
content = join("\n", [
for ip in toset(module.builder_vm[*].virtual_machine_private_ip_address) : "ssh://remote-build@${ip} x86_64-linux /etc/secrets/remote-build-ssh-key 10 10 kvm,big-parallel - -"
]),
content = join("\n", concat(
[for ip in toset(module.builder_vm[*].virtual_machine_private_ip_address) : "ssh://remote-build@${ip} x86_64-linux /etc/secrets/remote-build-ssh-key 10 1 kvm,nixos-test,benchmark,big-parallel - -"],
[for ip in toset(module.arm_builder_vm[*].virtual_machine_private_ip_address) : "ssh://remote-build@${ip} aarch64-linux /etc/secrets/remote-build-ssh-key 8 1 kvm,nixos-test,benchmark,big-parallel - -"]
)),
"path" = "/etc/nix/machines"
},
# Render /var/lib/builder-keyscan/scanlist, so known_hosts can be populated.
{
content = join("\n", toset(module.builder_vm[*].virtual_machine_private_ip_address))
"path" = "/var/lib/builder-keyscan/scanlist"
content = join("\n", toset(concat(
module.builder_vm[*].virtual_machine_private_ip_address,
module.arm_builder_vm[*].virtual_machine_private_ip_address
))),
"path" = "/var/lib/builder-keyscan/scanlist"
},
{
content = "SITE_ADDRESS=ghaf-jenkins-controller-${local.ws}.${azurerm_resource_group.infra.location}.cloudapp.azure.com",
Expand Down
18 changes: 12 additions & 6 deletions terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -79,29 +79,35 @@ locals {
priv = {
vm_size_binarycache = "Standard_D2_v3"
osdisk_size_binarycache = "50"
vm_size_builder = "Standard_D2_v3"
vm_size_builder_x86 = "Standard_D2_v3"
vm_size_builder_aarch64 = "Standard_D2ps_v5"
osdisk_size_builder = "150"
vm_size_controller = "Standard_E2_v5"
osdisk_size_controller = "150"
num_builders = 1
num_builders_x86 = 1
num_builders_aarch64 = 1
}
dev = {
vm_size_binarycache = "Standard_D2_v3"
osdisk_size_binarycache = "250"
vm_size_builder = "Standard_D4_v3"
vm_size_builder_x86 = "Standard_D4_v3"
vm_size_builder_aarch64 = "Standard_D4ps_v5"
osdisk_size_builder = "250"
vm_size_controller = "Standard_E4_v5"
osdisk_size_controller = "500"
num_builders = 1
num_builders_x86 = 1
num_builders_aarch64 = 1
}
prod = {
vm_size_binarycache = "Standard_D2_v3"
osdisk_size_binarycache = "250"
vm_size_builder = "Standard_D8_v3"
vm_size_builder_x86 = "Standard_D8_v3"
vm_size_builder_aarch64 = "Standard_D8ps_v5"
osdisk_size_builder = "500"
vm_size_controller = "Standard_E4_v5"
osdisk_size_controller = "1000"
num_builders = 2
num_builders_x86 = 2
num_builders_aarch64 = 2
}
}

Expand Down
19 changes: 19 additions & 0 deletions terraform/modules/arm-builder-vm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!--
SPDX-FileCopyrightText: 2022-2024 TII (SSRC) and the Ghaf contributors
SPDX-License-Identifier: Apache-2.0
-->

# arm-builder-vm

Terraform module spinning up an Azure aarch64 VM with Ubuntu and Nix.

Modified from the `azurerm-linux-vm` module.

## Why not NixOS Image?

- `virtualisation.azure.agent` does not support anything that isn't x86, [quite explicitly](https://github.com/NixOS/nixpkgs/blob/master/nixos/modules/virtualisation/azure-agent.nix#L38)

- aarch64 Azure VMs (Standard_D2ps_v5 etc.) are all v5, and as such only support [Generation 2 hypervisor images](https://learn.microsoft.com/en-us/azure/virtual-machines/generation-2), which Nix also lacks support for.
  There is a [stale pull request](https://github.com/NixOS/nixpkgs/pull/236110) in nixpkgs that tries to fix this issue, but it has not been active since June 2023. Part of the problem is that Gen2 images use EFI boot.

For these reasons, this ARM builder uses Ubuntu with Nix installed on top, configured to be similar to the x86 builder's NixOS configuration.
172 changes: 172 additions & 0 deletions terraform/modules/arm-builder-vm/ubuntu-builder.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#!/usr/bin/env bash

# SPDX-FileCopyrightText: 2022-2024 TII (SSRC) and the Ghaf contributors
# SPDX-License-Identifier: Apache-2.0

set -x # debug: trace every command for easier troubleshooting

################################################################################

# Assume root if HOME and USER are unset
# (NOTE(review): presumably the case when run non-interactively at
# provisioning time — confirm against the caller)
[ -z "${HOME}" ] && export HOME="/root"
[ -z "${USER}" ] && export USER="root"

################################################################################

apt_update() {
  # Refresh the package index, apply pending upgrades, and install the
  # prerequisites needed by the nix installer (curl, xz).
  local -a prereqs=(ca-certificates curl xz-utils)
  sudo apt-get update -y
  sudo apt-get upgrade -y
  sudo apt-get install -y "${prereqs[@]}"
}

install_nix() {
  # Run the upstream nix installer and wire the result into the system.
  # $1: installation mode, "single" (no daemon) or "multi" (daemon).
  local mode="$1"
  case "$mode" in
    single)
      # Single-user
      sh <(curl -L https://nixos.org/nix/install) --yes --no-daemon
      ;;
    multi)
      # Multi-user
      sh <(curl -L https://nixos.org/nix/install) --yes --daemon
      ;;
    *)
      echo "Error: unknown installation type: '$mode'"
      exit 1
      ;;
  esac
  # Fix https://github.com/nix-community/home-manager/issues/3734:
  sudo mkdir -m 0755 -p /nix/var/nix/{profiles,gcroots}/per-user/"$USER"
  sudo chown -R "$USER:nixbld" "/nix/var/nix/profiles/per-user/$USER"
  # Enable flakes
  sudo sh -c "printf 'experimental-features = nix-command flakes\n'>>/etc/nix/nix.conf"
  # Symlink the nix binaries into /usr/bin, see:
  # https://github.com/NixOS/nix/issues/1078#issuecomment-1019327751
  local nixbin
  for nixbin in /nix/var/nix/profiles/default/bin/nix*; do
    sudo ln -fs "$nixbin" "/usr/bin/$(basename "$nixbin")"
  done
}

configure_builder() {
  # Append builder-specific settings to /etc/nix/nix.conf.
  # The 'remote-build' user is trusted so this host can serve as a nix
  # remote builder (the user itself is created by cloud-init, not here).
  # For a detailed description of each of the below options see:
  # https://nixos.org/manual/nix/stable/command-ref/conf-file
  # NOTE: the '#' lines inside the string below are written verbatim
  # into nix.conf; do not edit them as if they were script comments.
  extra_nix_conf="
# 20 GB (20*1024*1024*1024)
min-free = 21474836480
# 500 GB (500*1024*1024*1024)
# osdisk size for prod builders
max-free = 536870912000
system-features = nixos-test benchmark big-parallel kvm
trusted-users = remote-build
substituters = http://localhost:8080 https://cache.vedenemo.dev https://cache.nixos.org
trusted-public-keys = ghaf-infra-dev:EdgcUJsErufZitluMOYmoJDMQE+HFyveI/D270Cr84I= cache.vedenemo.dev:8NhplARANhClUSWJyLVk4WMyy1Wb4rhmWW2u8AejH9E= cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="
  sudo sh -c "printf '$extra_nix_conf\n' >> /etc/nix/nix.conf"
}

configure_rclone() {
  # Install rclone from upstream and set it up as a systemd service that
  # serves the azure blob binary cache read-only on localhost:8080.
  # The version of rclone in ubuntu repositories is too old to include
  # --azureblob-env-auth: https://rclone.org/install/
  sudo -v
  # -fsSL: fail on HTTP errors (instead of piping an error page into a
  # root shell), be quiet except for errors, and follow redirects.
  # NOTE(review): piping a remote script into 'sudo bash' is a supply
  # chain risk; consider pinning a checksummed release instead.
  curl -fsSL https://rclone.org/install.sh | sudo bash
  # DynamicUser + the env file written by cloud-init provide the
  # credentials-free (managed identity) access to the storage account.
  service_file="
[Unit]
After=network.target
Requires=network.target
[Service]
DynamicUser=true
EnvironmentFile=/var/lib/rclone-http/env
ExecStart=/usr/bin/rclone serve http --azureblob-env-auth --read-only --addr localhost:8080 :azureblob:binary-cache-v1
Restart=always
RestartSec=2
RuntimeDirectory=rclone-http
Type=notify"
  sudo sh -c "printf '$service_file\n' > /etc/systemd/system/rclone-http.service"
  sudo systemctl daemon-reload
  sudo systemctl enable rclone-http.service
  sudo systemctl start rclone-http.service
}

restart_nix_daemon() {
  # Restart nix-daemon (when present) so it picks up the configuration
  # appended to /etc/nix/nix.conf; no-op on single-user installs.
  if ! systemctl list-units | grep -iq "nix-daemon"; then
    return 0
  fi
  sudo systemctl restart nix-daemon
  if ! systemctl status nix-daemon; then
    echo "Error: nix-daemon failed to start"
    exit 1
  fi
}

uninstall_nix() {
  # Best-effort removal of any earlier nix installation (single- or
  # multi-user) so the subsequent install starts from a clean slate.
  # Nix ships no uninstaller; these manual steps follow
  # https://github.com/NixOS/nix/issues/1402
  # Delete the nix build users and their group
  if grep -q nixbld /etc/passwd; then
    grep nixbld /etc/passwd | awk -F ":" '{print $1}' | xargs -t -n 1 sudo userdel -r
  fi
  if grep -q nixbld /etc/group; then
    sudo groupdel nixbld
  fi
  # Remove per-user nix state, channels and profiles
  rm -rf "$HOME/"{.nix-channels,.nix-defexpr,.nix-profile,.config/nixpkgs,.config/nix,.config/home-manager,.local/state/nix,.local/state/home-manager}
  sudo rm -rf /etc/profile.d/nix.sh
  # Remove the store and the system-wide configuration
  if [ -d "/nix" ]; then
    sudo rm -rf /nix
  fi
  if [ -d "/etc/nix" ]; then
    sudo rm -fr /etc/nix
  fi
  # Drop the shell-init backup files the nix installer created
  sudo find /etc -iname "*backup-before-nix*" -delete
  # Remove the nix* symlinks from /usr/bin (created by install_nix)
  sudo find -L /usr/bin -iname "nix*" -delete
  # Strip nix-related lines from the shell startup files
  [ -f "$HOME/.profile" ] && sed -i "/\/nix/d" "$HOME/.profile"
  [ -f "$HOME/.bash_profile" ] && sed -i "/\/nix/d" "$HOME/.bash_profile"
  [ -f "$HOME/.bashrc" ] && sed -i "/\/nix/d" "$HOME/.bashrc"
  # Stop and remove the nix-daemon units (multi-user installs only)
  if systemctl list-units | grep -iq "nix-daemon"; then
    sudo systemctl stop nix-daemon nix-daemon.socket
    sudo systemctl disable nix-daemon nix-daemon.socket
    sudo find /etc/systemd -iname "*nix-daemon*" -delete
    sudo find /usr/lib/systemd -iname "*nix-daemon*" -delete
    sudo systemctl daemon-reload
    sudo systemctl reset-failed
  fi
  unset NIX_PATH
}

outro() {
  # Wrap up: stop command tracing, report the installed nixpkgs version
  # (failing loudly if nix is unusable) and remind the user to re-login.
  set +x
  echo ""
  nixpkgs_ver=$(nix-instantiate --eval -E '(import <nixpkgs> {}).lib.version' 2>/dev/null)
  if [ -z "$nixpkgs_ver" ]; then
    echo "Failed reading installed nixpkgs version"
    exit 1
  fi
  echo "Installed nixpkgs version: $nixpkgs_ver"
  echo ""
  echo "Open a new terminal for the changes to take impact"
  echo ""
}

exit_unless_command_exists() {
  # Exit with an error unless command "$1" is available on PATH.
  # Redirect stdout as well as stderr: 'command -v' prints the resolved
  # path on success, which the original leaked into the script's output.
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "Error: command '$1' is not installed" >&2
    exit 1
  fi
}

################################################################################

main() {
  # Provision this ubuntu host as a nix remote builder:
  # sanity-check the environment, then (re)install and configure nix,
  # the builder settings and the binary cache proxy.
  exit_unless_command_exists "apt-get"
  exit_unless_command_exists "systemctl"
  apt_update
  # Remove any leftover installation before a clean multi-user install
  uninstall_nix
  install_nix "multi"
  configure_builder
  configure_rclone
  restart_nix_daemon
  # Verify the installation actually produced a working nix
  exit_unless_command_exists "nix-shell"
  outro
}

################################################################################

# Entry point: forward any CLI arguments to main (currently unused there)
main "$@"

################################################################################
Loading

0 comments on commit d9aa5b5

Please sign in to comment.