From da685e3e4827cd17848e4925f8b1e4311b31f993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enes=20=C3=96zt=C3=BCrk?= Date: Wed, 13 Nov 2024 16:14:21 +0200 Subject: [PATCH] Fetcher scripts for MS and Ghaf URLs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * A single script to fetch additional MS and Ghaf URLs in net-vm. * Handling of PAC file by the fetcher script in business-vm. Signed-off-by: Enes Öztürk --- modules/reference/appvms/business.nix | 152 +++++++++++-- modules/reference/programs/google-chrome.nix | 29 ++- .../services/proxy-server/3proxy-config.nix | 205 ++++++------------ .../services/proxy-server/ms_url_fetcher.nix | 76 ------- .../services/proxy-server/url_fetcher.nix | 144 ++++++++++++ 5 files changed, 373 insertions(+), 233 deletions(-) delete mode 100644 modules/reference/services/proxy-server/ms_url_fetcher.nix create mode 100644 modules/reference/services/proxy-server/url_fetcher.nix diff --git a/modules/reference/appvms/business.nix b/modules/reference/appvms/business.nix index e4b6344ae..14f6064d9 100644 --- a/modules/reference/appvms/business.nix +++ b/modules/reference/appvms/business.nix @@ -11,12 +11,14 @@ let inherit (lib) mkIf optionalString; #TODO: Move this to a common place name = "business"; + proxyUserName = "proxy-user"; + proxyGroupName = "proxy-admin"; tiiVpnAddr = "151.253.154.18"; - vpnOnlyAddr = "${tiiVpnAddr},jira.tii.ae,access.tii.ae,confluence.tii.ae,i-service.tii.ae,catalyst.atrc.ae"; + pacFileName = "ghaf.pac"; + pacServerAddr = "127.0.0.1:8000"; + pacFileUrl = "http://${pacServerAddr}/${pacFileName}"; netvmEntry = builtins.filter (x: x.name == "net-vm") config.ghaf.networking.hosts.entries; netvmAddress = lib.head (builtins.map (x: x.ip) netvmEntry); - adminvmEntry = builtins.filter (x: x.name == "admin-vm") config.ghaf.networking.hosts.entries; - adminvmAddress = lib.head (builtins.map (x: x.ip) adminvmEntry); # Remove rounded corners from the text editor window gnomeTextEditor 
= pkgs.gnome-text-editor.overrideAttrs (oldAttrs: { postPatch = @@ -25,6 +27,75 @@ let echo -e '\nwindow { border-radius: 0px; }' >> src/style.css ''; }); + + _ghafPacFileFetcher = + let + pacFileDownloadUrl = "https://raw.githubusercontent.com/tiiuae/ghaf-rt-config/refs/heads/main/network/proxy/ghaf.pac"; + proxyServerUrl = "http://${netvmAddress}:${toString config.ghaf.reference.services.proxy-server.bindPort}"; + logTag = "ghaf-pac-fetcher"; + in + pkgs.writeShellApplication { + name = "ghafPacFileFetcher"; + runtimeInputs = [ + pkgs.coreutils # Provides 'mv', 'rm', etc. + pkgs.curl # For downloading PAC files + pkgs.inetutils # Provides 'logger' + ]; + text = '' + # Variables + TEMP_PAC_PATH=$(mktemp) + LOCAL_PAC_PATH="/etc/proxy/${pacFileName}" + + # Logging function with timestamp + log() { + logger -t "${logTag}" "$1" + } + + log "Starting the pac file fetch process..." + + # Fetch the pac file using curl with a proxy + log "Fetching pac file from ${pacFileDownloadUrl} using proxy ${proxyServerUrl}..." + http_status=$(curl --proxy "${proxyServerUrl}" -s -o "$TEMP_PAC_PATH" -w "%{http_code}" "${pacFileDownloadUrl}") + + log "HTTP status code: $http_status" + + # Check if the fetch was successful + if [[ "$http_status" -ne 200 ]]; then + log "Error: Failed to download pac file from ${pacFileDownloadUrl}. HTTP status code: $http_status" + rm -f "$TEMP_PAC_PATH" # Clean up temporary file + exit 2 + fi + + # Verify the downloaded file is not empty + if [[ ! -s "$TEMP_PAC_PATH" ]]; then + log "Error: The downloaded pac file is empty." + rm -f "$TEMP_PAC_PATH" # Clean up temporary file + exit 3 + fi + + # Log the download success + log "Pac file downloaded successfully. Proceeding with update..." + + # Copy the content from the temporary pac file to the target file + log "Copying the content from temporary file to the target pac file at $LOCAL_PAC_PATH..." 
+ + # Check if the copy was successful + if cat "$TEMP_PAC_PATH" > "$LOCAL_PAC_PATH"; then + log "Pac file successfully updated at $LOCAL_PAC_PATH." + else + log "Error: Failed to update the pac file at $LOCAL_PAC_PATH." + rm -f "$TEMP_PAC_PATH" # Clean up temporary file + exit 4 + fi + + # Clean up temporary file + rm -f "$TEMP_PAC_PATH" + + log "Pac file fetch and update process completed successfully." + exit 0 + ''; + }; + in { name = "${name}"; @@ -36,7 +107,7 @@ in pkgs.openconnect gnomeTextEditor pkgs.xarchiver - + pkgs.busybox ] ++ lib.optionals config.ghaf.profiles.debug.enable [ pkgs.tcpdump ] ++ lib.optionals config.ghaf.givc.enable [ pkgs.open-normal-extension ]; @@ -71,20 +142,20 @@ in applications = [ { name = "google-chrome"; - command = "${config.ghaf.givc.appPrefix}/run-waypipe ${config.ghaf.givc.appPrefix}/google-chrome-stable --enable-features=UseOzonePlatform --ozone-platform=wayland ${config.ghaf.givc.idsExtraArgs} --load-extension=${pkgs.open-normal-extension}"; + command = "${config.ghaf.givc.appPrefix}/run-waypipe ${config.ghaf.givc.appPrefix}/google-chrome-stable --proxy-pac-url=${pacFileUrl} --enable-features=UseOzonePlatform --ozone-platform=wayland ${config.ghaf.givc.idsExtraArgs} --load-extension=${pkgs.open-normal-extension}"; args = [ "url" ]; } { name = "outlook"; - command = "${config.ghaf.givc.appPrefix}/run-waypipe ${config.ghaf.givc.appPrefix}/google-chrome-stable --enable-features=UseOzonePlatform --ozone-platform=wayland --app=https://outlook.office.com/mail/ ${config.ghaf.givc.idsExtraArgs} --load-extension=${pkgs.open-normal-extension}"; + command = "${config.ghaf.givc.appPrefix}/run-waypipe ${config.ghaf.givc.appPrefix}/google-chrome-stable --proxy-pac-url=${pacFileUrl} --enable-features=UseOzonePlatform --ozone-platform=wayland --app=https://outlook.office.com/mail/ ${config.ghaf.givc.idsExtraArgs} --load-extension=${pkgs.open-normal-extension}"; } { name = "office"; - command = "${config.ghaf.givc.appPrefix}/run-waypipe 
${config.ghaf.givc.appPrefix}/google-chrome-stable --enable-features=UseOzonePlatform --ozone-platform=wayland --app=https://microsoft365.com ${config.ghaf.givc.idsExtraArgs} --load-extension=${pkgs.open-normal-extension}"; + command = "${config.ghaf.givc.appPrefix}/run-waypipe ${config.ghaf.givc.appPrefix}/google-chrome-stable --proxy-pac-url=${pacFileUrl} --enable-features=UseOzonePlatform --ozone-platform=wayland --app=https://microsoft365.com ${config.ghaf.givc.idsExtraArgs} --load-extension=${pkgs.open-normal-extension}"; } { name = "teams"; - command = "${config.ghaf.givc.appPrefix}/run-waypipe ${config.ghaf.givc.appPrefix}/google-chrome-stable --enable-features=UseOzonePlatform --ozone-platform=wayland --app=https://teams.microsoft.com ${config.ghaf.givc.idsExtraArgs} --load-extension=${pkgs.open-normal-extension}"; + command = "${config.ghaf.givc.appPrefix}/run-waypipe ${config.ghaf.givc.appPrefix}/google-chrome-stable --proxy-pac-url=${pacFileUrl} --enable-features=UseOzonePlatform --ozone-platform=wayland --app=https://teams.microsoft.com ${config.ghaf.givc.idsExtraArgs} --load-extension=${pkgs.open-normal-extension}"; } { name = "gpclient"; @@ -107,7 +178,6 @@ in reference = { programs.google-chrome.enable = true; - services.globalprotect = { enable = true; csdWrapper = "${pkgs.openconnect}/libexec/openconnect/hipreport.sh"; @@ -149,13 +219,66 @@ in # Enable dconf and icon pack for gnome text editor programs.dconf.enable = true; environment.systemPackages = [ pkgs.adwaita-icon-theme ]; + # Define a new group for proxy management + users.groups.${proxyGroupName} = { }; # Create a group named proxy-admin + + # Define a new user with a specific username + users.users.${proxyUserName} = { + isSystemUser = true; + description = "Proxy User for managing allowlist and services"; + # extraGroups = [ "${proxyGroupName}" ]; # Adding to 'proxy-admin' for specific access + group = "${proxyGroupName}"; + }; + + environment.etc."proxy/${pacFileName}" = { + text = 
''''; + user = "${proxyUserName}"; # Owner is proxy-user + group = "${proxyGroupName}"; # Group is proxy-admin + mode = "0664"; # Permissions: read/write for owner/group, read-only for others + }; + + systemd.services.pacServer = { + description = "Http server to make PAC file accessible for web browsers"; + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + serviceConfig = { + ExecStart = "${pkgs.busybox}/bin/busybox httpd -f -p ${pacServerAddr} -h /etc/proxy"; + # httpd is started with -f (foreground), so a simple service type is used + Type = "simple"; + # Restart policy + Restart = "always"; # Restart the service whenever it exits + RestartSec = "15s"; # Wait 15 seconds before restarting + User = "${proxyUserName}"; + }; + }; + + systemd.services.ghafPacFileFetcher = { + description = "Fetch ghaf pac file periodically with retries if internet is available"; + + serviceConfig = { + ExecStart = "${_ghafPacFileFetcher}/bin/ghafPacFileFetcher"; + # ghafPacFileFetcher is triggered by the timer of the same name below + Type = "simple"; + # Restart policy on failure + Restart = "on-failure"; # Restart the service if it fails + RestartSec = "15s"; # Wait 15 seconds before restarting + User = "${proxyUserName}"; + }; + }; + + systemd.timers.ghafPacFileFetcher = { + description = "Run ghafPacFileFetcher periodically"; + wantedBy = [ "timers.target" ]; + timerConfig = { + User = "${proxyUserName}"; + Persistent = true; # Ensures the timer runs after a system reboot + OnCalendar = "daily"; # Set to your desired schedule + OnBootSec = "90s"; + }; + }; #Firewall Settings networking = { - proxy = { - default = "http://${netvmAddress}:${toString config.ghaf.reference.services.proxy-server.bindPort}"; - noProxy = "192.168.101.10,${adminvmAddress},127.0.0.1,localhost,${vpnOnlyAddr}"; - }; firewall = { enable = true; extraCommands = '' @@ -170,6 +293,9 @@ in # Default policy iptables -P INPUT DROP + iptables -A INPUT -i lo -j ACCEPT + iptables -A OUTPUT -o lo -j ACCEPT + # Block any other 
unwanted traffic (optional) iptables -N logreject iptables -A logreject -j LOG diff --git a/modules/reference/programs/google-chrome.nix b/modules/reference/programs/google-chrome.nix index 9ef6ded39..c54a36d4b 100644 --- a/modules/reference/programs/google-chrome.nix +++ b/modules/reference/programs/google-chrome.nix @@ -20,8 +20,6 @@ in PromptForDownloadLocation = true; AlwaysOpenPdfExternally = true; DefaultBrowserSettingEnabled = true; - StartupBrowserWindowLaunchSuppressed = true; - DeviceMetricsReportingEnabled = false; MetricsReportingEnabled = false; }; example = lib.literalExpression '' @@ -30,6 +28,7 @@ in } ''; }; + extraOpts = lib.mkOption { type = lib.types.attrs; description = '' @@ -38,9 +37,9 @@ in Make sure the selected policy is supported on Linux and your browser version. ''; - default = { - - }; + default = + { + }; example = lib.literalExpression '' { "BrowserSignin" = 0; @@ -54,15 +53,33 @@ in } ''; }; + + policyOwner = lib.mkOption { + type = lib.types.str; + default = "root"; + description = "Policy files owner"; + }; + + policyOwnerGroup = lib.mkOption { + type = lib.types.str; + default = "root"; + description = "Policy files group"; + }; }; config = lib.mkIf cfg.enable { environment.etc = { "opt/chrome/policies/managed/default.json" = lib.mkIf (cfg.defaultPolicy != { }) { text = builtins.toJSON cfg.defaultPolicy; + user = "${cfg.policyOwner}"; # Owner comes from the policyOwner option (default: root) + group = "${cfg.policyOwnerGroup}"; # Group comes from the policyOwnerGroup option (default: root) + mode = "0664"; # Permissions: read/write for owner/group, read-only for others }; - "opt/chrome/policies/managed/extra.json" = lib.mkIf (cfg.extraOpts != { }) { + "opt/chrome/policies/managed/extra.json" = { text = builtins.toJSON cfg.extraOpts; + user = "${cfg.policyOwner}"; # Owner comes from the policyOwner option (default: root) + group = "${cfg.policyOwnerGroup}"; # Group comes from the policyOwnerGroup option (default: root) + mode = "0664"; # Permissions: read/write for owner/group, read-only for others }; }; diff --git 
a/modules/reference/services/proxy-server/3proxy-config.nix b/modules/reference/services/proxy-server/3proxy-config.nix index 5bae71611..502a9a6ba 100644 --- a/modules/reference/services/proxy-server/3proxy-config.nix +++ b/modules/reference/services/proxy-server/3proxy-config.nix @@ -9,14 +9,20 @@ let cfg = config.ghaf.reference.services.proxy-server; inherit (lib) mkEnableOption mkIf; - proxyUserName = "proxy-user"; proxyGroupName = "proxy-admin"; - proxyAllowListName = "allowlist.txt"; - proxyWritableAllowListPath = "/etc/${proxyAllowListName}"; - ms-url-fetcher = pkgs.callPackage ./ms_url_fetcher.nix { - allowListPath = proxyWritableAllowListPath; - }; + url-fetcher = pkgs.callPackage ./url_fetcher.nix { }; + + msUrls = "https://endpoints.office.com/endpoints/worldwide?clientrequestid=b10c5ed1-bad1-445f-b386-b919946339a7"; + ghafUrls = "https://api.github.com/repos/tiiuae/ghaf-rt-config/contents/network/proxy/urls?ref=main"; + + msAllowFilePath = "3proxy/ms_whitelist.txt"; + ghafAllowFilePath = "3proxy/ghaf_whitelist.txt"; + + allowListPaths = [ + msAllowFilePath + ghafAllowFilePath + ]; _3proxy-restart = pkgs.writeShellApplication { name = "3proxy-restart"; @@ -40,90 +46,6 @@ let echo "3proxy service successfully started" ''; }; - tiiUrls = [ - #for jira avatars - "*.gravatar.com" - # for confluence icons - "*.atlassian.com" - "*tii.ae" - "*tii.org" - "tiiuae.sharepoint.com" - "tiiuae-my.sharepoint.com" - "hcm22.sapsf.com" - "aderp.addigital.gov.ae" - "s1.mn1.ariba.com" - "tii.sourcing.mn1.ariba.com" - "a1c7ohabl.accounts.ondemand.com" - "flpnwc-ojffapwnic.dispatcher.ae1.hana.ondemand.com" - "*.docusign.com" - "access.clarivate.com" - ]; - - ssrcUrls = [ - "*.cachix.org" - "vedenemo.dev" - "loki.ghaflogs.vedenemo.dev" - "ghaflogs.vedenemo.dev" - "himalia.vedenemo.dev" - ]; - - extraMsUrls = [ - #ms366 - "graph.microsoft.com" - "ocws.officeapps.live.com" - "microsoft365.com" - "*.azureedge.net" # microsoft365 icons - "consentreceiverfd-prod.azurefd.net" # ms365 
cookies - "c.s-microsoft.com" - "js.monitor.azure.com" - "ocws.officeapps.live.com" - "northcentralus0-mediap.svc.ms" - "*.bing.com" - "cdnjs.cloudfare.com" - "store-images.s-microsoft.com" - "www.office.com" - "res-1.cdn.office.net" - "secure.skypeassets.com" - "js.live.net" - "skyapi.onedrive.live.com" - "am3pap006files.storage.live.com" - "c7rr5q.am.files.1drv.com" - #teams - "teams.live.com" - "*.teams.live.com" - "fpt.live.com" # teams related - "statics.teams.cdn.live.net" - "ipv6.login.live.com" - #outlook - "outlook.live.com" # outlook login - "csp.microsoft.com" - "arc.msn.com" - "www.msn.com" - "outlook.com" - #https://learn.microsoft.com/en-us/microsoft-365/enterprise/managing-office-365-endpoints?view=o365-worldwide#why-do-i-see-names-such-as-nsatcnet-or-akadnsnet-in-the-microsoft-domain-names - "*.akadns.net" - "*.akam.net" - "*.akamai.com" - "*.akamai.net" - "*.akamaiedge.net" - "*.akamaihd.net" - "*.akamaized.net" - "*.edgekey.net" - "*.edgesuite.net" - "*.nsatc.net" - "*.exacttarget.com" - #onedrive - "1drv.ms" - "onedrive.live.com" - "p.sfx.ms" - "my.microsoftpersonalcontent.com" - "*.onedrive.com" - "cdn.onenote.net" - "wvcyna.db.files.1drv.com" - "*.storage.live.com" - ]; - # Concatenate the lists and join with commas - concatenatedUrls = builtins.concatStringsSep "," (tiiUrls ++ ssrcUrls ++ extraMsUrls); config_file_content = '' # log to stdout @@ -136,9 +58,9 @@ let #private addresses deny * * 0.0.0.0/8,127.0.0.0/8,10.0.0.0/8,100.64.0.0/10,172.16.0.0/12,192.168.0.0/16,::,::1,fc00::/7 - allow * * ${concatenatedUrls} * #include dynamic whitelist ips - include "${proxyWritableAllowListPath}" + include "/etc/${msAllowFilePath}" + include "/etc/${ghafAllowFilePath}" deny * * * * maxconn 200 @@ -177,14 +99,23 @@ in group = "${proxyGroupName}"; }; - # Set up the permissions for allowlist.txt - environment.etc."${proxyAllowListName}" = { - text = ''''; - user = "${proxyUserName}"; # Owner is proxy-user - group = "${proxyGroupName}"; # Group is 
proxy-admin - mode = "0660"; # Permissions: read/write for owner/group, no permissions for others - }; - + # Allow list files written by the URL fetcher services and included by 3proxy + + # Create environment.etc configuration for each allow list path + # Loop over the allowListPaths and apply the configuration directly + environment.etc = builtins.foldl' ( + acc: path: + acc + // { + "${path}" = { + text = ''''; + user = "${proxyUserName}"; # Owner is proxy-user + group = "${proxyGroupName}"; # Group is proxy-admin + mode = "0660"; # Permissions: read/write for owner/group, no permissions for others + }; + } + ) { } allowListPaths; + # End of the per-allow-list environment.etc entries # Allow proxy-admin group to manage specific systemd services without a password security = { polkit = { @@ -207,7 +138,7 @@ in }; - environment.systemPackages = [ ms-url-fetcher ]; + environment.systemPackages = [ url-fetcher ]; #Firewall Settings networking = { firewall.enable = true; @@ -218,12 +149,12 @@ in ''; }; # systemd service for fetching the file - systemd.services.fetchFile = { - description = "Fetch a file periodically with retries if internet is available"; + systemd.services.msFetchUrl = { + description = "Fetch microsoft URLs periodically with retries if internet is available"; serviceConfig = { - ExecStart = "${ms-url-fetcher}/bin/ms-url-fetch"; - # Ensure fetchFile starts after the network is up + ExecStart = "${url-fetcher}/bin/url-fetcher -u ${msUrls} -p /etc/${msAllowFilePath}"; + # Ensure msFetchUrl starts after the network is up Type = "simple"; # Retry until systemctl restart 3proxy succeeds ExecStartPost = "${_3proxy-restart}/bin/3proxy-restart"; @@ -235,8 +166,8 @@ in }; # systemd timer to trigger the service every 10 minutes - systemd.timers.fetchFile = { - description = "Run fetch-file periodically"; + systemd.timers.msFetchUrl = { + description = "Run msFetchUrl periodically"; wantedBy = [ "timers.target" ]; timerConfig = { User = "${proxyUserName}"; @@ -246,6 +177,35 @@ in }; }; 
+ # systemd service that fetches the Ghaf URL allow list + systemd.services.ghafFetchUrl = { + description = "Fetch ghaf related URLs periodically with retries if internet is available"; + + serviceConfig = { + ExecStart = "${url-fetcher}/bin/url-fetcher -f ${ghafUrls} -p /etc/${ghafAllowFilePath}"; + # Ensure ghafFetchUrl starts after the network is up + Type = "simple"; + # Retry until systemctl restart 3proxy succeeds + ExecStartPost = "${_3proxy-restart}/bin/3proxy-restart"; + # Restart policy on failure + Restart = "on-failure"; # Restart the service if it fails + RestartSec = "15s"; # Wait 15 seconds before restarting + User = "${proxyUserName}"; + }; + }; + + # systemd timer to trigger the service hourly and 90 seconds after boot + systemd.timers.ghafFetchUrl = { + description = "Run ghafFetchUrl periodically"; + wantedBy = [ "timers.target" ]; + timerConfig = { + User = "${proxyUserName}"; + Persistent = true; # Ensures the timer runs after a system reboot + OnCalendar = "hourly"; # Set to your desired schedule + OnBootSec = "90s"; + }; + }; + systemd.services."3proxy".serviceConfig = { RestartSec = "5s"; User = "${proxyUserName}"; @@ -258,37 +218,6 @@ in confFile = pkgs.writeText "3proxy.conf" '' ${config_file_content} ''; - - /* - NOTE allow and deny configurations should must be placed before the other configs - it is not possible to do with extraConfig. 
Because it appends the file - */ - /* - services = [ - { - type = "proxy"; - bindAddress = "${netvmAddr}"; - inherit (cfg) bindPort; - maxConnections = 200; - auth = [ "iponly" ]; - acl = [ - { - rule = "allow"; - targets = tiiUrls; - } - { - rule = "allow"; - targets = ssrcUrls; - } - { - rule = "allow"; - targets = extraMsUrls; - } - { rule = "deny"; } - ]; - } - ]; - */ }; }; diff --git a/modules/reference/services/proxy-server/ms_url_fetcher.nix b/modules/reference/services/proxy-server/ms_url_fetcher.nix deleted file mode 100644 index 57ea3721f..000000000 --- a/modules/reference/services/proxy-server/ms_url_fetcher.nix +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright 2024 TII (SSRC) and the Ghaf contributors -# SPDX-License-Identifier: Apache-2.0 -{ - writeShellApplication, - lib, - pkgs, - allowListPath, - ... -}: -let - url = "https://endpoints.office.com/endpoints/worldwide?clientrequestid=b10c5ed1-bad1-445f-b386-b919946339a7"; - logTag = "ms-url-fetcher"; -in -writeShellApplication { - name = "ms-url-fetch"; - runtimeInputs = [ - pkgs.inetutils - pkgs.curl - pkgs.jq - ]; - text = '' - # Function to write to the allow list - write_to_allow_list() { - - local processedUrls="$1" - local allowListPath="$2" - - { - printf "allow * * " || { logger -t ms-url-fetcher "Failed to print prefix"; return 1; } - echo "$processedUrls" || { logger -t ms-url-fetcher "Failed to echo processed URLs"; return 1; } - } > "$allowListPath" || { logger -t ms-url-fetcher "Failed to write to $allowListPath"; return 2; } - return 0 # Indicate success - } - # Check if the device is connected to the internet. - if ping -c 1 8.8.8.8 &> /dev/null; then - logger -t ${logTag} "Fetching the Microsoft URLs from ${url}" - - # Fetch the JSON file using curl with retry logic - if curl_output=$(curl -s --retry 5 --retry-delay 10 --retry-connrefused "${url}"); then - msurl_output=$(echo "$curl_output" | jq -r '.[]? 
| select(.category == "Optimize" or .category == "Allow" or .category == "Default") | .urls[]?' | sort | uniq) - # Check if msurl_output is empty - if [ -z "$msurl_output" ]; then - logger -t ${logTag} "No valid URLs found in the fetched data." - exit 4 # No URLs found error - fi - - # Convert the list of URLs into a comma-separated format and save to allowListPath - processedUrls=$(echo "$msurl_output" | tr '\n' ',' | sed 's/,$//'); - - - - # Add the prefix once and save to allowListPath - if write_to_allow_list "$processedUrls" "${allowListPath}"; then - logger -t ${logTag} "Microsoft URLs fetched and saved to ${allowListPath} successfully" - exit 0 # Success exit code - else - logger -t ${logTag} "Failed to process Microsoft URLs with jq" - exit 2 # JQ processing error - fi - else - logger -t ${logTag} "Failed to fetch Microsoft URLs after multiple attempts" - exit 1 # Curl fetching error - fi - else - logger -t ${logTag} "No internet connection. Microsoft URLs not fetched." - exit 3 # No internet connection error - fi - ''; - meta = with lib; { - description = " - The application is a shell script designed to fetch a list of Microsoft URLs - from a specified endpoint and save them to an allow list file. The script includes error - handling and retry logic to ensure robustness in various network conditions. - "; - }; -} diff --git a/modules/reference/services/proxy-server/url_fetcher.nix b/modules/reference/services/proxy-server/url_fetcher.nix new file mode 100644 index 000000000..888915a8b --- /dev/null +++ b/modules/reference/services/proxy-server/url_fetcher.nix @@ -0,0 +1,144 @@ +# Copyright 2024 TII (SSRC) and the Ghaf contributors +# SPDX-License-Identifier: Apache-2.0 +{ + writeShellApplication, + lib, + pkgs, + ... 
+}: +let + logTag = "url-fetcher"; +in +writeShellApplication { + name = "url-fetcher"; + runtimeInputs = [ + pkgs.inetutils + pkgs.curl + pkgs.jq + pkgs.gawk + ]; + text = '' + # Default values for variables + url="" + url_folder="" + allowListPath="" + # Function to write to the allow list + write_to_allow_list() { + local processedUrls="$1" + local allowListPath="$2" + + { + # Ensure the "allow" prefix is written + printf "allow * * " || { logger -t ${logTag} "Failed to print prefix"; return 1; } + echo "$processedUrls" || { logger -t ${logTag} "Failed to echo processed URLs"; return 1; } + } > "$allowListPath" || { logger -t ${logTag} "Failed to write to $allowListPath"; return 2; } + + return 0 # Indicate success + } + + # Function to fetch and process URLs from a JSON file + fetch_and_process_url() { + local file_url="$1" + + # Fetch and parse the JSON + if json_content=$(curl -s --retry 5 --retry-delay 10 --retry-connrefused "$file_url"); then + echo "$json_content" | jq -r '.[]? | select(.category == "Optimize" or .category == "Allow" or .category == "Default") | .urls[]?' | sort | uniq + else + logger -t ${logTag} "Failed to fetch or parse JSON from $file_url" + return 1 + fi + } + + # Parse command line arguments + while getopts "u:f:p:" opt; do + case $opt in + u) url="$OPTARG" ;; # Single JSON file URL to fetch + f) url_folder="$OPTARG" ;; # Folder API URL containing JSON files + p) allowListPath="$OPTARG" ;; # Path to the allow list file + \?) echo "Usage: $0 -u | -f -p " + exit 1 ;; + esac + done + + # Validate input parameters + if [[ -z "$allowListPath" ]]; then + echo "Error: Allow List Path (-p) must be provided." + echo "Usage: $0 -u | -f -p " + exit 1 + fi + + if [[ -n "$url" && -n "$url_folder" ]]; then + echo "Error: Only one of -u or -f should be provided, not both." + exit 1 + elif [[ -z "$url" && -z "$url_folder" ]]; then + echo "Error: One of -u or -f must be provided." 
+ exit 1 + fi + + # Check if the device is connected to the internet + if ! ping -c 1 8.8.8.8 &> /dev/null; then + logger -t ${logTag} "No internet connection. URLs not fetched." + exit 3 + fi + + # Process a single URL (-u option) + all_urls="" + if [[ -n "$url" ]]; then + logger -t ${logTag} "Fetching URLs from $url" + + # Fetch and process the single JSON file + fetched_urls=$(fetch_and_process_url "$url") + if [[ -z "$fetched_urls" ]]; then + logger -t ${logTag} "No valid URLs found in the file $url" + exit 4 + fi + all_urls="$fetched_urls" + fi + + # Process a folder of JSON files (-f option) + if [[ -n "$url_folder" ]]; then + logger -t ${logTag} "Fetching JSON files from folder $url_folder" + + # Use the folder URL directly as the API endpoint + folder_api_url="$url_folder" + + # Fetch the folder contents from the API + folder_response=$(curl -s -H "Accept: application/vnd.github.v3+json" "$folder_api_url") + + # Extract JSON file URLs + file_urls=$(echo "$folder_response" | jq -r '.[] | select(.name | endswith(".json")) | .download_url') + + if [[ -z "$file_urls" ]]; then + logger -t ${logTag} "No JSON files found in folder $folder_api_url" + exit 4 + fi + + # Process each JSON file URL + for file_url in $file_urls; do + fetched_urls=$(fetch_and_process_url "$file_url") + all_urls+="$fetched_urls"$'\n' + done + fi + + # Deduplicate and format URLs + all_urls=$(echo "$all_urls" | sort | uniq | tr '\n' ',') # Sort, deduplicate, join with commas + all_urls=$(echo "$all_urls" | awk '{sub(/^,/, ""); print}') + all_urls=$(echo "$all_urls" | awk '{gsub(/^,|,$/, ""); print}') + + # Write to the allow list + if write_to_allow_list "$all_urls" "$allowListPath"; then + logger -t ${logTag} "URLs fetched and saved to $allowListPath successfully" + exit 0 # Success + else + logger -t ${logTag} "Failed to save URLs to allow list" + exit 2 + fi + ''; + meta = with lib; { + description = " + The application is a shell script designed to fetch a list of URLs + from a 
specified endpoint and save them to an allow list file. The script includes error + handling and retry logic to ensure robustness in various network conditions. + "; + }; +}