From 22c49ba99b04ae5c7b54756140da6b5ba0309e66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Manuel=20Dom=C3=ADnguez?=
Date: Wed, 18 Oct 2023 12:25:38 +0200
Subject: [PATCH] Secondary HTCondor cluster playbook (central manager)

Add a playbook that sets up the central manager for the secondary HTCondor
cluster. It also enables the same machine to submit jobs (submit role).

The playbook is written under the assumption that the job working
directories are already mounted, that the firewall ports are open, and that
user ids and group ids are configured correctly. Further modifications may
be needed regarding these points.
---
 group_vars/central-manager-secondary.yml      | 14 ++++++
 hosts                                         |  3 ++
 htcondor-secondary.yml                        | 40 ++++++++++++++++
 requirements.yaml                             |  3 ++
 .../central-manager-secondary.yml             |  8 ++++
 .../htcondor-secondary/condor_config.local.j2 | 46 +++++++++++++++++++
 6 files changed, 114 insertions(+)
 create mode 100644 group_vars/central-manager-secondary.yml
 create mode 100644 htcondor-secondary.yml
 create mode 100644 secret_group_vars/central-manager-secondary.yml
 create mode 100644 templates/htcondor-secondary/condor_config.local.j2

diff --git a/group_vars/central-manager-secondary.yml b/group_vars/central-manager-secondary.yml
new file mode 100644
index 000000000..d3fef515f
--- /dev/null
+++ b/group_vars/central-manager-secondary.yml
@@ -0,0 +1,14 @@
+# Configure the secondary HTCondor cluster (central manager and submit roles).
+---
+condor_host: "condor-cm-secondary.galaxyproject.eu"
+condor_port: "9628"
+
+htcondor_version: "10.0"  # quoted: a bare 10.0 is loaded as a float
+htcondor_channel: "10.0"  # quoted for the same reason
+htcondor_domain: bi.uni-freiburg.de
+htcondor_role_execute: false
+htcondor_role_submit: true
+htcondor_role_manager: true
+htcondor_server: "{{ condor_host }}"
+htcondor_firewall_condor: false
+htcondor_firewall_nfs: false
diff --git a/hosts b/hosts
index 00594fcb5..a965cbc9b 100644
--- a/hosts
+++ b/hosts
@@ -78,3 +78,6 @@ ansible_ssh_user=centos

 [central-manager]
 manager.vgcn.galaxyproject.eu ansible_ssh_user=root
+
+[central-manager-secondary]
+manager-secondary.galaxyproject.eu ansible_host=127.0.0.1 ansible_port=2222 ansible_ssh_user=root ansible_ssh_common_args='-o HostKeyAlias=manager-secondary.galaxyproject.eu -o ProxyCommand="ssh -W %h:%p -q root@sn06.galaxyproject.eu"'
diff --git a/htcondor-secondary.yml b/htcondor-secondary.yml
new file mode 100644
index 000000000..1ed8a7682
--- /dev/null
+++ b/htcondor-secondary.yml
@@ -0,0 +1,40 @@
+---
+- name: Secondary HTCondor 10 cluster.
+  hosts: central-manager-secondary
+  vars_files:
+    - group_vars/sn06.yml
+    - group_vars/central-manager-secondary.yml
+    - secret_group_vars/central-manager-secondary.yml
+  handlers:
+    - name: Reload HTCondor  # skipped unless condor.service exists and is running
+      when: "'condor.service' in service_facts.ansible_facts.services and \
+        service_facts.ansible_facts.services['condor.service'].state == 'running'"
+      become: true
+      ansible.builtin.service:
+        name: condor
+        state: reloaded
+  pre_tasks:
+    - name: Ensure the HTCondor configuration directory exists.
+      become: true
+      ansible.builtin.file:
+        path: /etc/condor
+        state: directory
+        owner: root
+        group: root
+        mode: "0755"
+
+    - name: Template HTCondor configuration.
+      become: true
+      ansible.builtin.template:
+        src: htcondor-secondary/condor_config.local.j2
+        dest: /etc/condor/condor_config.local
+        owner: root
+        group: root
+        mode: "0644"
+      notify: Reload HTCondor
+
+    - name: Check if HTCondor is running.  # registers the facts the handler's `when` reads
+      ansible.builtin.service_facts:
+      register: service_facts
+  roles:
+    - grycap.htcondor
diff --git a/requirements.yaml b/requirements.yaml
index 70abe7552..5e794cdf9 100644
--- a/requirements.yaml
+++ b/requirements.yaml
@@ -88,6 +88,9 @@ roles:
     version: 0.0.1
   - name: usegalaxy_eu.htcondor
     version: 1.0.1
+  - name: grycap.htcondor
+    src: https://github.com/kysrpex/grycap-ansible-role-htcondor
+    version: ed8519cdb95d78e039a69a40c08dbbe4f6e2f5da
   - name: usegalaxy-eu.update-hosts
     src: https://github.com/usegalaxy-eu/ansible-update-hosts
     version: 0.2.0
diff --git a/secret_group_vars/central-manager-secondary.yml b/secret_group_vars/central-manager-secondary.yml
new file mode 100644
index 000000000..77904f6f0
--- /dev/null
+++ b/secret_group_vars/central-manager-secondary.yml
@@ -0,0 +1,8 @@
+$ANSIBLE_VAULT;1.1;AES256
+36666533303537633565363734396532643836653734366632346135356131613133666638393663
+6366633866386138613437373231336333303737653864640a326439303831646633666337323831
+30306137393664643636363439346636643133663433623532366462323830633963626264373137
+3266636336653731320a666162663465626563656162333164663937346138613932383533303733
+36623038313362623839386538366536363933383039623836393166613264613035363032353761
+63613431626139323234653636663737353931373637376661393834623664663137323636316162
+323437636137613639353662316431653936
diff --git a/templates/htcondor-secondary/condor_config.local.j2 b/templates/htcondor-secondary/condor_config.local.j2
new file mode 100644
index 000000000..554a61380
--- /dev/null
+++ b/templates/htcondor-secondary/condor_config.local.j2
@@ -0,0 +1,46 @@
+CONDOR_HOST = {{ condor_host }}
+COLLECTOR_HOST = $(CONDOR_HOST):{{ condor_port }}
+SHARED_PORT_PORT = {{ condor_port }}
+WANT_UDP_COMMAND_SOCKET = False
+UPDATE_COLLECTOR_WITH_TCP = True
+UPDATE_VIEW_COLLECTOR_WITH_TCP = True
+
+ALLOW_WRITE = {{ condor_allow_write }}
+ALLOW_READ = $(ALLOW_WRITE)
+ALLOW_NEGOTIATOR = {{ condor_allow_negotiator }}
+
+{% if condor_allow_administrator is defined %}
+ALLOW_ADMINISTRATOR = {{ condor_allow_administrator }}
+{% endif %}
+
+ALLOW_OWNER = $(ALLOW_ADMINISTRATOR)
+ALLOW_CLIENT = *
+DAEMON_LIST = {{ ", ".join(condor_daemons) }}
+# Define FS and UID domain
+FILESYSTEM_DOMAIN = {{ condor_fs_domain }}
+UID_DOMAIN = {{ condor_uid_domain }}
+TRUST_UID_DOMAIN = True
+SOFT_UID_DOMAIN = True
+
+{% if condor_system_periodic_hold is defined %}
+SYSTEM_PERIODIC_HOLD = \
+  (JobStatus == 1 || JobStatus == 2) && \
+  ((time() - JobStartDate) >= ({{ condor_system_periodic_hold }}))
+SYSTEM_PERIODIC_HOLD_REASON = \
+  ifThenElse(((time() - JobStartDate) >= ({{ condor_system_periodic_hold }})), \
+  "Maximum wallclock time exceeded", \
+  "Unspecified reason")
+SYSTEM_PERIODIC_REMOVE = \
+  (JobStatus == 5 && time() - EnteredCurrentStatus > {{ condor_system_periodic_remove }})
+{% endif %}
+
+{% if condor_network_interface is defined %}
+NETWORK_INTERFACE = {{ condor_network_interface }}
+{% endif %}
+
+{% if condor_extra is defined and condor_extra %}
+{{ condor_extra }}
+{% endif %}
+
+SEC_CLIENT_AUTHENTICATION_METHODS = IDTOKENS, FS
+SEC_READ_AUTHENTICATION_METHODS = IDTOKENS, FS