Skip to content

Commit

Permalink
Secondary HTCondor cluster playbook (central manager)
Browse files Browse the repository at this point in the history
Add a playbook that sets up the central manager for the secondary HTCondor cluster. It also enables the same machine to submit jobs (submit role).

The playbook is written under the assumption that the job working directories are already mounted, that the firewall ports are open, and that user ids and group ids are configured correctly. Further modifications may be needed regarding these points.
  • Loading branch information
kysrpex committed Oct 18, 2023
1 parent 28b879d commit 22c49ba
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 0 deletions.
14 changes: 14 additions & 0 deletions group_vars/central-manager-secondary.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
# Variables for the secondary HTCondor cluster host, which acts as both
# central manager and submit node (no execute role).

# Where the central manager listens.
condor_host: condor-cm-secondary.galaxyproject.eu
condor_port: '9628'
htcondor_server: '{{ condor_host }}'

# HTCondor release selection.
# NOTE(review): both values are parsed by YAML as the float 10.0, not a
# string. Confirm how the consuming role uses them before quoting.
htcondor_version: 10.0
htcondor_channel: 10.0
htcondor_domain: bi.uni-freiburg.de

# Roles played by this machine.
htcondor_role_manager: true
htcondor_role_submit: true
htcondor_role_execute: false

# Firewall handling is switched off here.
htcondor_firewall_condor: false
htcondor_firewall_nfs: false
3 changes: 3 additions & 0 deletions hosts
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,6 @@ ansible_ssh_user=centos

# Inventory group with a single host, reached as root.
[central-manager]
manager.vgcn.galaxyproject.eu ansible_ssh_user=root

# Inventory group for the secondary HTCondor central manager. The connection
# targets 127.0.0.1:2222 as root and is tunnelled through the SSH ProxyCommand
# given in ansible_ssh_common_args; HostKeyAlias makes SSH check the host key
# under the inventory hostname instead of 127.0.0.1.
[central-manager-secondary]
manager-secondary.galaxyproject.eu ansible_host=127.0.0.1 ansible_port=2222 ansible_ssh_user=root ansible_ssh_common_args='-o HostKeyAlias=manager-secondary.galaxyproject.eu -o ProxyCommand="ssh -W %h:%p -q [email protected]"'
40 changes: 40 additions & 0 deletions htcondor-secondary.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
---
# Sets up the host(s) in the `central-manager-secondary` inventory group:
# writes /etc/condor/condor_config.local from a template and then applies the
# grycap.htcondor role.
- name: Secondary HTCondor 10 cluster.
  hosts: central-manager-secondary
  vars_files:
    - group_vars/sn06.yml
    - group_vars/central-manager-secondary.yml
    - secret_group_vars/central-manager-secondary.yml
  handlers:
    # Reload only when the condor unit is known to the service manager and is
    # currently running. `service_facts` is registered by the last pre_task;
    # handlers notified in pre_tasks run after all pre_tasks have finished,
    # so the variable is available here.
    # The key must be 'condor.service' (the unit name that service_facts
    # reports), matching the lookup on the second line of the condition.
    - name: Reload HTCondor
      when: >-
        'condor.service' in service_facts.ansible_facts.services and
        service_facts.ansible_facts.services['condor.service'].state == 'running'
      become: true
      ansible.builtin.service:
        name: condor
        state: reloaded
  pre_tasks:
    - name: Ensure the HTCondor configuration directory exists.
      become: true
      ansible.builtin.file:
        path: /etc/condor
        state: directory
        owner: root
        group: root
        mode: "0755"

    - name: Template HTCondor configuration.
      become: true
      ansible.builtin.template:
        src: htcondor-secondary/condor_config.local.j2
        dest: /etc/condor/condor_config.local
        owner: root
        group: root
        mode: "0644"
      notify: Reload HTCondor

    # Collects the service list that the "Reload HTCondor" handler inspects.
    - name: Check if HTCondor is running.
      ansible.builtin.service_facts:
      register: service_facts
  roles:
    - grycap.htcondor
3 changes: 3 additions & 0 deletions requirements.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ roles:
version: 0.0.1
- name: usegalaxy_eu.htcondor
version: 1.0.1
# Installed from the GitHub repository given in `src` and pinned to a full
# commit hash rather than a release tag.
# NOTE(review): the repository name suggests a fork of the grycap role - confirm.
- name: grycap.htcondor
src: https://github.com/kysrpex/grycap-ansible-role-htcondor
version: ed8519cdb95d78e039a69a40c08dbbe4f6e2f5da
- name: usegalaxy-eu.update-hosts
src: https://github.com/usegalaxy-eu/ansible-update-hosts
version: 0.2.0
Expand Down
8 changes: 8 additions & 0 deletions secret_group_vars/central-manager-secondary.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
$ANSIBLE_VAULT;1.1;AES256
36666533303537633565363734396532643836653734366632346135356131613133666638393663
6366633866386138613437373231336333303737653864640a326439303831646633666337323831
30306137393664643636363439346636643133663433623532366462323830633963626264373137
3266636336653731320a666162663465626563656162333164663937346138613932383533303733
36623038313362623839386538366536363933383039623836393166613264613035363032353761
63613431626139323234653636663737353931373637376661393834623664663137323636316162
323437636137613639353662316431653936
46 changes: 46 additions & 0 deletions templates/htcondor-secondary/condor_config.local.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Central manager address; the collector and SHARED_PORT_PORT use the same
# port, and daemon updates are sent over TCP instead of UDP.
CONDOR_HOST = {{ condor_host }}
COLLECTOR_HOST = $(CONDOR_HOST):{{ condor_port }}
SHARED_PORT_PORT = {{ condor_port }}
WANT_UDP_COMMAND_SOCKET = False
UPDATE_COLLECTOR_WITH_TCP = True
UPDATE_VIEW_COLLECTOR_WITH_TCP = True

# Authorization lists. READ mirrors WRITE.
ALLOW_WRITE = {{ condor_allow_write }}
ALLOW_READ = $(ALLOW_WRITE)
ALLOW_NEGOTIATOR = {{ condor_allow_negotiator }}

{% if condor_allow_administrator is defined %}
ALLOW_ADMINISTRATOR = {{ condor_allow_administrator }}
{% endif %}

# OWNER mirrors ADMINISTRATOR (whatever value is in effect when the optional
# variable above is not set).
ALLOW_OWNER = $(ALLOW_ADMINISTRATOR)
ALLOW_CLIENT = *
DAEMON_LIST = {{ ", ".join(condor_daemons) }}
# Define FS and UID domain
FILESYSTEM_DOMAIN = {{ condor_fs_domain }}
UID_DOMAIN = {{ condor_uid_domain }}
TRUST_UID_DOMAIN = True
SOFT_UID_DOMAIN = True

{% if condor_system_periodic_hold is defined %}
# Put jobs with JobStatus 1 or 2 on hold once the time since JobStartDate
# reaches the configured limit.
SYSTEM_PERIODIC_HOLD = \
  (JobStatus == 1 || JobStatus == 2) && \
  ((time() - JobStartDate) >= ({{ condor_system_periodic_hold }}))
# The condition is wrapped in one balanced pair of parentheses so that the
# commas separate the three ifThenElse() arguments.
SYSTEM_PERIODIC_HOLD_REASON = \
  ifThenElse(((time() - JobStartDate) >= ({{ condor_system_periodic_hold }})), \
             "Maximum wallclock time exceeded", \
             "Unspecified reason")
{% if condor_system_periodic_remove is defined %}
# Remove jobs that have stayed in JobStatus 5 longer than the configured time.
# Guarded separately so that setting only the hold limit does not make the
# template fail on an undefined variable.
SYSTEM_PERIODIC_REMOVE = \
  (JobStatus == 5 && time() - EnteredCurrentStatus > {{ condor_system_periodic_remove }})
{% endif %}
{% endif %}

{% if condor_network_interface is defined %}
NETWORK_INTERFACE = {{ condor_network_interface }}
{% endif %}

{% if condor_extra is defined and condor_extra %}
{{ condor_extra }}
{% endif %}

SEC_CLIENT_AUTHENTICATION_METHODS = IDTOKENS, FS
SEC_READ_AUTHENTICATION_METHODS = IDTOKENS, FS

0 comments on commit 22c49ba

Please sign in to comment.