diff --git a/.gitignore b/.gitignore
index c8b241f2..e420ee4b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1 @@
-target/*
\ No newline at end of file
+target/*
diff --git a/ansible/.gitignore b/ansible/.gitignore
index 99ed0d43..032107fb 100644
--- a/ansible/.gitignore
+++ b/ansible/.gitignore
@@ -1 +1,4 @@
 notes.txt
+ansible.cfg
+.yamlfmt
+.run.sh
diff --git a/ansible/README.md b/ansible/README.md
new file mode 100644
index 00000000..917e91e1
--- /dev/null
+++ b/ansible/README.md
@@ -0,0 +1,268 @@
+# LARD on OpenStack 2
+
+## Get access to OpenStack
+
+You need to create application credentials in the project you are going to
+create the instances in, so that the ansible scripts can connect to the right
+`ostack_cloud` (in our case it's `lard`).
+
+The file should exist in `~/.config/openstack/clouds.yml`.
+If you have MET access, see what is written at the start of the readme [here](https://gitlab.met.no/it/infra/ostack-ansible21x-examples)
+or in the authentication section [here](https://gitlab.met.no/it/infra/ostack-doc/-/blob/master/ansible-os.md?ref_type=heads).
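+
+As a reference, a minimal `clouds.yml` using application credentials might look
+something like the sketch below (the `auth_url` and credential values are
+placeholders; use the endpoint and secrets OpenStack gives you when you create
+the credentials):
+
+```yaml
+clouds:
+  lard:
+    auth_type: v3applicationcredential
+    auth:
+      auth_url: https://keystone.example.met.no:5000/v3 # placeholder endpoint
+      application_credential_id: "<your-credential-id>"
+      application_credential_secret: "<your-credential-secret>"
+    region_name: Ostack2-EXT
+```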
+
+## Dependencies
+
+- Python 3.10+
+
+- On your terminal run the following:
+
+  ```terminal
+  python3 -m venv ~/.venv/lard
+  source ~/.venv/lard/bin/activate
+
+  pip install -r requirements.txt
+  ansible-galaxy collection install -fr requirements.yml
+  ```
+
+## Setup
+
+### 1. Provision!
+
+> [!IMPORTANT]
+> Add your public key to the Ostack GUI.
+> Go to "Compute" then "Key Pairs" and import your public key for later use during this step.
+
+The IPs associated with the hosts in `inventory.yml` should correspond to
+floating IPs you have requested in the network section of the OpenStack GUI.
+These IPs are stored in the `ansible_host` variables inside each `host_vars/host_name.yml`.
+
+Private variables are encrypted with `ansible-vault` and stored inside different files per role in `group_vars/servers/vault`.
+You can either decrypt them beforehand, or pass the `-J` flag to ansible when running the playbooks.
+Passwords can be found in [CICD variables](https://gitlab.met.no/met/obsklim/bakkeobservasjoner/lagring-og-distribusjon/db-products/poda/-/settings/ci_cd).
+
+```terminal
+ansible-playbook -i inventory.yml -e key_name=... provision.yml
+```
+
+> [!NOTE]
+> If the network has already been set up and you only need to rebuild the VMs, you can do so with
+>
+> ```terminal
+> ansible-playbook -i inventory.yml -e key_name=... provision.yml --skip-tags network
+> ```
+
+### 2. Configure!
+
+The floating IP (`fip`) passed in here is the one that gets associated with the primary and is moved when doing a switchover.
+
+> [!NOTE]
+> The floating IP association times out, but this is ignored as it is a known bug.
+
+```terminal
+ansible-playbook -i inventory.yml -e fip=... -e db_password=... -e repmgr_password=... configure.yml
+```
+
+The parts to do with the floating IP that belongs to the primary (ipalias) are based on this [repo](https://gitlab.met.no/ansible-roles/ipalias/-/tree/master?ref_type=heads).
+
+#### SSH into the VMs
+
+It might be helpful to create host aliases and add them to your `~/.ssh/config` file,
+so you don't have to remember the IPs by heart. An example host alias looks like the following:
+
+```ssh
+Host lard-a
+    HostName 157.249.*.*
+    User ubuntu
+```
+
+Then run:
+
+```terminal
+ssh lard-a
+PGPASSWORD=... psql -h localhost -p 5432 -U lard_user -d lard
+```
+
+> [!NOTE]
+> You can also connect from your computer, but
+> unfortunately the ssh alias does not work for psql.
+> You can define a separate service inside `~/.pg_service.conf`:
+>
+> ```
+> [lard-a]
+> host=157.249.*.*
+> port=5432
+> user=lard_user
+> dbname=lard
+> password=...
+> ```
+>
+> And then
+>
+> ```terminal
+> psql service=lard-a
+> ```
+
+#### Checking the status of the cluster
+
+After `ssh`-ing into the server and becoming the postgres user (`sudo su postgres`), you can check the repmgr status with:
+
+```terminal
+postgres@lard-a:/home/ubuntu$ repmgr -f /etc/repmgr.conf node check
+Node "lard-a":
+    Server role: OK (node is primary)
+    Replication lag: OK (N/A - node is primary)
+    WAL archiving: OK (0 pending archive ready files)
+    Upstream connection: OK (N/A - node is primary)
+    Downstream servers: OK (1 of 1 downstream nodes attached)
+    Replication slots: OK (node has no physical replication slots)
+    Missing physical replication slots: OK (node has no missing physical replication slots)
+    Configured data directory: OK (configured "data_directory" is "/mnt/ssd-data/16/main")
+```
+
+```terminal
+postgres@lard-b:/home/ubuntu$ repmgr -f /etc/repmgr.conf node check
+Node "lard-b":
+    Server role: OK (node is standby)
+    Replication lag: OK (0 seconds)
+    WAL archiving: OK (0 pending archive ready files)
+    Upstream connection: OK (node "lard-b" (ID: 2) is attached to expected upstream node "lard-a" (ID: 1))
+    Downstream servers: OK (this node has no downstream nodes)
+    Replication slots: OK (node has no physical replication slots)
+    Missing physical replication slots: OK (node has no missing physical replication slots)
+    Configured data directory: OK (configured "data_directory" is "/mnt/ssd-data/16/main")
+```
+
+While a few of the configurations are found in `/etc/postgresql/16/main/postgresql.conf`, many of them
+can only be seen in `/mnt/ssd-data/16/main/postgresql.auto.conf` (you need `sudo` to see its contents).
+
+### 3. Deploy LARD
+
+This is as simple as running:
+
+```terminal
+ansible-playbook -i inventory.yml deploy.yml
+```
+
+### 4. Teardown
+
+> **TODO**: This should be automated if possible
+
+If you need to delete the old VMs (Compute -> Instances) and Volumes (Volumes -> Volumes),
+you can do so in the OpenStack GUI.
+
+> [!CAUTION]
+> When deleting things in order to build them up again, if for some reason one of the IPs
+> does not get disassociated properly, you have to do it manually from the GUI (Network -> Floating IPs).
+
+## Switchover
+
+### 1. Planned maintenance
+
+This should only be used when both VMs are up and running, for example during planned maintenance in one data room.
+You can use this script ahead of time to switch the primary to the data room that will stay available.
+
+**Make sure you are aware which one is the primary, and put the names the right way around in this call.**
+
+> **TODO**: This should be automated
+
+```terminal
+ansible-playbook -i inventory.yml -e primary=... -e standby=... -e fip=... switchover.yml
+```
+
+This should also be possible to do manually, but you might need to follow what is done in the ansible script (i.e. restarting postgres on both VMs)
+before performing the switchover (as the `postgres` user):
+
+```terminal
+repmgr standby switchover -f /etc/repmgr.conf --siblings-follow
+```
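+
+Afterwards you can verify that the roles were swapped with `cluster show`. The output
+below is only a sketch (columns truncated), assuming `lard-b` was promoted:
+
+```terminal
+postgres@lard-b:/home/ubuntu$ repmgr -f /etc/repmgr.conf cluster show
+ ID | Name   | Role    | Status    | Upstream | ...
+----+--------+---------+-----------+----------+-----
+ 1  | lard-a | standby |   running | lard-b   | ...
+ 2  | lard-b | primary | * running |          | ...
+```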
+
+### 2. Promote standby (assuming the primary is down)
+
+This is used in the case where the primary has gone down (e.g. unplanned downtime of a data room).
+Make sure you know which one you want to promote!
+
+**Manually:**
+
+1. `ssh` into the standby
+
+1. Check the status
+
+   ```terminal
+   repmgr -f /etc/repmgr.conf cluster show
+   ```
+
+   The primary should say it's **unreachable**
+
+1. Then promote the standby to primary (while `ssh`-ed into the standby VM)
+
+   ```terminal
+   repmgr -f /etc/repmgr.conf standby promote
+   ```
+
+1. You can then check the status again (and now the old primary will say **failed**)
+
+1. Then move the IP in the OpenStack GUI (see Network -> Floating IPs, disassociate it, then associate it with the ipalias port on the other VM)
+
+#### Later, when the old primary comes back up
+
+The cluster will be in a slightly confused state, because this VM still thinks it's a primary (although repmgr tells it the other one is running as a primary as well).
+If the setup is running asynchronously we could lose data that wasn't copied over before the crash; if running synchronously there should be no data loss.
+
+SSH into the new primary; `repmgr -f /etc/repmgr.conf cluster show` says:
+
+- node "lard-a" (ID: 1) is running but the repmgr node record is inactive
+
+SSH into the old primary; `repmgr -f /etc/repmgr.conf cluster show` says:
+
+- node "lard-b" (ID: 2) is registered as standby but running as primary
+
+With a **playbook** (`rejoin_ip` is the IP of the primary node that has been down and should now become a standby):
+
+```terminal
+ansible-playbook -i inventory.yml -e rejoin_ip=... -e primary_ip=... rejoin.yml
+```
+
+Or **manually**:
+
+1. Make sure the pg process is stopped (see the fast stop command below) if it isn't already
+
+1. Become the postgres user: `sudo su postgres`
+
+1. Test the rejoin (`host` is the IP of the new / current primary, i.e. the other VM):
+   `repmgr node rejoin -f /etc/repmgr.conf -d 'host=157.249.*.* user=repmgr dbname=repmgr connect_timeout=2' --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose --dry-run`
+
+1. Perform the rejoin:
+   `repmgr node rejoin -f /etc/repmgr.conf -d 'host=157.249.*.* user=repmgr dbname=repmgr connect_timeout=2' --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose`
+
+### For testing
+
+Take out one of the replicas (or shut off the instance in the OpenStack GUI):
+`sudo pg_ctlcluster 16 main -m fast stop`
+
+To bring it back up (or turn it back on):
+`sudo pg_ctlcluster 16 main start`
+
+### Load balancing
+
+This role creates a user and a basic db for the load balancer to test the health of the db.
+The parts that would fail on the standby ("cannot execute \_\_\_ in a read-only transaction") are only run on the primary, and the objects they create are replicated over.
+The hba conf change needs to be run on both.
+
+The vars are encrypted (they live in `group_vars/servers/vault/bigip.yml`), so either decrypt them beforehand or pass the `-J` flag.
+
+Then run the bigip role on the VMs:
+
+```
+ansible-playbook -i inventory.yml -e bigip_password=... 
bigip.yml +``` + +### Links: + +https://www.enterprisedb.com/postgres-tutorials/postgresql-replication-and-automatic-failover-tutorial#replication + +#### Useful ansible commands: + +```terminal +ansible-inventory -i inventory.yml --graph + +ansible servers -m ping -u ubuntu -i inventory.yml +``` diff --git a/ansible/bigip.yml b/ansible/bigip.yml index 5fc6dcf8..e36c8c7c 100644 --- a/ansible/bigip.yml +++ b/ansible/bigip.yml @@ -1,52 +1,7 @@ -- name: Copy schema for bigip - vars: - ostack_cloud: lard - ostack_region: Ostack2-EXT - hosts: localhost # need to seperate this since done from localhost - gather_facts: false - pre_tasks: - # copy file, so we have an .sql file to apply locally - - name: Create a directory if it does not exist - ansible.builtin.file: - path: /etc/postgresql/16/db/bigip - state: directory - mode: '0755' - become: true - delegate_to: '{{ hostvars[groups["servers"][0]].ansible_host }}' - remote_user: ubuntu - - name: Copy the schema to the remote 1 - ansible.builtin.copy: - src: ./roles/bigip/vars/bigip.sql - dest: /etc/postgresql/16/db/bigip/bigip.sql - mode: '0755' - become: true - delegate_to: '{{ hostvars[groups["servers"][0]].ansible_host }}' - remote_user: ubuntu - - name: Create a directory if it does not exist - ansible.builtin.file: - path: /etc/postgresql/16/db/bigip - state: directory - mode: '0755' - become: true - delegate_to: '{{ hostvars[groups["servers"][1]].ansible_host }}' - remote_user: ubuntu - - name: Copy the schema to the remote 2 - ansible.builtin.copy: - src: ./roles/bigip/vars/bigip.sql - dest: /etc/postgresql/16/db/bigip/bigip.sql - mode: '0755' - become: true - delegate_to: '{{ hostvars[groups["servers"][1]].ansible_host }}' - remote_user: ubuntu - +--- - name: Create what is needed for the bigip load balancers hosts: servers remote_user: ubuntu - vars: - ostack_cloud: lard - ostack_region: Ostack2-EXT gather_facts: false - # loops over both servers roles: - - role: bigip - # will fail to create table in the standby (since read only) \ No newline at end of file + - role: bigip diff --git a/ansible/configure.yml b/ansible/configure.yml index ff586fd0..73463fc1 100644 --- a/ansible/configure.yml +++ b/ansible/configure.yml @@ -1,35 +1,50 @@ +--- - name: Mount disks and install stuff on the VMs hosts: servers remote_user: ubuntu + gather_facts: true vars: - ostack_cloud: lard - ipalias_network_name: ipalias - ostack_region: Ostack2-EXT - # loops over both servers - pre_tasks: - - name: List ansible_hosts_all difference from ansible_host (aka the vm not currently being iterated on) - ansible.builtin.debug: - msg: "{{ (ansible_play_hosts_all | difference([inventory_hostname])) | first }}" - roles: - - role: addsshkeys - - role: vm_format - vars: - name_stuff: '{{ inventory_hostname }}' # name of current vm for finding ipalias port - - role: ssh + primary: lard-a # or provide via cmd + fip: # provide via cmd + db_password: # provide via cmd + repmgr_password: # provide via cmd + + tasks: + - name: Add keys to local known_hosts + ansible.builtin.include_role: + name: ssh + tasks_from: localhost.yml + + - name: Add user SSH keys + ansible.builtin.include_role: + name: ssh + tasks_from: users.yml + + - name: Format VM + ansible.builtin.include_role: + name: ostack + tasks_from: vm_format.yml vars: - vm_ip: '{{ ansible_host }}' # the current vm's ip + ostack_repmgr_password: "{{ repmgr_password }}" -- name: Setup primary and standby - vars: - ostack_cloud: lard - ostack_region: Ostack2-EXT - hosts: localhost - gather_facts: false + - name: Share 
postgres SSH key between hosts + ansible.builtin.include_role: + name: ssh + tasks_from: postgres.yml + + - name: Setup primary host + ansible.builtin.include_role: + name: ostack + tasks_from: create_primary.yml + when: inventory_hostname == primary + vars: + ostack_db_password: "{{ db_password }}" + ostack_floating_ip: "{{ fip }}" - roles: - - role: primarystandbysetup + - name: Setup standby host + ansible.builtin.include_role: + name: ostack + tasks_from: create_standby.yml + when: inventory_hostname != primary vars: - primary_name: lard-a - primary_ip: '{{ hostvars[groups["servers"][0]].ansible_host }}' # the first one is a - standby_name: lard-b - standby_ip: '{{ hostvars[groups["servers"][1]].ansible_host }}' # the second one is b \ No newline at end of file + ostack_primary_ip: "{{ hostvars[primary].ansible_host }}" diff --git a/ansible/deploy.yml b/ansible/deploy.yml index e17750af..d3ff2983 100644 --- a/ansible/deploy.yml +++ b/ansible/deploy.yml @@ -1,7 +1,19 @@ --- -- name: Deploy binaries - # Deploy on both VMs, only the primary is "active" +- name: Build LARD binaries + hosts: localhost + gather_facts: false + tasks: + - name: Build + ansible.builtin.command: + chdir: .. + cmd: cargo build --workspace --release + register: output + changed_when: output.stderr_lines | length > 1 + +- name: Deploy LARD binaries hosts: servers remote_user: ubuntu + gather_facts: false + roles: - role: deploy diff --git a/ansible/group_vars/servers.yaml b/ansible/group_vars/servers.yaml deleted file mode 100644 index abfaab5d..00000000 --- a/ansible/group_vars/servers.yaml +++ /dev/null @@ -1,15 +0,0 @@ -$ANSIBLE_VAULT;1.1;AES256 -39323433373866646663333962396637323937663436333763373663306264363337383838313531 -3237623337373630343264663232346366316332326564330a343062633165363564616663373366 -38303633346231626433393232313332373933626432613534646538326638623339323830613465 -3135643661323930370a656136326637373933353665316462373938396338383831353039323731 -61363032653830613438313564303432613531636436306662666336383838616132666234616538 -34313861306432626237383464623134386565643831396537343232646237323764346633373461 -30333265653634313436323735633733623032333039303461633931383337333664333636386532 -66383830323265303334323163313862393466383362646634653764373230613464393332363336 -63346438306666633835316333323464623261643861646636316637346363626162303662303039 -38616335356663343762356465653635623330383532656464633537333933613632336433653838 -36633130356262383464653935653864323233346162656639303861643533643563376464633530 -62343336663961363566383438393866353336366438343365363663623162313838396666343539 -39363766316532626463363533653561333933663130353632353934393534333965346637626636 -3138623135623031386437353434643736323166623661666263 diff --git a/ansible/group_vars/servers/main.yml b/ansible/group_vars/servers/main.yml new file mode 100644 index 00000000..4a06162e --- /dev/null +++ b/ansible/group_vars/servers/main.yml @@ -0,0 +1,30 @@ +--- +ostack_cloud: lard +ostack_state: present +ostack_region: Ostack2-EXT +ostack2: true + +# networks +ostack_network_name: "{{ vault_ostack_network_name }}" +ostack_network_cidr: "{{ vault_ostack_network_cidr }}" +ostack_network_dns: "{{ vault_ostack_network_dns }}" +ostack_network_security_groups: "{{ vault_ostack_network_security_groups }}" +ostack_ipalias_network_cidr: "{{ vault_ostack_ipalias_network_cidr }}" + +# vm_create +ostack_vm_flavor: "{{ vault_ostack_vm_flavor }}" +ostack_vm_image: "{{ vault_ostack_vm_image }}" +ostack_vm_security_groups: "{{ 
vault_ostack_vm_security_groups }}" +ostack_vm_volume_type: "{{ vault_ostack_vm_volume_type }}" +ostack_vm_volume_size: "{{ vault_ostack_vm_volume_size }}" + +# vm_format +ostack_mount_device: "{{ vault_ostack_mount_device }}" +ostack_mount_point: "/mnt/ssd-data/" + +# ssh +ssh_user_key_list: "{{ vault_ssh_user_key_list }}" + +# bigip +bigip_password: "{{ vault_bigip_password }}" +bigip_load_balancer_ips: "{{ vault_bigip_load_balancer_ips }}" diff --git a/ansible/group_vars/servers/vault/bigip.yml b/ansible/group_vars/servers/vault/bigip.yml new file mode 100644 index 00000000..7ffc21e8 --- /dev/null +++ b/ansible/group_vars/servers/vault/bigip.yml @@ -0,0 +1,11 @@ +$ANSIBLE_VAULT;1.1;AES256 +37363262383434633232343266333761666266353731303238376537616263646662363339396663 +6532353866626566313132373939306638643334643064650a383333316663393765343131643539 +36356635336235393363373431326537393436653665396438303831333764356135343935636466 +3430336239666233390a616233386133666237646466396261643138623230623464333431663136 +30363230663462343138613261346330623035346532613366656637636165376262323965633630 +65623032343762663266663933343838663061393932666664353362333436323236613165313232 +38643262646139643536323362383066353439656338393966316539306163373732613630366536 +62623365316235633430303432613662396134663137646431643565643561333436316233396262 +62393263646236663636623162666666666664373061636465653631326639653839643636363837 +3364373361393463646164343735386430303931376461663664 diff --git a/ansible/group_vars/servers/vault/networks.yml b/ansible/group_vars/servers/vault/networks.yml new file mode 100644 index 00000000..2c3c36db --- /dev/null +++ b/ansible/group_vars/servers/vault/networks.yml @@ -0,0 +1,100 @@ +$ANSIBLE_VAULT;1.1;AES256 +30336539613233356431383363663636646332363034333338376436626361633330656461616230 +6239366231393335373865623566613462386562383265310a326563613431653863323032643832 +35373234663731363133393934306663346163336335393437393736313636613766643531393134 +6266623730373535350a663565626130663561353038613837376231646334643739396239313233 +31653463396635323834373036643032613636636363353530353866623034313764363364336535 +32306431633837613463333939373335363731313139653438366564613461353139623765363165 +35306136366330323064343761306465336138316261323033646163316237636131316231633435 +37323339663536303334373534663738626564633935646338323538326265396435633762643031 +35663562633562653431623637373961353765376232393363616233353864313832353531636166 +34623437646433636433313264313366306636346436366331613437623931653765643133666238 +34363636313836313239326538346332663834373064333233613939306539336163383164383665 +37626533393561326136383139306635646266396461353835353130383035346231353132373362 +31643338363131323035313436633537333632336636323763643730623731323564346635303937 +31363061353936396334313131663065653638623034663539613931656432313335353037376433 +65353635616232303763346666653263636339393664613431313762396531663037356464316663 +62616130323863356432633035643139643334666139396236396466313935313637383836323436 +32656366616438373831656331306335636132666131616262386664383033626661343262633034 +36333439623133313765613834356438313962643038373530623537613336646139643364323765 +36363961303435326436386136343337373438373035326366366536626466306365613466366436 +33643863613632333636643565346636613461616263316662656631363038313164363337396164 +30316434613562313261663361343466386131343635393066643930643439636336613764396632 
+32353039623162333265373763653464663436626236626636656534656337306165313666656139 +37646337323434636230376138363738613139333365623136346438396630666162313136313832 +36376532366437653233333061653431373238336463396366303934643665376362646639643466 +65626261623330636133356263653939353564643131333164396365343961333235613365346363 +62333536663431306131323333646230346664306130366639363334616135306439346634636530 +35626336366564323363373535626431636235303666326631363862356138313566306134383831 +38316332383034306365396538393039356337396433383433353861666438366465656465613665 +33663932636532343032333634633135306136663936326230623666646461386637353761663762 +63376433313863316536343230396462616161343966396464393065306337353631353735333038 +61356430356537633763623730393766373438313930616661653366386230643865386332643963 +65343463356261393066613339663430616561333633633762326461373630373333386531643134 +33623763363462656661613762386339383638663935633835613264643831303762313464643366 +30363132386233643963623036653132646164626366333631343935373237303162626263373332 +64636565323434663834336266313262616165316539313930336265383339333462646139396466 +32656166366636376661323436626463653239333865353366616461656330666335346137396234 +66356266336232623635656339386561616564626666636263663732393839376630366463316630 +33623964633331376533323739383133363835666661636133646461396437306631626539663463 +33653838373038343463323936333433643164313466376530363033646339376230336135383032 +62643739396233633264626563376231666531366637626464353136633833666333393032633937 +37316533346566656633333634363132356133333433626265663239346265653664653563303735 +63303665393333366263306666303464303632353534333265383364616530376663386264343532 +35343433346636366339636130313235626462353030363132646439383736306665333734386231 +63633839343131633463343830616533353764303134663966313538666433636565626430353166 +33666166356565333036653762613736386535356331316366396361393934663733313937613638 +37376261366538306332646366326537613263633634353831366336613431333139616433323561 +66316162356263376239653038663534663466383538316264653966353835656137376661333434 +33336461353539386533656134396436363632303532306339366435353139333435356431393930 +37326633663665616535633839663632653231646438376639636165356161343637353761656337 +65346665613434306365343534383262613763326562303966386662616161303631306530366330 +34666465383132353261373938386531326430653030363834393136663161666336333562353235 +62353037646539643765323463653534326439646331663730353538623061656436313731356561 +61343238376234643139323732633261376134613433383638626230623531396262306339326539 +32393734613533353231313536313133613336366439663334303733666264386239656630616164 +32656235663636373133626439363632653937353631623864316136383863343730643838613434 +39663437616535383034303234656365336437346233643136643738386635336539343937396637 +32383766353038313266306530396662303961386631356331653566643537356264623562353835 +34393537373035323533323462623730383230386462363665323637663064363262306263323663 +65356439323237633437663766616131636161336466323964323731643431643833313531383337 +65616136336435373033663661623339623634663232613133313237666539306531346231626135 +31653864633865363839323232306332383032623232623530393031386239346362383562353837 +37633933623964663163373430373465303031316431346139666237353763386633366163626164 +62613430336363336331633664303234656632613733326666396230643464373836343535393463 +61666565343764626231663133393262336563396439376338323436616430373562646262616465 
+63393261643439326164336266623339336566373335333037643232656166656164616362323366 +62373536313231303530663831646462313737623536343534643130663633653065636263313730 +65396662646264313830633164313132303739653438376365343735396436633164613465666430 +30613266383731643764343962346432653038323632636437656162633832353562616530633131 +34646264373335313033386635373132386638613662373363356638393464306264376530623137 +61383239333039663962356561356365623134393563646432333463363830656336643231343937 +63613965656435343162333632336361313662376265313064326432316463343631623364333636 +65343130313262636634323065613530623536643462616662653863653230343066303163326463 +33643762656638303733633135316664653163636236653433333065306166393739613664303436 +37636334646339336336666135366162323138643839623832366333393064393237643566363934 +37663262323634313063653839363932386534303733336536326533336133383030343837643738 +63666665373032336163623234313337356462353830616237306337643137383433616466616239 +33346365386566643131323862623136336339353335643338346534383664643331346432343533 +39623561323136383365313336633165623964323631363063356261383466626666656139613834 +33313662666266313661353564613030643436646530643033626564396134366265643162613764 +37613739323532623632343735313432393936643662396365393561326461313233653135653737 +33373461386433643435336530613563396438393235663662643961643134356433626461316366 +32343439643030386463653536313636646634653664356335376638323864656164356162393239 +34623935346537323166303630613339656232383336333532323766316564343935313737626564 +35396332643835636430373039616237333862373264653535653963623064653838643564633132 +30313333643365373333373963393965663261353461636362393135343661343864326635626333 +37643661613332393634616333393732323035663636333439386563636365366135393866616463 +63353431393061313563323131386363383438376634633663393061373133336261336239303165 +35303433393539343530396638306130353932386463666662633061303335646463353236383032 +37393934643466323837656235363264336330383939613766393565666433613963333966653432 +64363538356463633131323664653637663965366131613932306236326166326433616264376338 +33663836653463343962663965646139373932386430633835356533643434346335323461656330 +31653231666266366461626162613138383434336537373030366363656137323831343033353335 +31616634396636623433636266666135306363323661356231626335336539623036613865383037 +66383262323762613561663662323538393364656238613637383233333864386562363437633832 +33316662663066613435303466333738363538356634613363653734356665313831666162663637 +61363433393638653931663866623166623065303666343839373438313661313964623335626637 +36353661633165343331303230623932383533653232353133313432616365373461313632323430 +61623662303833323864393437346363333235363265666565336532653432653763393532663462 +623634386662376464346536326462346635 diff --git a/ansible/group_vars/servers/vault/ssh.yml b/ansible/group_vars/servers/vault/ssh.yml new file mode 100644 index 00000000..eee4b18a --- /dev/null +++ b/ansible/group_vars/servers/vault/ssh.yml @@ -0,0 +1,41 @@ +$ANSIBLE_VAULT;1.1;AES256 +63353466666334343338656431656133396463396434343732373338356662626335616332363764 +6235366139616332396262373330306563376336613832630a356534346239643964343530353633 +34323536316661336633383334363933393336303066663634326565663135613732323162303537 +3838303766643532390a653338316233623934356638366362383461396166323965306131623539 +31306565616662373530366438393434613936373363323833363336323036623535316463353963 
+35353966373832646563633662656332333664346238343634306465333062306438303833633035 +36386564313437646365366337636463373739336239393437343132376639303932653331363131 +38343133333065656662376564643865666132616531343138353639376534633133333038353564 +62633864393365363030393763333364613562393338636263316533383630393263656533656230 +63373636343532646530663338343837616361613130346337326330616531373432336465323535 +38336435303533363633663938653762383236356531666666373065613762353138356261333166 +65623735323231343936383131306339663335653663396438326232666237613337636466376338 +61666464323336386564633733656531666335313364343461306366633733383661636664613465 +35336166333763636233376464303830316531663535663365613530653239633065356462336235 +37353937313333393031636135353830336561626361636563633838333032383836383033363430 +66653433303330333962353966616663316362363832666166613261623165656234363231333836 +36663435356233326366666561613933666635383339353666316234326534656136306432303138 +63373563373935373830363161316232643362363565323032373861376566646532373235353239 +30643966336639643937333161343630643933613866303733616130623736383163626439663966 +65353731383733316135363638353235386262633938326438303632616130393932366666326431 +34366230386665653231353137663931336361306133636535386262353034303264396661316133 +34656263386665356164303531333561313231663031383333323739623166336432313736653639 +65306139363839316239366234363062343764313663643864303963396630643831383837326237 +39396235336661323731373731386162306665336362373461333032356531653435666136333939 +64663239396364653562343233613733623663383435383333386639623732353665633234643331 +39343561663630656239323737313737333334303731643761613366616339666263656636366334 +39663233363532383130353332343535323536636339313334346164346163366266633139333439 +63356432613230376462336333386533363737646437373665663935303936363462653837333632 +34626431313633663161343063323039616439386536316437373938336663356636663565373936 +36663935633838653137323134313630396661373935386235613463616132333064313939303834 +65343965626239343565376437363062343961333931376663646638386238626138383338356534 +61396332626536613539333765613430336438336130366235666330633336316138373261383837 +64396531626336613130326332373834633333623261643434396663333036313765643363616436 +35626433336639323066656262653663383230643334353231393165373338376132646537373466 +61383563393763306434643837326437643966386435353633376333323234613438396432653832 +33623465346332333835383564373563383164376161376133636331393362323362643536396331 +30613231336630663338386334653830643133366136393431633238323337373233663830366237 +37626237626437363435366235643630336638656465353338623831666239323136353062616132 +62393864373936336231313230333536353130363432373938663632336265303666386139333937 +35376362666333626334 diff --git a/ansible/group_vars/servers/vault/vm_create.yml b/ansible/group_vars/servers/vault/vm_create.yml new file mode 100644 index 00000000..e2d98e18 --- /dev/null +++ b/ansible/group_vars/servers/vault/vm_create.yml @@ -0,0 +1,18 @@ +$ANSIBLE_VAULT;1.1;AES256 +61653730313434633839383233623835393233633431316630636237316566323534366336613931 +3332623735316636343738363137653735633661326631310a323164353766313530626431373036 +34383930626136346236393933396562643332323562396231356461303839336131313539343164 +6431393563313739340a343164306164386538356238303166663863636164666366653639333433 +30346164386137313830613033346661323765383563356265313230343133313661373163656136 
+30336639656663643364633761383031333339363966303731333537306162303733326331323131 +32323064653164313763633530313032313030393334616539633036333235333162336363373262 +64373531666537613538636330623935636434643239303063306563323166383734303036376630 +64653332353764623639363031653664336265333730373135303930386639353663346233393133 +36623336313031336635653961326630313962393739393837646637353961633534626333366665 +39666430313966653938313733393434306437366465346565303135343662316132333132356336 +63303262303461333639333365383937393161333338636263616536633534616335333639623539 +31353166636131326132653264623035653164663435303334373235396236666666643839326464 +38633666383139323935663436613433653963316337663532343262313439663734343438666561 +65623061303462323935373462373432653936613734663339383931666337626165363639613866 +61633138363239393966643735313037336530626562303532313263653062643263643431303164 +3539 diff --git a/ansible/group_vars/servers/vault/vm_format.yml b/ansible/group_vars/servers/vault/vm_format.yml new file mode 100644 index 00000000..da5c0ea5 --- /dev/null +++ b/ansible/group_vars/servers/vault/vm_format.yml @@ -0,0 +1,9 @@ +$ANSIBLE_VAULT;1.1;AES256 +34616162303333656236623730383739373033303835323039383138656633356133383366306439 +6536336662623964663434313539356531666538313462360a363631653234383831333432666161 +32383137326239653137656364333261303035643438303839323233313632633832633764383963 +6636303035313236610a313634306132396364653339613037343031373239663065396564326438 +61383532353432336530623136316231376662633563373361306462366534366335363534626562 +38343337383334346164663433303933366434386239313135383734623431613331623239313532 +34366662623632353530666134326263383132353231366236636562343163396233313664386364 +61616433376264373034 diff --git a/ansible/host_vars/lard-a.yml b/ansible/host_vars/lard-a.yml new file mode 100644 index 00000000..4c27d206 --- /dev/null +++ b/ansible/host_vars/lard-a.yml @@ -0,0 +1,9 @@ +--- +ansible_host: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 31643933383935663537663361333834376232386566326531313161336238633262386432663939 + 6231613431336130396635393563373666323735643932360a653261373935373365333433313938 + 38663032303035646139306532666131383132333230633464643632333561323565353538323530 + 6563633966623536620a636634316463393763326237656365623636633264663765623439613430 + 3865 +ostack_availability_zone: ext-a diff --git a/ansible/host_vars/lard-b.yaml b/ansible/host_vars/lard-b.yaml new file mode 100644 index 00000000..d550fca3 --- /dev/null +++ b/ansible/host_vars/lard-b.yaml @@ -0,0 +1,9 @@ +--- +ansible_host: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 64376433646538343437656539363938333262306538323264623139393263666239376438323830 + 6162646538323466656135393235613865366530653465340a343434346233303030376463616561 + 36323436393364363364333066316463363937343838636134346134656636393230356639613462 + 3535383662326334630a316630366265313038313563323664376464333566303531343239636431 + 6536 +ostack_availability_zone: ext-b diff --git a/ansible/inventory.yml b/ansible/inventory.yml index a0a62a07..a57d71e6 100644 --- a/ansible/inventory.yml +++ b/ansible/inventory.yml @@ -1,6 +1,5 @@ +--- servers: hosts: lard-a: - ansible_host: 157.249.*.* lard-b: - ansible_host: 157.249.*.* \ No newline at end of file diff --git a/ansible/migrate.yml b/ansible/migrate.yml index cf6019ca..659a8002 100644 --- a/ansible/migrate.yml +++ b/ansible/migrate.yml @@ -1,10 +1,12 @@ --- +# TODO: update this when we change file names in db directory - name: Perform DB migration hosts: servers 
remote_user: ubuntu gather_facts: false vars: - primary: lard-a + # TODO: is there a better way to get this fact automatically? + primary: lard-a # or provide via cmd tasks: - name: Copy the db folder to the remote @@ -14,17 +16,13 @@ mode: "0755" become: true - - name: Migrate primary - when: inventory_hostname == primary + - name: Update schema in lard + community.postgresql.postgresql_script: + db: lard + path: "/etc/postgresql/16/db/{{ item }}" + loop: + - public.sql + - partitions_generated.sql become: true become_user: postgres - block: - - name: Update public schema in lard - community.postgresql.postgresql_script: - db: lard - path: /etc/postgresql/16/db/public.sql - - - name: Create partition tables in lard - community.postgresql.postgresql_script: - db: lard - path: /etc/postgresql/16/db/partitions_generated.sql + when: inventory_hostname == primary diff --git a/ansible/provision.yml b/ansible/provision.yml index 11bd242b..c616e946 100644 --- a/ansible/provision.yml +++ b/ansible/provision.yml @@ -1,20 +1,26 @@ -- name: setup networks and 2 vms - vars: - ostack_cloud: lard - ipalias_network_name: ipalias - ostack_region: Ostack2-EXT - hosts: localhost +--- +- name: Provision + hosts: servers gather_facts: false + vars: + key_name: # provide via cmd - roles: - - role: networks - - role: vm # in A - vars: - name_stuff: lard-a - availability_zone: ext-a - vm_ip: '{{ hostvars[groups["servers"][0]].ansible_host }}' - - role: vm # in B + tasks: + - name: Setup networks # noqa run-once + ansible.builtin.include_role: + name: ostack + tasks_from: networks.yml + apply: + delegate_to: localhost + run_once: true + tags: + - network + + - name: Create VMs + ansible.builtin.include_role: + name: ostack + tasks_from: vm_create.yml + apply: + delegate_to: localhost vars: - name_stuff: lard-b - availability_zone: ext-b - vm_ip: '{{ hostvars[groups["servers"][1]].ansible_host }}' + ostack_vm_key_name: "{{ key_name }}" diff --git a/ansible/readme.md b/ansible/readme.md deleted file mode 100644 index f2f0a5f1..00000000 --- a/ansible/readme.md +++ /dev/null @@ -1,182 +0,0 @@ -## README for LARD setup on openstack(2) - -#### Useful ansible commands: -``` -ansible-inventory -i inventory.yml --graph - -ansible servers -m ping -u ubuntu -i inventory.yml -``` - -#### Dependencies to install -``` -pip3 install wheel # so you can allow downloading of binary python packages - -pip install -r requirements.txt - -ansible-galaxy collection install openstack.cloud - -ansible-galaxy collection install community.postgresql - -ansible-galaxy collection install community.general - -ansible-galaxy collection install ansible.posix - -ansible-galaxy collection install ansible.utils - -``` - -### Get access to OpenStack -You need to create application credentials in the project you are going to create the instances in, so that the ansible scripts can connect to the right ostack_cloud which in our case needs to be called lard. - -The file should exist here: -~/.config/openstack/clouds.yml - -If have MET access see what is written at the start of the readme here: -https://gitlab.met.no/it/infra/ostack-ansible21x-examples - -Or in the authentication section here: -https://gitlab.met.no/it/infra/ostack-doc/-/blob/master/ansible-os.md?ref_type=heads - -### Add your public key to the Ostack GUI -Go to "Compute" then "Key Pairs" and import your public key for use in the provisioning step. - -### Provision! 
-The IPs in inventory.yml should correspond to floating ips you have requested in the network section of the open stack GUI. If you need to delete the old VMs (compute -> instances) and Volumes (volumes -> volumes) you can do so in the ostack GUI. *For some reason when deleting things to build up again one of the IPs did not get disassociated properly, and I had to do this manually (network -> floating IPs).* - -The vars for the network and addssh tasks are encrypted with ansible-vault (ansible-vault decrypt roles/networks/vars/main.yml, ansible-vault decrypt roles/addshhkeys/vars/main.yml, ansible-vault decrypt roles/vm_format/vars/main.yml). -But if this has been setup before in the ostack project, these have likely already been run and therefore already exits so you could comment out this role from provision.yml. -Passwords are in ci_cd variables https://gitlab.met.no/met/obsklim/bakkeobservasjoner/lagring-og-distribusjon/db-products/poda/-/settings/ci_cd - -``` -ansible-playbook -i inventory.yml -e ostack_key_name=xxx provision.yml -``` - -After provisioning the next steps may need to ssh into the hosts, and thus you need to add them to your known hosts. Ansible appears to be crap at this, so its best to do it before running the next step by going: -`ssh ubuntu@157.249.*.*` -For all the VMs. -If cleaning up from tearing down a previous set of VMs you may also need to remove them first: -`ssh-keygen -f "/home/louiseo/.ssh/known_hosts" -R "157.249.*.*"` - -### Configure! -The third IP being passed in here is the one that gets associated with the primary, and moved when doing a switchover. -*NOTE:* The floating IP association times out, but this is ignored as it is a known bug. - -``` -ansible-playbook -i inventory.yml -e primary_floating_ip='157.249.*.*' -e db_password=xxx -e repmgr_password=xxx configure.yml -``` - -The parts to do with the floating ip that belongs to the primary (ipalias) are based on: -https://gitlab.met.no/ansible-roles/ipalias/-/tree/master?ref_type=heads - -### Connect to database -``` -PGPASSWORD=xxx psql -h 157.249.*.* -p 5432 -U lard_user -d lard -``` - -### Checking the cluster - -Become postgres user: sudo su postgres -``` -postgres@lard-b:/home/ubuntu$ repmgr -f /etc/repmgr.conf node check -Node "lard-b": - Server role: OK (node is primary) - Replication lag: OK (N/A - node is primary) - WAL archiving: OK (0 pending archive ready files) - Upstream connection: OK (N/A - node is primary) - Downstream servers: OK (1 of 1 downstream nodes attached) - Replication slots: OK (node has no physical replication slots) - Missing physical replication slots: OK (node has no missing physical replication slots) - Configured data directory: OK (configured "data_directory" is "/mnt/ssd-b/16/main") -``` -``` -postgres@lard-a:/home/ubuntu$ repmgr -f /etc/repmgr.conf node check -Node "lard-a": - Server role: OK (node is standby) - Replication lag: OK (0 seconds) - WAL archiving: OK (0 pending archive ready files) - Upstream connection: OK (node "lard-a" (ID: 1) is attached to expected upstream node "lard-b" (ID: 2)) - Downstream servers: OK (this node has no downstream nodes) - Replication slots: OK (node has no physical replication slots) - Missing physical replication slots: OK (node has no missing physical replication slots) - Configured data directory: OK (configured "data_directory" is "/mnt/ssd-b/16/main") -``` - -While a few of the configurations are found in /etc/postgresql/16/main/postgresql.conf (particularly in the ansible block at the end), many of them -can only be 
seen in /mnt/ssd-b/16/main/postgresql.auto.conf (need sudo to see contents). - -### Perform switchover -This should only be used when both VMs are up and running, like in the case of planned maintenance on one datarom. -Then we would use this script to switch the primary to the datarom that will stay available ahead of time. - -*Make sure you are aware which one is the master, and put the names the right way around in this call.* - -``` -ansible-playbook -i inventory.yml -e name_primary=lard-a -e name_standby=lard-b -e primary_floating_ip='157.249.*.*' switchover.yml -``` - -This should also be possible to do manually, but might need to follow what is done in the ansible script (aka restarting postgres on both VMs), then performing the switchover: -`repmgr standby switchover -f /etc/repmgr.conf --siblings-follow` (need to be postgres user) - -### Promote standby (assuming the primary is down) -Make sure you are know which one you want to promote! -This is used in the case where the primary has gone down (e.g. unplanned downtime of a datarom). - -**Manually:** -SSH into the standby -`repmgr -f /etc/repmgr.conf cluster show` -Check the status (The primary should say its 'uncreachable') -`repmgr -f /etc/repmgr.conf standby promote` -Then promote the primary (while ssh-ed into that VM) -You can the check the status again (and now the old primary will say failed) - -Then move the ip in the ostack gui (see in network -> floating ips, dissasociate it then associated it with the ipalias port on the other VM) - -#### Later, when the old primary comes back up -The cluster will be in a slightly confused state, because this VM still thinks its a primary (although repmgr tells it the other one is running as a primary as well). If the setup is running as asynchronous we could lose data that wasn't copied over before the crash, if running synchronously then there should be no data loss. - -SSH into the new primary -`repmgr -f /etc/repmgr.conf cluster show` -says: -- node "lard-a" (ID: 1) is running but the repmgr node record is inactive - -SSH into the old primary -`repmgr -f /etc/repmgr.conf cluster show` -says: -- node "lard-b" (ID: 2) is registered as standby but running as primary - - -With a **playbook** (rejoin_ip is the ip of the node that has been down and should now be a standby not a primary): -``` -ansible-playbook -i inventory.yml -e rejoin_ip=157.249.*.* -e primary_ip=157.249.*.* rejoin.yml -``` - -Or **manually**: -Make sure the pg process is stopped (see fast stop command) if it isn't already - -Become postgres user: -`sudo su postgres` -Test the rejoin (host is the IP of the new / current primary, aka the other VM) -`repmgr node rejoin -f /etc/repmgr.conf -d 'host=157.249.*.* user=repmgr dbname=repmgr connect_timeout=2' --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose --dry-run` -Perform a rejoin -`repmgr node rejoin -f /etc/repmgr.conf -d 'host=157.249.*.* user=repmgr dbname=repmgr connect_timeout=2' --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose` - -### for testing: -Take out one of the replicas (or can shut off instance in the openstack GUI): -`sudo pg_ctlcluster 16 main -m fast stop` -For bringing it back up (or turn it back on): -`sudo pg_ctlcluster 16 main start` - -### for load balancing at MET -This role creates a user and basic db for the loadbalancer to test the health of the db. Part of the role is allowed to fail on the secondary ("cannot execute ___ in a read-only transaction"), as it should pass on the primary and be replicated over. 
The hba conf change needs to be run on both. - -The vars are encrypted, so run: ansible-vault decrypt roles/bigip/vars/main.yml - -Then run the bigip role on the VMs: - -``` -ansible-playbook -i inventory.yml -e bigip_password=xxx bigip.yml -``` - -### Links: - -https://www.enterprisedb.com/postgres-tutorials/postgresql-replication-and-automatic-failover-tutorial#replication \ No newline at end of file diff --git a/ansible/rejoin.yml b/ansible/rejoin.yml index 701d1cb9..6e377de6 100644 --- a/ansible/rejoin.yml +++ b/ansible/rejoin.yml @@ -1,9 +1,13 @@ +--- - name: Rejoin - hosts: servers + hosts: "{{ rejoin }}" remote_user: ubuntu - # loops over both servers + vars: + # Old primary host that went down + rejoin: # provide via cmd + # New primary host after it was promoted + primary: # provide via cmd + roles: - role: rejoin - vars: - vm_ip: '{{ ansible_host }}' # the current vm's ip - when: ansible_host == rejoin_ip # only run on the one that needs to be rejoined \ No newline at end of file + rejoin_primary_ip: "{{ hostvars[primary].ansible_host }}" diff --git a/ansible/requirements.txt b/ansible/requirements.txt index 29772cb6..32a84a67 100644 --- a/ansible/requirements.txt +++ b/ansible/requirements.txt @@ -1,8 +1,9 @@ -ansible-core~=2.15.0 -ansible-lint~=6.17.0 +ansible-core~=2.17.4 +ansible-lint~=24.9.2 powerline-status powerline-gitstatus netaddr~=0.7.19 openstacksdk~=1.3.0 python-openstackclient~=6.2.0 -psycopg2-binary \ No newline at end of file +psycopg2-binary +wheel diff --git a/ansible/requirements.yml b/ansible/requirements.yml new file mode 100644 index 00000000..45323db2 --- /dev/null +++ b/ansible/requirements.yml @@ -0,0 +1,7 @@ +--- +collections: + - ansible.posix + - ansible.utils + - community.general + - community.postgresql + - openstack.cloud diff --git a/ansible/roles/addsshkeys/tasks/main.yml b/ansible/roles/addsshkeys/tasks/main.yml deleted file mode 100644 index 5881bf29..00000000 --- a/ansible/roles/addsshkeys/tasks/main.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -- name: Add users keys to authorized_keys - ansible.builtin.authorized_key: - user: ubuntu # this is the username on the remotehost whose authorized keys are being modified - state: present - key: "{{ item.key }}" - loop: '{{ authorized_keys_list }}' - loop_control: - label: "adding {{ item.name }} key to authorized_keys" \ No newline at end of file diff --git a/ansible/roles/addsshkeys/vars/main.yml b/ansible/roles/addsshkeys/vars/main.yml deleted file mode 100644 index 8ed8d280..00000000 --- a/ansible/roles/addsshkeys/vars/main.yml +++ /dev/null @@ -1,44 +0,0 @@ -$ANSIBLE_VAULT;1.1;AES256 -38656433653332383463656338306632666166636637363236383138306164393837633762363633 -3662633839613338366235343733633534613235323435380a346464663937373266636362376265 -36356165393035623866386563626635373964333661626166346462623434346362616562656639 -3238313131316232390a343562336665303364636136343138346134653362663164373234363732 -65666339306464346466366166666133613532386331643566343534663764376166663734356138 -32633361363765306233653639316336363361316362633430333539306565663033666261613836 -63646364383763613763323836363063393330653033333539326666613533623336666633323665 -35383464363762346136393362306136613164646234643861653965363139363234363362656163 -31333162336434356434653439343436343863316237323065333764303766663732333838646337 -66356432366161633431316334653532633666613431313738353930623764353333393838393230 -34396666643232653036626635613732633265353532646563623733303439633861373233623535 
-37366531303131653234643338643232623034663763373764383563393732376138306130653862 -64306637333230333736633361356338383561633166623735376432336265353565636635646661 -37336161373236613839323335643163386162663837353236656539353635356231666265323064 -63646139383961396330653865353362643934643331646263313761353866336538353865323334 -39633438666564363331336162376664303036663638393339353637326132376466316639653238 -63313361333165633033373934336236613538336437343033383437626664316562303533663337 -35393565333336323537656436373339343330353536313262626535333434373939323561353061 -32363566393930353734376163633063386439376365613039383334616462316636313339646462 -65356266353537373534333464333631383264633636366234313337656130313761343539363361 -62313531316339353037386330363438346331323539616531326265396330623866393130306265 -31353137383931333737353664626639303033653137326264393762306532386466343239653561 -37396632633230383138666366353133613364666666646136363038323762623930363034376233 -38636365633333323861376666346534336564373039313166393062373939383438656234396135 -64313964383539666137366334303937366437363430393437333531326132326364396362613737 -32323634396263363330323535396662663834336337373563383235393132303262643961396236 -37613935613330356635653664343264396230336437663138643861373930313364306330376363 -31363137656364323638373761613735356637353731386663303766663830396664376230333532 -31356134353864643762303639343062636661313263353938393865306665313866663333636631 -64623132663632333534386466393933373764363534376465353435646438643665666561383135 -31636337376234323263363737633835343339313335343865353363653165636637343930343431 -35333938323335613162313730373065643762376632626664323535623064363439633161623335 -33313834396164313131346536646133623630393831623363306638626133633136356163653432 -38633239336264316539633566633935663532333236613664663361316232393961633464333936 -30623539633535613137363036396665663536646135323731356461663030616666303263366133 -33323065323138633032316434353333393764323435626633326538616432653330353961353730 -65343636656231616165336363653535653437623761623863343833643764383930353434386339 -32653130633839643939383337363031353464313466393539326437633962356138366534303834 -64643464353765663536626233333730613038383662643366303764363462646265346163333237 -34353533346138643435326130636464626465373466633562656237353965623530656335373665 -30323238623033313766386338313537396663623963633334386437616461623130326638363737 -37366637363233313635396433623932303363373032623664356164306232623036396437663336 -306530323762306438626534346563346135 diff --git a/ansible/roles/bigip/defaults/main.yml b/ansible/roles/bigip/defaults/main.yml new file mode 100644 index 00000000..660c97c9 --- /dev/null +++ b/ansible/roles/bigip/defaults/main.yml @@ -0,0 +1,4 @@ +--- +bigip_password: +bigip_load_balancer_ips: + - address: diff --git a/ansible/roles/bigip/vars/bigip.sql b/ansible/roles/bigip/files/bigip.sql similarity index 100% rename from ansible/roles/bigip/vars/bigip.sql rename to ansible/roles/bigip/files/bigip.sql diff --git a/ansible/roles/bigip/tasks/main.yml b/ansible/roles/bigip/tasks/main.yml index a0813d75..e722854e 100644 --- a/ansible/roles/bigip/tasks/main.yml +++ b/ansible/roles/bigip/tasks/main.yml @@ -1,41 +1,68 @@ ---- +--- +- name: Create bigip directory if it does not exist + ansible.builtin.file: + path: /etc/postgresql/16/db/bigip + state: directory + mode: '0755' + +- name: Copy the bigip schema to the remote + ansible.builtin.copy: + src: bigip.sql + dest: 
/etc/postgresql/16/db/bigip/bigip.sql
+    mode: '0755'
+
+- name: Run repmgr node check
+  ansible.builtin.command: repmgr -f /etc/repmgr.conf node check
+  become: true
+  become_user: postgres
+  register: node_check
+  changed_when: false
+
+# TODO: can do this automatically whenever we need to differentiate
+# between `primary` and `standby` instead of passing them in manually
+- name: Extract server role
+  ansible.builtin.set_fact:
+    server_role: "{{ node_check.stdout | regex_search('node is ([a-z]+)', '\\1') | first }}"
+
 - name: Create bigip user and basic database
-  block:
-    # create user
-    - name: Create bigip user
-      community.postgresql.postgresql_user:
-        name: bigip
-        #db: bigip
-        password: '{{ bigip_password }}'
-      become: true
-      become_user: postgres
-    # create database
-    - name: Create a bigip database, with owner bigip
-      community.postgresql.postgresql_db:
-        name: bigip
-        owner: bigip
-      become: true
-      become_user: postgres
-    # create the schema
-    - name: Create the schema in bigip
-      community.postgresql.postgresql_script:
-        db: bigip
-        path: /etc/postgresql/16/db/bigip/bigip.sql
-      become: true
-      become_user: postgres
-    - name: Grant bigip priveleges on bigip database for table test
-      community.postgresql.postgresql_privs:
-        database: bigip
-        objs: test # only have rights on table test
-        privs: SELECT
-        role: bigip
-        grant_option: true
-      become: true
-      become_user: postgres
-  # this is allowed to fail on the secondary, should work on the primary and be replicated over
-  ignore_errors: true
-
-# loop over the two ips of the load balancers, to add to hba conf
+  # It would fail on the standby
+  when: server_role == "primary"
+  block:
+    - name: Create bigip user
+      community.postgresql.postgresql_user:
+        name: bigip
+        # db: bigip
+        password: "{{ bigip_password }}"
+      become: true
+      become_user: postgres
+
+    - name: Create a bigip database, with owner bigip
+      community.postgresql.postgresql_db:
+        name: bigip
+        owner: bigip
+      become: true
+      become_user: postgres
+
+    - name: Create the schema in bigip
+      community.postgresql.postgresql_script:
+        db: bigip
+        path: /etc/postgresql/16/db/bigip/bigip.sql
+      become: true
+      become_user: postgres
+
+    - name: Grant bigip privileges on bigip database for table test
+      community.postgresql.postgresql_privs:
+        database: bigip
+        objs: test # only have rights on table test
+        privs: SELECT
+        role: bigip
+        grant_option: true
+      become: true
+      become_user: postgres
+
+# loop over the two ips of the load balancers, to add to hba conf
+# TODO: should postgres be restarted after changing the pg_hba.conf file?
+# TODO: does repmgr synchronize this file?
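+# Answering the TODOs above (best guess, not verified here): pg_hba.conf is
+# re-read on a reload, so `pg_ctlcluster 16 main reload` (or
+# `systemctl reload postgresql@16-main`) should be enough; a full restart is
+# not needed. And repmgr does not keep config files under /etc/postgresql in
+# sync between hosts, which is why this task runs on both.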
- name: Change hba conf to allow connections from bigip (load balancer) without an encrypted password community.postgresql.postgresql_pg_hba: dest: /etc/postgresql/16/main/pg_hba.conf @@ -45,6 +72,6 @@ databases: bigip users: bigip become: true - loop: '{{ load_balancer_ips }}' + loop: "{{ bigip_load_balancer_ips }}" loop_control: - label: "adding {{ item.address }} to hba conf" \ No newline at end of file + label: "adding {{ item.address }} to hba conf" diff --git a/ansible/roles/bigip/vars/main.yml b/ansible/roles/bigip/vars/main.yml deleted file mode 100644 index 8faa3fee..00000000 --- a/ansible/roles/bigip/vars/main.yml +++ /dev/null @@ -1,11 +0,0 @@ -$ANSIBLE_VAULT;1.1;AES256 -36376631623862333537653933356438333031333865343038316533323235363363646164643333 -6265643437643762363432323136363737366564393937640a633931626463303062353439333966 -61303262666137663839316334653763373036373064373234316563393232636438323761303833 -3237663365633164370a373334623862383963633461616466393339303333386632623330303737 -61346539613732336432616539616335383531386165333435653263323033393939343133393333 -37616665636662343062623235626163356635303531356164383264623162383365656632613137 -32383165626364356263616531383262373666653635343461366665323635353233623561323732 -31306262353866306539613638356161633533393261333936363562626361303038646139353737 -61356132313066623738373064646138343730633364653633366234386635613735373566643631 -3338383732343363326131643438626634383731373464393332 diff --git a/ansible/roles/deploy/defaults/main.yml b/ansible/roles/deploy/defaults/main.yml deleted file mode 100644 index 6d4b19a4..00000000 --- a/ansible/roles/deploy/defaults/main.yml +++ /dev/null @@ -1,15 +0,0 @@ ---- -deploy_envars: - - LARD_CONN_STRING: - - STINFO_CONN_STRING: - -deploy_files: - - src: lard_ingestion.service - dest: /etc/systemd/system - mode: "0664" - - src: "{{ playbook_dir }}/../target/release/lard_ingestion" - dest: /usr/local/bin - mode: "0755" - - src: "{{ playbook_dir }}/../ingestion/resources" - dest: /usr/local/bin - mode: "0755" diff --git a/ansible/roles/deploy/files/lard_ingestion.service b/ansible/roles/deploy/files/lard_ingestion.service index 7048c363..1540766c 100644 --- a/ansible/roles/deploy/files/lard_ingestion.service +++ b/ansible/roles/deploy/files/lard_ingestion.service @@ -5,6 +5,7 @@ Description=lard ingestion service User=lard Group=lard WorkingDirectory=/usr/local/bin +EnvironmentFile=/etc/systemd/lard_ingestion.var ExecStart=/usr/local/bin/lard_ingestion lard Restart=on-failure diff --git a/ansible/roles/deploy/files/var_file b/ansible/roles/deploy/files/var_file new file mode 100644 index 00000000..6fd5963a --- /dev/null +++ b/ansible/roles/deploy/files/var_file @@ -0,0 +1,14 @@ +$ANSIBLE_VAULT;1.1;AES256 +33616337636662313835653362653263623134346336316430383439646165323230303034343133 +3235366163383130343830653266363638643933623533350a323762336235363739363030373739 +65303039663836623863373332643466363630363032353538323162336339333765316632336131 +3063306264666432620a346135636265393739313461653338363061613137363335633034643732 +61356362666661663038323062656138393135626531363262323131373537313531323138633532 +33613666616635646264373739623538646561383266346130653537623839613233643139353161 +64383534613461646235333536313761666538353561633733316133633139373030656330336264 +36383230626436636465633530366335346230663530626433623733653535653138613532303330 +36643763643666626561646335343233383663333735323265623631656562383834383038386364 
+61623637656235656636646130333037346531343166383864313966336537626230363063316630 +62323738383239616166346635383131616461613461346539633562363830643935643664353465 +38613132383765313139343132303739333334666231623866383233373236633666363964623339 +3039 diff --git a/ansible/roles/deploy/tasks/main.yml b/ansible/roles/deploy/tasks/main.yml index 222dfa5a..b2d8b883 100644 --- a/ansible/roles/deploy/tasks/main.yml +++ b/ansible/roles/deploy/tasks/main.yml @@ -3,6 +3,7 @@ ansible.builtin.group: name: lard state: present + become: true - name: Create lard user ansible.builtin.user: @@ -12,8 +13,8 @@ append: true state: present create_home: false + become: true -# TODO: should we deploy in non root user? - name: Copy files to server ansible.builtin.copy: src: "{{ item.src }}" @@ -22,14 +23,19 @@ owner: root group: root become: true - loop: "{{ deploy_files }}" - -- name: Import environment variables # noqa: command-instead-of-module - ansible.builtin.command: systemctl import-environment LARD_CONN_STRING STINFO_CONN_STRING - # TODO: ansible docs say that 'environment:' is "not a recommended way to pass in confidential data." - environment: "{{ deploy_envars }}" - become: true - changed_when: false + loop: + - src: lard_ingestion.service + dest: /etc/systemd/system + mode: "0664" + - src: var_file + dest: /etc/systemd/lard_ingestion.var + mode: "0664" + - src: "{{ playbook_dir }}/../target/release/lard_ingestion" + dest: /usr/local/bin + mode: "0755" + - src: "{{ playbook_dir }}/../ingestion/resources" + dest: /usr/local/bin + mode: "0755" - name: Start LARD ingestion service ansible.builtin.systemd: @@ -38,8 +44,3 @@ state: restarted enabled: true become: true - -- name: Unset environment variables # noqa: command-instead-of-module - ansible.builtin.command: systemctl unset-environment LARD_CONN_STRING STINFO_CONN_STRING - become: true - changed_when: false diff --git a/ansible/roles/movefloatingip/tasks/main.yml b/ansible/roles/movefloatingip/tasks/main.yml deleted file mode 100644 index a627098a..00000000 --- a/ansible/roles/movefloatingip/tasks/main.yml +++ /dev/null @@ -1,3 +0,0 @@ -# roles/movefloatingip/tasks/main.yml -- name: Movefloatingip - import_tasks: movefloatingip.yml \ No newline at end of file diff --git a/ansible/roles/movefloatingip/tasks/movefloatingip.yml b/ansible/roles/movefloatingip/tasks/movefloatingip.yml deleted file mode 100644 index 26ab05ce..00000000 --- a/ansible/roles/movefloatingip/tasks/movefloatingip.yml +++ /dev/null @@ -1,59 +0,0 @@ ---- -# Switch over the primary's particular floating ip -# this makes sense to do after successfully switching over, -# however it means that the stuff writing to the primary needs to be -# robust enough to handle getting told the db is in a read only state for a short period. 
-- name: Move primary floating ip - block: - # remove from old primary - - name: Detach floating ip address that we keep connected to the primary - openstack.cloud.floating_ip: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - server: '{{ name_primary }}' - state: absent - network: public - floating_ip_address: '{{ primary_floating_ip }}' - - - name: Gather information about new primary server - openstack.cloud.server_info: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - name: '{{ name_standby }}' - register: new_primary_server - - - name: Print out the ipalias port information for the server - ansible.builtin.debug: - msg: "Server {{ new_primary_server.servers[0].addresses.ipalias }}" - - # add to what is now primary (used to be standby) - - name: Attach floating ip address that we keep connected to the primary - openstack.cloud.floating_ip: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - server: '{{ new_primary_server.servers[0].id }}' - state: present - reuse: true - network: public - fixed_address: '{{ new_primary_server.servers[0].addresses.ipalias[0].addr }}' - floating_ip_address: '{{ primary_floating_ip }}' - wait: true - timeout: 60 - when: new_primary_server.servers[0].addresses.ipalias | length <=1 - # unfortunately it seems that attaching the floating ip results in a timeout - # even though it actually succeeds - ignore_errors: true - -- name: Check floating ip is attached - openstack.cloud.floating_ip_info: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - floating_ip_address: '{{ primary_floating_ip }}' - register: fip - -# this will not run if the ip is not now on the right vm -- name: Print out the floating ip information to confirm its ok - ansible.builtin.debug: - msg: "Floating ip {{ fip }}" - become: true - when: fip.floating_ips[0].port_details.device_id == new_primary_server.servers[0].id \ No newline at end of file diff --git a/ansible/roles/networks/tasks/create-ipalias-network.yml b/ansible/roles/networks/tasks/create-ipalias-network.yml deleted file mode 100644 index 7b8ee6dc..00000000 --- a/ansible/roles/networks/tasks/create-ipalias-network.yml +++ /dev/null @@ -1,42 +0,0 @@ ---- - - name: Create ipalias network - openstack.cloud.network: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - name: '{{ ipalias_network_name }}' - external: false - state: present - run_once: true - - - name: Create ipalias network subnet - openstack.cloud.subnet: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - network_name: '{{ ipalias_network_name }}' - name: '{{ ipalias_network_name }}-subnet' - cidr: 192.168.20.0/24 - state: present - dns_nameservers: '{{ met_dns[ostack_region] }}' - run_once: true - - - name: Connect ipalias network to public network - openstack.cloud.router: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - state: present - name: ipalias-router - network: public - interfaces: - - '{{ ipalias_network_name }}-subnet' - run_once: true - - - name: Remove default gateway for subnet - openstack.cloud.subnet: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - network_name: '{{ ipalias_network_name }}' - name: '{{ ipalias_network_name }}-subnet' - cidr: '{{ ipalias_ostack_network_cidr }}' - no_gateway_ip: true - state: present - run_once: true diff --git a/ansible/roles/networks/tasks/create-project-network.yml b/ansible/roles/networks/tasks/create-project-network.yml deleted file mode 100644 index 1eff31cc..00000000 
--- a/ansible/roles/networks/tasks/create-project-network.yml +++ /dev/null @@ -1,28 +0,0 @@ ---- - - name: Create private network - openstack.cloud.network: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - state: present - name: '{{ ostack_network_name }}' - external: false - - - name: Create private network subnet - openstack.cloud.subnet: - state: present - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - network_name: '{{ ostack_network_name }}' - name: '{{ ostack_network_name }}-subnet' - cidr: '{{ ostack_network_cidr }}' - dns_nameservers: '{{ met_dns[ostack_region] }}' - - - name: Connect private network to public network - openstack.cloud.router: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - state: present - name: public-router - network: public - interfaces: - - '{{ ostack_network_name }}-subnet' diff --git a/ansible/roles/networks/tasks/create-project-security-group.yml b/ansible/roles/networks/tasks/create-project-security-group.yml deleted file mode 100644 index e4ebe625..00000000 --- a/ansible/roles/networks/tasks/create-project-security-group.yml +++ /dev/null @@ -1,21 +0,0 @@ ---- - - name: Create security groups - openstack.cloud.security_group: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - name: '{{ item }}' - description: Created with Ansible - loop: '{{ security_groups | map(attribute="name") | list | unique }}' - - - name: Populate security groups - openstack.cloud.security_group_rule: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - security_group: '{{ item.name }}' - protocol: tcp - port_range_max: "{{ item.rule.port }}" - port_range_min: "{{ item.rule.port }}" - remote_ip_prefix: "{{ item.rule.subnet }}" - loop: '{{ security_groups }}' - loop_control: - label: "updating security group {{ item.name }} with rule {{ item.rule }}" diff --git a/ansible/roles/networks/tasks/main.yml b/ansible/roles/networks/tasks/main.yml deleted file mode 100644 index 10a5623c..00000000 --- a/ansible/roles/networks/tasks/main.yml +++ /dev/null @@ -1,12 +0,0 @@ -# roles/networks/tasks/main.yml - - name: Create the project network (if it doesn't exist) - import_tasks: create-project-network.yml - - - name: Create the project security group (if it doesn't exist) - import_tasks: create-project-security-group.yml - - - name: Create the ipalias network (if it doesn't exist) - import_tasks: create-ipalias-network.yml - - - name: Create ping security group - import_tasks: open-for-ping.yml \ No newline at end of file diff --git a/ansible/roles/networks/vars/main.yml b/ansible/roles/networks/vars/main.yml deleted file mode 100644 index 4071b6ba..00000000 --- a/ansible/roles/networks/vars/main.yml +++ /dev/null @@ -1,102 +0,0 @@ -$ANSIBLE_VAULT;1.1;AES256 -63316462303232663161396533306631623963643536613865363931316530333935323339343165 -3334393663303564313730656263323461313133336263350a663637386337626430306430353138 -64326339663966643130363865373431656663333234666465363630616366376235346662373366 -3365333964303362660a646139336135343265613032353636616561323435366530633865323635 -63626239636465633662666262356665333162653563376266373530323562396561626535363366 -37333131353737353033313461613839366161623936656666396131646239303464623165616630 -66333866316231356161333464383062383634383530636162366464353361316532393033373963 -61646134623938633761303536646432616237316364323434646634393465363438616532313236 -61613639663835313635306265663263373639333062383937633932636166333437626461663932 
-36643666613763626261316333386332623166333433306661623531623232323662396364643133 -31356665663935343436646136393935653439306332656332303834363730623937363930353737 -36323564396333326433653463313136663331623862663239663234613135333162316466383136 -39376638633063643732343764366535323166663063323034306637376665336632653264613763 -36613139376237373035393236336331653235656435303631323730323163373938666635383038 -66323662316137393137613235373261626639623331376632653935343066303034653534316236 -35653339313864313835316462383164323633323332366133343839376463623966353635363934 -35616636326432336631613736363638663439336232623064643631663830313330623736366632 -36643031383032353738653131643262383936396166353331336137333265393561626163633464 -37616662646363663933656530366633626338386530613835626533306164396336383561616533 -39636539653134366232346330386239396133313132363437313238306333623232356462386434 -37343662646562353031616535346336613131343838616532623366613136386639363763323734 -33646138313162393763653363633435623965376332633463313264333636663238366666376161 -36626365333937323430363035353439653338633838326331303830666261653866323634633434 -32343738636636356539643762393534333739623539326234613639633435636165626433616337 -35666564636463623765393232336432386636343133383537363061343064613336663665316666 -33386535376165303966306530653161383735323761363534326335653732346633333865366135 -63303464303138653937646264316164613265353934316334366335626231363832613365363532 -65343636643937376136386235366235643363343166353462663730346161393362623730643965 -39303062666266376431326333663933356465373233653835363866636237363565376662643430 -31656236623131646633643632366233633066653762323438383538366232363634353331313366 -66396331326434343362663931353866373234306663623631383330333533656139623565313336 -36303136333535613537393231613135613935323436303037376134653831353530666266376130 -32353834343461393133646134333065663239326535313831366630303361333566376532346462 -37363635366634323531616536393431656365613436666433616530356538376531656130366531 -37656130303132356432363930626632356336653235396362643062363662336530646333356538 -30373738353836363137306363613433376232366239623134643035343066653863623766653837 -62313039663666313033636331346131386632303430623034326664396663356262336363366265 -31393937373261353963623064653737343137376461353231656365373934326263376464643964 -33336566643131643163636162343862646665623139653639643439613261323366333634326438 -63633932333866346164616166343063386234383732333863333034346436356637653665626463 -34366234643339343162373663623136303236313266356164373362636237393631303866383034 -62616630663132613566336663633265356561646662333764383563353966383930613137653833 -62383661643739313230316332626236366435326662303831343936336166313033373561363037 -39393239613531643437346466383234393263643034363066366262316535313532326639356637 -66313762626232373839626638343465663935333061383839373963353833623932616433373336 -30363465623362326466323166353266346239326134376230633631653739393430326663316133 -61356431393665646664623135306538326430336137383931316165663561306262353239653765 -30636563626665363337623135346663383330626663373633336337623662353562393732646665 -37633636336564386364343632636532376536366165623032636266363765343864306234613735 -32306431393261313230326666616162303664396464303236643666336566313065663562613766 -65316132613339343864383635636433333933356664336435343134666536396162663031353532 -32373765323733656533353965333564393132656238333136663838396137336439393730303738 
-34653130386130333038643833656235633531333839663462656262336262396362643766653064 -36633832346431346538306263356366613661393535356333386537383464373436623339623334 -34353038383563393334373134353734666564353639643763346166373862323866613839373539 -38643130346665336634393466356263383733613134333162653265393065633434616261323462 -65346264376534343735643039396538376637326639663966643939656663373636396566643638 -37366666623031323138356164363038393538383261313832366262636535643163663832613037 -31336136626134336661626464623439636533303731643639353664343163346332623032396430 -64383433643832343962343130636230626165376466386635363332633563333865633830383830 -66626334626433626339363837633235316636393163383464373638316132386363393739306230 -34343033393533303135343830333531626238393964306137323564623962313032633562366139 -35323261323531663335613039613764353262343433646537393830356135333265326238396663 -32636261623163633737666565666631663736333964363839373234633663343662366364646161 -63613365386335373637353633326434373632393334613131386439303339346530316334326364 -30336662653037656339393230323866643536643366383232393038323138323532636235653832 -61376338323839383539313364633936643934303264616131626233396563656163383836653132 -39393131393730343935663562386537313032383835663963653365343738373437303263313435 -32316365633333326131363034323463373065653930376365633834396137653634303038323364 -30303739363230353235666233636464373635396433616535643364666638656339653065366637 -35303531656665333334636535613631623133303662373235393231396234333566396435633839 -34663063366163653761336661386633656664313464663437323036373533323464373634616237 -64633666663033623234376630393361616638303166393230626336666236643462363565656431 -30626239323963376361353065383261383033326238613635643062373439616266313361306633 -64393263343130663765326562366266366538373130316638613734613134333030613831383938 -62393263343337306230363733326638366538393230313631383033313738346536656361623338 -34323131356230376530623035613133636434643766383162623363633464366661353031303863 -31396135333236373631363162326235313037343461656430376330383266613733656162616431 -31373231653361313465653233613537386661303737633730613033633334343964336665623639 -63393763343962346439653335333366346238643435666631356338366637316634373861383631 -38316563313866663561626632306635383062633237343038653032396266666666336436636138 -31666330323531393362366535326538626463633439393237633131376366393136386264306433 -33663434373662383632653264386566643132613938373062333635666138393136353035663666 -61636539353038363331306465383336303564633664623061326665383565616334363336313635 -37336664313334663237343762373362306239303362613966313765396666656663646636376338 -34633266343763306566633261343535653238663433613238633331306135626165366265613539 -35313334353238633532636139663363383130373066643230653535613964323061373862633433 -66343661323030666534373866363130316265346535303266616663316333666665626432386334 -36323865313661313365353666663563313232316531373761323534366266353462656132373738 -61393134656139393966636334326338643434626134333637626364326263333534643338383038 -34313339626263613566376539633737333532356131363561626364303738653066366337343935 -33323235616564316538356431623164373836356365323766613136323266616365646465613134 -30326161623665636166383636653266323739663236326162356238663865303463663964383463 -35396535623263316364366537626630643131633866396639386139373137663366636332373034 -66366231393932373230363161623039623463353732323962393361643238613130633835386231 
-66373534363562663163333532653664313664306539303362346535663131303037383231616362 -30663635343563393163616333396534366637303430633264643161653865643264386262396166 -64626562396238643566326361336538646436353166343639383533386635356436306666396531 -38333836353961626431646635343032346232613464336531633862386439353131376130656632 -35356639303162663862663036396337336233613534613431303165646239316466366535613834 -3839 diff --git a/ansible/roles/ostack/defaults/main.yml b/ansible/roles/ostack/defaults/main.yml new file mode 100644 index 00000000..84c26b90 --- /dev/null +++ b/ansible/roles/ostack/defaults/main.yml @@ -0,0 +1,40 @@ +--- +# PUBLIC +ostack_cloud: lard +ostack_region: Ostack2-EXT +ostack_ipalias_network_name: ipalias +ostack_state: present + +# PRIVATE +# networks +ostack_network_name: +ostack_network_cidr: +ostack_network_dns: # dict[ostack_region -> list(ipv4)] +ostack_network_security_groups: + - name: + rule: + subnet: + port: +ostack_ipalias_network_cidr: + +# vm_create +ostack_vm_image: +ostack_vm_flavor: +ostack_vm_key_name: +ostack_vm_security_groups: +ostack_vm_volume_type: +ostack_vm_volume_size: + +# vm_format +ostack_mount_device: +ostack_mount_point: +ostack_repmgr_password: + +# create_primary / create_standby +ostack_db_password: +ostack_floating_ip: +ostack_primary_ip: + +# move_floating_ip +ostack_primary: +ostack_standby: diff --git a/ansible/roles/primarystandbysetup/readme.md b/ansible/roles/ostack/replication_setup.md similarity index 100% rename from ansible/roles/primarystandbysetup/readme.md rename to ansible/roles/ostack/replication_setup.md diff --git a/ansible/roles/ostack/tasks/create_primary.yml b/ansible/roles/ostack/tasks/create_primary.yml new file mode 100644 index 00000000..c2180d48 --- /dev/null +++ b/ansible/roles/ostack/tasks/create_primary.yml @@ -0,0 +1,248 @@ +--- +# set up a role and provide suitable entries in pg_hba.conf with the database +# field set to replication + +# ensure max_wal_senders is set to a sufficiently large value in the conf file +# (also possibly max_replication_slots?) When running a standby server, you +# must set this parameter to the same or higher value than on the primary +# server. Otherwise, queries will not be allowed in the standby server. + +# set wal_keep_size to a value large enough to ensure that WAL segments are not +# recycled too early, or configure a replication slot for the standby? if there +# is a WAL archive accessible to the standby this may not be needed? + +# On systems that support the keepalive socket option, setting +# tcp_keepalives_idle, tcp_keepalives_interval and tcp_keepalives_count helps +# the primary promptly notice a broken connection. + +# example auth +# Allow the user "foo" from host 192.168.1.100 to connect to the primary +# as a replication standby if the user's password is correctly supplied. +# +# TYPE DATABASE USER ADDRESS METHOD +# host replication foo 192.168.1.100/32 md5 +- name: Create a new database with name lard + community.postgresql.postgresql_db: + name: lard + become: true + become_user: postgres + +- name: Copy the db folder to the remote + ansible.builtin.copy: + src: "{{ playbook_dir }}/../db/" + dest: /etc/postgresql/16/db/ + mode: "0755" + become: true + +# TODO: automatically loop over the sql files in order (needs prepending IDs?)
+# with_fileglob: "/etc/postgresql/16/db/*" +- name: Create schemas and tables in lard + community.postgresql.postgresql_script: + db: lard + path: "/etc/postgresql/16/db/{{ item }}" + become: true + become_user: postgres + loop: + - public.sql + - partitions_generated.sql + - labels.sql + - flags.sql
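+# A possible answer to the TODO above, assuming the files are given sortable
+# (e.g. numbered) names: glob the local db folder that was copied to the
+# remote and run the files in lexical order. The fileglob lookup runs on the
+# control node, which works here because the same files exist on both ends.
+# - name: Create schemas and tables in lard
+#   community.postgresql.postgresql_script:
+#     db: lard
+#     path: "/etc/postgresql/16/db/{{ item | basename }}"
+#   become: true
+#   become_user: postgres
+#   loop: "{{ lookup('ansible.builtin.fileglob', playbook_dir + '/../db/*.sql', wantlist=True) | sort }}"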
+ +- name: Connect to lard database, create lard_user + community.postgresql.postgresql_user: + db: lard + name: lard_user + password: "{{ ostack_db_password }}" + role_attr_flags: SUPERUSER # not desired, but the privilege granting doesn't seem to work? + become: true + become_user: postgres + +# - name: Grant lard_user privileges on lard database +# community.postgresql.postgresql_privs: +# type: database +# db: lard +# privs: ALL +# role: lard_user +# become: true +# become_user: postgres + +# MAKE IT THE PRIMARY +- name: Set wal_level parameter + community.postgresql.postgresql_set: + name: wal_level + value: replica # https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL + become: true + become_user: postgres + +- name: Set hot_standby parameter + community.postgresql.postgresql_set: + name: hot_standby + value: true + become: true + become_user: postgres + +- name: Set hot_standby_feedback parameter + community.postgresql.postgresql_set: + name: hot_standby_feedback + value: true + become: true + become_user: postgres + +- name: Set max_wal_senders parameter + community.postgresql.postgresql_set: + name: max_wal_senders + value: 10 + become: true + become_user: postgres + +- name: Set wal_log_hints parameter # needs to be enabled to use pg_rewind + # https://www.postgresql.org/docs/current/app-pgrewind.html + community.postgresql.postgresql_set: + name: wal_log_hints + value: true + become: true + become_user: postgres + +- name: Set max_replication_slots parameter + community.postgresql.postgresql_set: + name: max_replication_slots + value: 10 + become: true + become_user: postgres + +# make it SYNCHRONOUS REPLICATION (without the next two settings it would be asynchronous) +- name: Set synchronous_standby_names parameter + community.postgresql.postgresql_set: + name: synchronous_standby_names # https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-SYNCHRONOUS-STANDBY-NAMES + value: "*" # all the standbys + become: true + become_user: postgres + +- name: Set synchronous_commit parameter + community.postgresql.postgresql_set: + name: synchronous_commit # https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-SYNCHRONOUS-COMMIT + value: "on" # 'on' waits for the standby to flush but not apply the WAL: commits are durable on the standby after an OS crash, but standby reads may lag ('remote_apply' would give query consistency at a write-performance cost) + become: true + become_user: postgres + +# repmgr +# https://www.repmgr.org/docs/current/quickstart-repmgr-conf.html +- name: Create a repmgr.conf if it does not exist + ansible.builtin.file: + path: /etc/repmgr.conf + state: touch + mode: "0755" + become: true + +- name: Set contents of repmgr.conf + ansible.builtin.copy: + dest: "/etc/repmgr.conf" + content: | + node_id=1 + node_name='{{ inventory_hostname }}' + conninfo='host={{ ansible_host }} user=repmgr dbname=repmgr connect_timeout=2' + data_directory='{{ ostack_mount_point }}/16/main' + service_start_command='sudo /bin/systemctl start postgresql.service' + service_stop_command='sudo /bin/systemctl stop postgresql.service' + service_restart_command='sudo /bin/systemctl restart postgresql.service' + service_reload_command='sudo /bin/systemctl reload postgresql.service' + mode: "0755" + become: true + +# https://www.repmgr.org/docs/current/quickstart-primary-register.html +- name: Run repmgr to register the primary + ansible.builtin.command: repmgr -f /etc/repmgr.conf primary register -F # only need -F if rerunning + become: true + become_user: postgres + register: register_results + # changed_when: "'NOTICE: primary node record (ID: 1) registered' in register_results.stderr_lines" + changed_when: true + +- name: Print out the register_primary_results + ansible.builtin.debug: + msg: "repmgr {{ register_results }}" + +# # STUFF FOR REPLICATION (do not need if using repmgr) +# - name: Create replicator user with replication privileges +# community.postgresql.postgresql_user: +# name: replicator +# password: '{{ replicator_password }}' +# role_attr_flags: REPLICATION +# become: true +# become_user: postgres + +# # also specifically allow the replicator user +# - name: Change hba conf to allow replicator to connect +# community.postgresql.postgresql_pg_hba: +# dest: /etc/postgresql/16/main/pg_hba.conf +# databases: replication +# contype: host +# users: replicator +# #address: all +# address: '{{ standby_host }}' +# method: trust # seems to hang with md5, how to make auth work? +# become: true + +# # create replication slot +# - name: Create physical replication slot if doesn't exist +# become_user: postgres +# community.postgresql.postgresql_slot: +# slot_name: replication_slot +# #db: lard +# become: true + +# make sure these changes take effect? +- name: Restart service postgres + ansible.builtin.systemd_service: + name: postgresql + state: restarted + become: true + +- name: Attach primary floating ip + delegate_to: localhost + block: + - name: Gather information about primary server + openstack.cloud.server_info: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + name: "{{ inventory_hostname }}" + register: primary_server + + - name: Print out the ipalias port information for the server + ansible.builtin.debug: + msg: "Server {{ primary_server.servers[0].addresses.ipalias }}" + + # give the primary a particular floating ip + - name: Attach floating ip address that we keep connected to the primary + openstack.cloud.floating_ip: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + server: "{{ primary_server.servers[0].id }}" + reuse: true + network: public + fixed_address: "{{ primary_server.servers[0].addresses.ipalias[0].addr }}" + floating_ip_address: "{{ ostack_floating_ip }}" + wait: true + # unfortunately it seems that attaching the floating ip results in a + # timeout even though it actually succeeds + ignore_errors: true + register: attach_result + when: primary_server.servers[0].addresses.ipalias | length <=1 + + - name: Print out result of attaching floating ip address + ansible.builtin.debug: + msg: "{{ attach_result }}" + +- name: Check floating ip is attached + openstack.cloud.floating_ip_info: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + floating_ip_address: "{{ ostack_floating_ip }}" + register: fip_info + delegate_to: localhost + +# this will not run if the ip is not now on the vm +- name: Print out the floating ip information to confirm it's ok + ansible.builtin.debug: + msg: "Floating ip {{ fip_info }}" + when: fip_info.floating_ips[0].port_details.device_id == primary_server.servers[0].id
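+# Once the standby has been cloned and registered (see create_standby.yml),
+# the synchronous replication set up above can be verified from the primary;
+# a sketch, not wired into the role:
+# - name: Check that the standby is attached and synchronous
+#   community.postgresql.postgresql_query:
+#     query: SELECT application_name, state, sync_state FROM pg_stat_replication
+#   register: replication_status
+#   become: true
+#   become_user: postgres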
diff --git a/ansible/roles/ostack/tasks/create_standby.yml b/ansible/roles/ostack/tasks/create_standby.yml new file mode 100644 index 00000000..e3cd080d --- /dev/null +++ b/ansible/roles/ostack/tasks/create_standby.yml @@ -0,0 +1,144 @@ +--- +# create standby.signal file in data directory + +# configure streaming WAL: primary_conninfo needs a libpq connection string (ip +# address + other details needed to connect to the primary server) + +# since we want the standby to be able to operate as the primary, we need to +# configure the WAL archiving, connections, and auth like the primary. Example: +# primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass +# options=''-c wal_sender_timeout=5000''' restore_command = 'cp +# /path/to/archive/%f %p' archive_cleanup_command = 'pg_archivecleanup +# /path/to/archive %r' + +# add the following line to the postgresql.conf file on the standby. The standby +# connects to the primary that is running on host 192.168.1.50 and port 5432 as +# the user "foo" whose password is "foopass". primary_conninfo = +# 'host=192.168.1.50 port=5432 user=foo password=foopass' + +# use the replication slot on the primary (in file after the primary_conninfo) +# primary_slot_name = 'node_a_slot' + +# repmgr: https://www.repmgr.org/docs/current/quickstart-standby-clone.html +# must be done before the standby is put into read only mode (therefore not idempotent) +- name: Create a repmgr.conf if it does not exist + ansible.builtin.file: + path: /etc/repmgr.conf + state: touch + mode: "0755" + become: true + +- name: Set contents of repmgr.conf + ansible.builtin.copy: + dest: "/etc/repmgr.conf" + content: | + node_id=2 + node_name='{{ inventory_hostname }}' + conninfo='host={{ ansible_host }} user=repmgr dbname=repmgr connect_timeout=2' + data_directory='{{ ostack_mount_point }}/16/main' + service_start_command='sudo /bin/systemctl start postgresql.service' + service_stop_command='sudo /bin/systemctl stop postgresql.service' + service_restart_command='sudo /bin/systemctl restart postgresql.service' + service_reload_command='sudo /bin/systemctl reload postgresql.service' + mode: "0755" + become: true + +- name: Stop service postgres, if running + ansible.builtin.systemd_service: + name: postgresql + state: stopped + become: true + +# https://www.repmgr.org/docs/current/quickstart-standby-clone.html +# TODO: can you use `ostack_floating_ip` here? +- name: Run repmgr to dry run clone + ansible.builtin.command: repmgr -h '{{ ostack_primary_ip }}' -U repmgr -d repmgr -f /etc/repmgr.conf standby clone --dry-run + become: true + become_user: postgres + register: dry_run_clone_results + changed_when: false + +- name: Print out the dry_run_clone_results + ansible.builtin.debug: + msg: "repmgr {{ dry_run_clone_results }}" + +- name: Run repmgr to clone standby from primary + ansible.builtin.command: repmgr -h '{{ ostack_primary_ip }}' -U repmgr -d repmgr -f /etc/repmgr.conf standby clone -F + become: true + register: clone_results + become_user: postgres + # changed_when: "'NOTICE: standby clone (using pg_basebackup) complete' in clone_results.stderr_lines" + changed_when: true + +- name: Print out the clone_results + ansible.builtin.debug: + msg: "repmgr {{ clone_results }}" + +# try to clean up so can run standby clone ?
+# - name: Recursively remove directory +# ansible.builtin.file: +# path: /mnt/ssd-b/16/main +# state: absent +# become: true +# - name: Create a main directory if it does not exist +# ansible.builtin.file: +# path: /mnt/ssd-b/16/main +# state: directory +# mode: '0700' +# become: true +# become_user: postgres + +# https://www.postgresql.org/docs/current/app-pgbasebackup.html +# NOTE: this part is not idempotent: if a db is already in the dir it will fail, +# hence the cleanup above; this should not be run on a database with data!!! +# not needed if using repmgr, since clone calls this +# - name: Run pg_basebackup to initialize the replica / standby +# ansible.builtin.shell: | +# export PGPASSWORD="{{ replicator_password }}" && +# pg_basebackup --pgdata=/mnt/ssd-b/16/main -R --slot=replication_slot --user=replicator --host={{ primary_host }} --port=5432 +# args: +# executable: /bin/bash +# become: true +# become_user: postgres +# register: basebackup_results + +# - name: Print out the basebackup_results +# debug: msg="backup {{ basebackup_results }}" + +- name: Restart service postgres + ansible.builtin.systemd_service: + name: postgresql + state: restarted + become: true + +- name: Wait for port 5432 to be available, don't check for the initial 10 seconds + ansible.builtin.wait_for: + host: 0.0.0.0 + port: 5432 + delay: 10 + state: started + +# https://www.repmgr.org/docs/current/quickstart-register-standby.html +- name: Run repmgr to register the standby + ansible.builtin.command: repmgr -f /etc/repmgr.conf standby register + become: true + become_user: postgres + register: register_results + # changed_when: "'INFO: standby registration complete' in register_results.stderr_lines" + changed_when: true + +- name: Print out the register_standby_results + ansible.builtin.debug: + msg: "repmgr {{ register_results.stderr }}" + +# run some sql... to confirm clone? +- name: Do some sql to test for the existence of lard...? + community.postgresql.postgresql_query: + db: lard + query: select count(*) from timeseries + register: query_results + become: true + become_user: postgres + +- name: Print out the query + ansible.builtin.debug: + msg: "Query {{ query_results }}" diff --git a/ansible/roles/ostack/tasks/move_floating_ip.yml b/ansible/roles/ostack/tasks/move_floating_ip.yml new file mode 100644 index 00000000..d49318ee --- /dev/null +++ b/ansible/roles/ostack/tasks/move_floating_ip.yml @@ -0,0 +1,63 @@ +--- +# Switch over the primary's particular floating ip +# this makes sense to do after successfully switching over, +# however it means that the stuff writing to the primary needs to be +# robust enough to handle getting told the db is in a read only state for a short period.
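+# Before detaching anything it may be worth confirming which node repmgr now
+# considers primary; a sketch (it would run on one of the db hosts, not localhost):
+# - name: Show the repmgr view of the cluster before moving the ip
+#   ansible.builtin.command: repmgr -f /etc/repmgr.conf cluster show
+#   become: true
+#   become_user: postgres
+#   changed_when: false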
+- name: Move primary floating ip + block: + # remove from old primary + - name: Detach floating ip address that we keep connected to the primary + openstack.cloud.floating_ip: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + server: "{{ ostack_primary }}" + state: absent + network: public + floating_ip_address: "{{ ostack_floating_ip }}" + + - name: Gather information about new primary server + openstack.cloud.server_info: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + name: "{{ ostack_standby }}" + register: new_primary_server + + - name: Print out the ipalias port information for the server + ansible.builtin.debug: + msg: "Server {{ new_primary_server.servers[0].addresses.ipalias }}" + + # add to what is now primary (used to be standby) + # unfortunately it seems that attaching the floating ip results in a timeout + # even though it actually succeeds + - name: Attach floating ip address that we keep connected to the primary + openstack.cloud.floating_ip: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + server: "{{ new_primary_server.servers[0].id }}" + state: present + reuse: true + network: public + fixed_address: "{{ new_primary_server.servers[0].addresses.ipalias[0].addr }}" + floating_ip_address: "{{ ostack_floating_ip }}" + wait: true + # timeout: 60 + ignore_errors: true + register: attach_result + when: new_primary_server.servers[0].addresses.ipalias | length <=1 + + - name: Print out result of attaching floating ip address + ansible.builtin.debug: + msg: "{{ attach_result }}" + +- name: Check floating ip is attached + openstack.cloud.floating_ip_info: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + floating_ip_address: "{{ ostack_floating_ip }}" + register: fip_info + +# this will not run if the ip is not now on the right vm +- name: Print out the floating ip information to confirm it's ok + ansible.builtin.debug: + msg: "Floating ip {{ fip_info }}" + when: fip_info.floating_ips[0].port_details.device_id == new_primary_server.servers[0].id diff --git a/ansible/roles/ostack/tasks/networks.yml b/ansible/roles/ostack/tasks/networks.yml new file mode 100644 index 00000000..3642bdfe --- /dev/null +++ b/ansible/roles/ostack/tasks/networks.yml @@ -0,0 +1,12 @@ +--- +- name: Create the project network (if it doesn't exist) + ansible.builtin.import_tasks: networks/create_project_network.yml + +- name: Create the project security group (if it doesn't exist) + ansible.builtin.import_tasks: networks/create_project_security_group.yml + +- name: Create the ipalias network (if it doesn't exist) + ansible.builtin.import_tasks: networks/create_ipalias_network.yml + +- name: Create ping security group + ansible.builtin.import_tasks: networks/open_for_ping.yml diff --git a/ansible/roles/ostack/tasks/networks/create_ipalias_network.yml b/ansible/roles/ostack/tasks/networks/create_ipalias_network.yml new file mode 100644 index 00000000..df0250aa --- /dev/null +++ b/ansible/roles/ostack/tasks/networks/create_ipalias_network.yml @@ -0,0 +1,42 @@ +--- +- name: Create ipalias network + openstack.cloud.network: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + name: ipalias + external: false + state: present + run_once: true + +- name: Create ipalias network subnet + openstack.cloud.subnet: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + network_name: ipalias + name: ipalias-subnet + cidr: "{{ ostack_ipalias_network_cidr }}" + state: present + dns_nameservers: "{{ 
ostack_network_dns[ostack_region] }}" + run_once: true + +- name: Connect ipalias network to public network + openstack.cloud.router: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + state: present + name: ipalias-router + network: public + interfaces: + - ipalias-subnet + run_once: true + +- name: Remove default gateway for subnet + openstack.cloud.subnet: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + network_name: ipalias + name: ipalias-subnet + cidr: "{{ ostack_ipalias_network_cidr }}" + no_gateway_ip: true + state: present + run_once: true diff --git a/ansible/roles/ostack/tasks/networks/create_project_network.yml b/ansible/roles/ostack/tasks/networks/create_project_network.yml new file mode 100644 index 00000000..62a97538 --- /dev/null +++ b/ansible/roles/ostack/tasks/networks/create_project_network.yml @@ -0,0 +1,28 @@ +--- +- name: Create private network + openstack.cloud.network: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + state: present + name: "{{ ostack_network_name }}" + external: false + +- name: Create private network subnet + openstack.cloud.subnet: + state: present + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + network_name: "{{ ostack_network_name }}" + name: "{{ ostack_network_name }}-subnet" + cidr: "{{ ostack_network_cidr }}" + dns_nameservers: "{{ ostack_network_dns[ostack_region] }}" + +- name: Connect private network to public network + openstack.cloud.router: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + state: present + name: public-router + network: public + interfaces: + - "{{ ostack_network_name }}-subnet" diff --git a/ansible/roles/ostack/tasks/networks/create_project_security_group.yml b/ansible/roles/ostack/tasks/networks/create_project_security_group.yml new file mode 100644 index 00000000..4fa115dd --- /dev/null +++ b/ansible/roles/ostack/tasks/networks/create_project_security_group.yml @@ -0,0 +1,21 @@ +--- +- name: Create security groups + openstack.cloud.security_group: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + name: "{{ item }}" + description: Created with Ansible + loop: '{{ ostack_network_security_groups | map(attribute="name") | list | unique }}' + +- name: Populate security groups + openstack.cloud.security_group_rule: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + security_group: "{{ item.name }}" + protocol: tcp + port_range_max: "{{ item.rule.port }}" + port_range_min: "{{ item.rule.port }}" + remote_ip_prefix: "{{ item.rule.subnet }}" + loop: "{{ ostack_network_security_groups }}" + loop_control: + label: "updating security group {{ item.name }} with rule {{ item.rule }}" diff --git a/ansible/roles/networks/tasks/open-for-ping.yml b/ansible/roles/ostack/tasks/networks/open_for_ping.yml similarity index 54% rename from ansible/roles/networks/tasks/open-for-ping.yml rename to ansible/roles/ostack/tasks/networks/open_for_ping.yml index 0e383f15..88de7ae8 100644 --- a/ansible/roles/networks/tasks/open-for-ping.yml +++ b/ansible/roles/ostack/tasks/networks/open_for_ping.yml @@ -1,15 +1,15 @@ -### stuff needed for ping +--- - name: Create ping security group openstack.cloud.security_group: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" name: ping description: Created with Ansible - name: Populate ping security group openstack.cloud.security_group_rule: - cloud: '{{ ostack_cloud }}' - region_name: '{{ 
ostack_region }}' + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" security_group: ping protocol: icmp - remote_ip_prefix: "157.249.0.0/16" \ No newline at end of file + remote_ip_prefix: "157.249.0.0/16" diff --git a/ansible/roles/ostack/tasks/vm_create.yml b/ansible/roles/ostack/tasks/vm_create.yml new file mode 100644 index 00000000..3805188d --- /dev/null +++ b/ansible/roles/ostack/tasks/vm_create.yml @@ -0,0 +1,93 @@ +--- +- name: Create VM + openstack.cloud.server: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + availability_zone: "{{ ostack_availability_zone }}" + name: "{{ inventory_hostname }}" + image: "{{ ostack_vm_image }}" + flavor: "{{ ostack_vm_flavor }}" + network: "{{ ostack_network_name }}" + key_name: "{{ ostack_vm_key_name }}" + security_groups: "{{ ostack_vm_security_groups }}" + state: "{{ ostack_state }}" + # do not give an ip, since we want to assign a specific one in the next step (so as to reuse the ones we have) + auto_ip: false + register: server + +- name: Print out the server information + ansible.builtin.debug: + msg: "Server {{ lookup('ansible.builtin.dict', server) }}" + +- name: Attach floating ip address + openstack.cloud.floating_ip: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + server: "{{ server.server.id }}" + reuse: true + network: public + # TODO: check that this is correct + floating_ip_address: "{{ ansible_host }}" + wait: true + timeout: 60 + +- name: Create Volume + openstack.cloud.volume: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + availability_zone: "{{ ostack_availability_zone }}" + name: "{{ inventory_hostname }}" + volume_type: "{{ ostack_vm_volume_type }}" + size: "{{ ostack_vm_volume_size }}" + register: volume + +- name: Print out the volume information + ansible.builtin.debug: + msg: "Volume {{ lookup('ansible.builtin.dict', volume) }}" + +- name: Attach a volume to a compute host + openstack.cloud.server_volume: + state: present + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + volume: "{{ volume.volume.id }}" + server: "{{ server.server.id }}" + device: /dev/vdb + +- name: Create port for ipalias and set security groups + openstack.cloud.port: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + state: present + name: "ipalias-{{ inventory_hostname }}" + network: "{{ ostack_ipalias_network_name }}" + security_groups: "{{ ostack_vm_security_groups }}" + +- name: Get port info + openstack.cloud.port_info: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + port: "ipalias-{{ inventory_hostname }}" + register: ipalias_port + +- name: Print out the port information + ansible.builtin.debug: + msg: "Port {{ lookup('ansible.builtin.dict', ipalias_port) }}" + +# TODO: Can't this be done directly above +# by assigning `server.server.id` to `device_id`?
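+# Possibly; the openstack.cloud.port module documents a `device_id` option,
+# so the port could in principle be created already attached to the server.
+# Untested here, so the CLI call below is kept; a sketch of the alternative:
+# - name: Create port for ipalias attached directly to the server
+#   openstack.cloud.port:
+#     cloud: "{{ ostack_cloud }}"
+#     region_name: "{{ ostack_region }}"
+#     state: present
+#     name: "ipalias-{{ inventory_hostname }}"
+#     network: "{{ ostack_ipalias_network_name }}"
+#     security_groups: "{{ ostack_vm_security_groups }}"
+#     device_id: "{{ server.server.id }}"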
+- name: Add the ipalias network to server + ansible.builtin.command: + argv: + - openstack + - --os-cloud + - "{{ ostack_cloud }}" + - --os-region-name + - "{{ ostack_region }}" + - server + - add + - port + - "{{ server.server.id }}" + - "ipalias-{{ inventory_hostname }}" + when: ipalias_port.ports.0.device_id | length <=0 + changed_when: true diff --git a/ansible/roles/ostack/tasks/vm_format.yml b/ansible/roles/ostack/tasks/vm_format.yml new file mode 100644 index 00000000..0bb73873 --- /dev/null +++ b/ansible/roles/ostack/tasks/vm_format.yml @@ -0,0 +1,9 @@ +--- +- name: Netplan + ansible.builtin.import_tasks: vm_format/netplan.yml + +- name: Format and mount the disk + ansible.builtin.import_tasks: vm_format/mount_disk.yml + +- name: Install postgres + ansible.builtin.import_tasks: vm_format/install_postgres.yml diff --git a/ansible/roles/ostack/tasks/vm_format/install_postgres.yml b/ansible/roles/ostack/tasks/vm_format/install_postgres.yml new file mode 100644 index 00000000..23f39525 --- /dev/null +++ b/ansible/roles/ostack/tasks/vm_format/install_postgres.yml @@ -0,0 +1,140 @@ +--- +- name: Add postgres apt key by id from a keyserver + ansible.builtin.apt_key: + url: https://www.postgresql.org/media/keys/ACCC4CF8.asc + state: present + become: true + +- name: Add postgres repository into sources list + ansible.builtin.apt_repository: + repo: deb https://apt.postgresql.org/pub/repos/apt jammy-pgdg main + state: present + become: true + +- name: Install apt packages + ansible.builtin.apt: + pkg: + - nano + - postgresql-16 + - postgresql-16-repmgr # https://www.repmgr.org/docs/current/install-requirements.html + - pip # needed for installing psycopg2 + - acl # needed for becoming unprivileged user (such as postgres) + update_cache: true + become: true + +- name: Install pip packages + ansible.builtin.pip: + name: "{{ item }}" + become: true + loop: + # TODO: should probably use psycopg3 instead, v2 is deprecated + - psycopg2-binary # dependency for community.postgresql + - openstacksdk + +# Make it so the data is actually kept on the ssd mount +# First stop postgres service +- name: Stop service postgres, if running + ansible.builtin.systemd_service: + name: postgresql + state: stopped + become: true + +- name: Rsync postgres directory to ssd mount + ansible.posix.synchronize: + archive: true + src: /var/lib/postgresql/ + dest: "{{ ostack_mount_point }}" + become: true + # synchronize runs by default on localhost + delegate_to: "{{ inventory_hostname }}" + +- name: Comment out original data_directory + ansible.builtin.replace: + dest: /etc/postgresql/16/main/postgresql.conf + regexp: '^data_directory' + replace: '#data_directory' + become: true + +- name: Modify postgresql config + ansible.builtin.blockinfile: + dest: /etc/postgresql/16/main/postgresql.conf + # NOTE: single quotes are mandatory here! + block: | + data_directory = '{{ ostack_mount_point }}/16/main' + listen_addresses = '*' + become: true + +# probably want to restrict this once we know what will connect? +# but the security group rules should take care of limiting to met ranges +- name: Change hba conf to allow connections + community.postgresql.postgresql_pg_hba: + dest: /etc/postgresql/16/main/pg_hba.conf + contype: host + address: all # can we put met internal ip range(s)? + method: md5 + # users and database default to all + become: true
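+# If the answer to the question above is yes, the rule can be tightened by
+# looping over the known ranges instead of `address: all`; a sketch, where
+# 157.249.0.0/16 is borrowed from the ping security group and may not be the
+# complete list:
+# - name: Change hba conf to allow connections from met ranges only
+#   community.postgresql.postgresql_pg_hba:
+#     dest: /etc/postgresql/16/main/pg_hba.conf
+#     contype: host
+#     address: "{{ item }}"
+#     method: md5
+#   become: true
+#   loop:
+#     - 157.249.0.0/16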
+ +# make sure these changes take effect +- name: Start up postgres service again + ansible.builtin.systemd_service: + name: postgresql + state: started + become: true + +# REPMGR +- name: Create repmgr user # https://www.repmgr.org/docs/current/quickstart-repmgr-user-database.html + community.postgresql.postgresql_user: + name: repmgr + # NOTE: single quotes are mandatory here! + password: '{{ ostack_repmgr_password }}' + role_attr_flags: SUPERUSER + become: true + become_user: postgres + +- name: Create a repmgr database, with owner repmgr + community.postgresql.postgresql_db: + name: repmgr + owner: repmgr + become: true + become_user: postgres + +- name: Change hba conf to allow repmgr to connect for replication + community.postgresql.postgresql_pg_hba: + dest: /etc/postgresql/16/main/pg_hba.conf + databases: replication + contype: host + users: repmgr + address: all + # address: '{{ standby_host }}' + method: trust + become: true + +- name: Change hba conf to allow repmgr to connect to the repmgr db + community.postgresql.postgresql_pg_hba: + dest: /etc/postgresql/16/main/pg_hba.conf + databases: repmgr + contype: host + users: repmgr + address: all + # address: '{{ standby_host }}' + method: trust + become: true + +- name: Restart postgres + ansible.builtin.systemd_service: + name: postgresql + state: restarted + become: true + +- name: Allow the postgres user to run /bin/systemctl restart, stop, start postgres + community.general.sudoers: + name: postgresql + user: postgres + commands: + - /bin/systemctl restart postgresql.service + - /bin/systemctl stop postgresql.service + - /bin/systemctl start postgresql.service + - /bin/systemctl reload postgresql.service + nopassword: true + become: true diff --git a/ansible/roles/ostack/tasks/vm_format/mount_disk.yml b/ansible/roles/ostack/tasks/vm_format/mount_disk.yml new file mode 100644 index 00000000..a31da5d3 --- /dev/null +++ b/ansible/roles/ostack/tasks/vm_format/mount_disk.yml @@ -0,0 +1,45 @@ +--- +- name: Create mount point + ansible.builtin.file: + path: "{{ ostack_mount_point }}" + state: directory + owner: ubuntu # change to postgres? + group: ubuntu # change to postgres? + mode: 'u=rw,g=rws,o=r' + become: true + +- name: Create ext4 filesystem on mount device + community.general.filesystem: + dev: '{{ ostack_mount_device }}' + fstype: ext4 + become: true + +- name: Read device information (always use unit when probing) + community.general.parted: + device: '{{ ostack_mount_device }}' + unit: MiB + register: sdb_info + become: true + +- name: Print out the device information + ansible.builtin.debug: + msg: "Partitions {{ sdb_info.partitions }}" + +# this also changes the fstab so it's still there when rebooted!
+- name: Mount the device on the mount point + ansible.posix.mount: + path: "{{ ostack_mount_point }}" + src: '{{ ostack_mount_device }}' + fstype: ext4 + state: mounted + become: true + +- name: Fetch the UUID of mounted device + ansible.builtin.command: blkid --match-tag UUID --output value '{{ ostack_mount_device }}' + changed_when: false + register: blkid_cmd + become: true + +- name: Print out the UUID + ansible.builtin.debug: + msg: "UUID {{ blkid_cmd.stdout }}" diff --git a/ansible/roles/ostack/tasks/vm_format/netplan.yml b/ansible/roles/ostack/tasks/vm_format/netplan.yml new file mode 100644 index 00000000..2a8790bb --- /dev/null +++ b/ansible/roles/ostack/tasks/vm_format/netplan.yml @@ -0,0 +1,61 @@ +--- +- name: Get port info + openstack.cloud.port_info: + cloud: '{{ ostack_cloud }}' + region_name: '{{ ostack_region }}' + port: 'ipalias-{{ inventory_hostname }}' + register: ipalias_port + delegate_to: localhost + +- name: Print out the port information + ansible.builtin.debug: + msg: "Port {{ lookup('ansible.builtin.dict', ipalias_port) }}" + +- name: IP alias netplan configuration + ansible.builtin.set_fact: + netplan_config: + network: + version: 2 + ethernets: + ens6: + dhcp4: true + dhcp4-overrides: + use-routes: false + match: + macaddress: '{{ ipalias_port.ports.0.mac_address }}' + set-name: ens6 + routes: + - to: 0.0.0.0/0 + via: '{{ ostack_ipalias_network_cidr | ansible.utils.ipaddr("net") | ansible.utils.ipaddr("1") | ansible.utils.ipaddr("address") }}' + table: 102 + routing-policy: + - from: '{{ ostack_ipalias_network_cidr }}' + table: 102 + +- name: Copy out ipalias netplan config + ansible.builtin.copy: + content: '{{ netplan_config | to_nice_yaml }}' + dest: /etc/netplan/90-ansible-ipalias.yaml + mode: '0644' + register: netplan_copy_result # renamed so it doesn't shadow the netplan_config fact set above + become: true + +- name: Print out netplan config + ansible.builtin.debug: + msg: "Netplan {{ netplan_copy_result }}" + # https://gitlab.met.no/ansible-roles/ipalias/-/blob/master/tasks/netplan.yml?ref_type=heads + # this times out and then the servers are unreachable? + # - name: Reboot server to apply new netplan config, without hitting netplan bug + # ansible.builtin.reboot: # noqa no-handler + # reboot_timeout: 3600 + # when: netplan_copy_result is changed + # become: true + +- name: Apply netplan + ansible.builtin.command: sudo netplan apply + async: 45 + poll: 0 + register: netplan_output + # TODO: need to check output of netplan apply to determine + # changed_when condition + changed_when: true diff --git a/ansible/roles/primarystandbysetup/tasks/create-primary.yml b/ansible/roles/primarystandbysetup/tasks/create-primary.yml deleted file mode 100644 index 94d364f6..00000000 --- a/ansible/roles/primarystandbysetup/tasks/create-primary.yml +++ /dev/null @@ -1,262 +0,0 @@ -# set up a role and provide suitable entries in pg_hba.conf with the database field set to replication - -# ensure max_wal_senders is set to a sufficiently large value in the conf file (also possibly max_replication_slots?) -# When running a standby server, you must set this parameter to the same or higher value than on the primary server. Otherwise, queries will not be allowed in the standby server. - -# set wal_keep_size to a value large enough to ensure that WAL segments are not recycled too early, or configure a replication slot for the standby? -# if there is a WAL archive accessible to the standby this may not be needed?
- -# On systems that support the keepalive socket option, setting tcp_keepalives_idle, tcp_keepalives_interval and tcp_keepalives_count helps the primary promptly notice a broken connection. - -# example auth -# Allow the user "foo" from host 192.168.1.100 to connect to the primary -# as a replication standby if the user's password is correctly supplied. -# -# TYPE DATABASE USER ADDRESS METHOD -# host replication foo 192.168.1.100/32 md5 ---- - - name: Create a new database with name lard - community.postgresql.postgresql_db: - name: lard - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - - name: Copy the db folder to the remote - ansible.builtin.copy: - src: ../../../../db/ - dest: /etc/postgresql/16/db/ - mode: '0755' - become: true - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - - name: Create the public schema in lard - community.postgresql.postgresql_script: - db: lard - path: /etc/postgresql/16/db/public.sql - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - - name: Create the labels schema in lard - community.postgresql.postgresql_script: - db: lard - path: /etc/postgresql/16/db/labels.sql - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - - name: Connect to lard database, create user - community.postgresql.postgresql_user: - db: lard - name: lard_user - password: '{{ db_password }}' - role_attr_flags: SUPERUSER # not desired, but the privelege granting doesn't seem to work? - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - # - name: Grant lard_user priveleges on lard database - # community.postgresql.postgresql_privs: - # type: database - # db: lard - # privs: ALL - # role: lard_user - # become: true - # become_user: postgres - - # MAKE IT THE PRIMARY - - name: Set wal_level parameter - community.postgresql.postgresql_set: - name: wal_level - value: replica # https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set hot_standby parameter - community.postgresql.postgresql_set: - name: hot_standby - value: true - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set hot_standby_feedback parameter - community.postgresql.postgresql_set: - name: hot_standby_feedback - value: true - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set max_wal_senders parameter - community.postgresql.postgresql_set: - name: max_wal_senders - value: 10 - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set wal_log_hints parameter # needs to be enabled to use pg_rewind - # https://www.postgresql.org/docs/current/app-pgrewind.html - community.postgresql.postgresql_set: - name: wal_log_hints - value: true - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set max_replication_slots parameter - community.postgresql.postgresql_set: - name: max_replication_slots - value: 10 - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - # make it SYNCHRONOUS REPLICATION (without the next two settings it would be asynchronous) - - name: Set synchronous_standby_names parameter - community.postgresql.postgresql_set: - name: 
synchronous_standby_names # https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-SYNCHRONOUS-STANDBY-NAMES - value: "*" # all the standbys - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set synchronous_commit parameter - community.postgresql.postgresql_set: - name: synchronous_commit # https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-SYNCHRONOUS-COMMIT - value: on # will not give standby query consistency (tradeoff for better write performance), but will give standby durable commit after OS crash - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - # repmgr - # https://www.repmgr.org/docs/current/quickstart-repmgr-conf.html - - name: Create a repmgr.conf if it does not exist - ansible.builtin.file: - path: /etc/repmgr.conf - state: touch - mode: '0755' - become: true - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set contents of repmgr.conf - ansible.builtin.copy: - dest: "/etc/repmgr.conf" - content: | - node_id=1 - node_name='{{ primary_name }}' - conninfo='host={{ primary_ip }} user=repmgr dbname=repmgr connect_timeout=2' - data_directory='/mnt/ssd-b/16/main' - service_start_command='sudo /bin/systemctl start postgresql.service' - service_stop_command='sudo /bin/systemctl stop postgresql.service' - service_restart_command='sudo /bin/systemctl restart postgresql.service' - service_reload_command='sudo /bin/systemctl reload postgresql.service' - mode: '0755' - become: true - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - # https://www.repmgr.org/docs/current/quickstart-primary-register.html - - name: Run repmgr to register the primary - ansible.builtin.command: repmgr -f /etc/repmgr.conf primary register -F # only need -F if rerunning - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - register: register_primary_results - - name: Print out the register_primary_results - ansible.builtin.debug: - msg: "repmgr {{ register_primary_results }}" - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - # # STUFF FOR REPLICATION (do not need if using repmgr) - # - name: Create replicator user with replication priveleges - # community.postgresql.postgresql_user: - # name: replicator - # password: '{{ replicator_password }}' - # role_attr_flags: REPLICATION - # become: true - # become_user: postgres - - # # also specifically allow the replicator user - # - name: Change hba conf to allow replicator to connect - # community.postgresql.postgresql_pg_hba: - # dest: /etc/postgresql/16/main/pg_hba.conf - # databases: replication - # contype: host - # users: replicator - # #address: all - # address: '{{ standby_host }}' - # method: trust # seems to hang with md5, how to make auth work? - # become: true - - # # create replication slot - # - name: Create physical replication slot if doesn't exist - # become_user: postgres - # community.postgresql.postgresql_slot: - # slot_name: replication_slot - # #db: lard - # become: true - - # make sure these changes take effect? 
- - name: Restart service postgres - ansible.builtin.systemd_service: - name: postgresql - state: restarted - become: true - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - ### now move back to default of operating from localhost - - name: Attach primary floating ip - block: - - name: Gather information about primary server - openstack.cloud.server_info: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - name: '{{ primary_name }}' - become: false - register: primary_server - - - name: Print out the ipalias port information for the server - ansible.builtin.debug: - msg: "Server {{ primary_server.servers[0].addresses.ipalias }}" - - # give the primary a particular floating ip - - name: Attach floating ip address that we keep connected to the primary - openstack.cloud.floating_ip: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - server: '{{ primary_server.servers[0].id }}' - reuse: true - network: public - fixed_address: '{{ primary_server.servers[0].addresses.ipalias[0].addr }}' - floating_ip_address: '{{ primary_floating_ip }}' - wait: true - timeout: 60 - when: primary_server.servers[0].addresses.ipalias | length <=1 - # unfortunately it seems that attaching the floating ip results in a timeout - # even though it actually succeeds - ignore_errors: true - - - name: Check floating ip is attached - openstack.cloud.floating_ip_info: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - floating_ip_address: '{{ primary_floating_ip }}' - register: fip - - # this will not run if the ip is not currently on the vm - - name: Print out the floating ip information to confirm it's ok - ansible.builtin.debug: - msg: "Floating ip {{ fip }}" - when: fip.floating_ips[0].port_details.device_id == primary_server.servers[0].id \ No newline at end of file diff --git a/ansible/roles/primarystandbysetup/tasks/create-standby.yml b/ansible/roles/primarystandbysetup/tasks/create-standby.yml deleted file mode 100644 index d565243e..00000000 --- a/ansible/roles/primarystandbysetup/tasks/create-standby.yml +++ /dev/null @@ -1,149 +0,0 @@ -# create standby.signal file in data directory - -# configure streaming WAL -# primary_conninfo needs a libpq connection string (ip address + other details needed to connect to primary server) - -# since we want the standby to be able to operate as the primary, we need to configure the WAL archiving, connections, and auth like the primary -# example: -#primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass options=''-c wal_sender_timeout=5000''' -#restore_command = 'cp /path/to/archive/%f %p' -#archive_cleanup_command = 'pg_archivecleanup /path/to/archive %r' - -# add the following line to the postgresql.conf file on the standby -# The standby connects to the primary that is running on host 192.168.1.50 -# and port 5432 as the user "foo" whose password is "foopass".
-#primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass' - -# use the replication slot on the primary (in file after the primary_conninfo) -#primary_slot_name = 'node_a_slot' ---- - # repmgr - # https://www.repmgr.org/docs/current/quickstart-standby-clone.html - # must be done before the standby is put into read only mode (therefore not idempotent) - - name: Create a repmgr.conf if it does not exist - ansible.builtin.file: - path: /etc/repmgr.conf - state: touch - mode: '0755' - become: true - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - name: Set contents of repmgr.conf - ansible.builtin.copy: - dest: "/etc/repmgr.conf" - content: | - node_id=2 - node_name='{{ standby_name }}' - conninfo='host={{ standby_ip }} user=repmgr dbname=repmgr connect_timeout=2' - data_directory='/mnt/ssd-b/16/main' - service_start_command='sudo /bin/systemctl start postgresql.service' - service_stop_command='sudo /bin/systemctl stop postgresql.service' - service_restart_command='sudo /bin/systemctl restart postgresql.service' - service_reload_command='sudo /bin/systemctl reload postgresql.service' - mode: '0755' - become: true - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - - name: Stop service postgres, if running - ansible.builtin.systemd_service: - name: postgresql - state: stopped - become: true - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - # https://www.repmgr.org/docs/current/quickstart-standby-clone.html - - name: Run repmgr to dry run clone - ansible.builtin.command: repmgr -h '{{ primary_ip }}' -U repmgr -d repmgr -f /etc/repmgr.conf standby clone --dry-run - become: true - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - register: dry_run_clone_results - - name: Print out the dry_run_clone_results - ansible.builtin.debug: - msg: "repmgr {{ dry_run_clone_results }}" - - - name: Run repmgr to clone standby from primary - ansible.builtin.command: repmgr -h '{{ primary_ip }}' -U repmgr -d repmgr -f /etc/repmgr.conf standby clone -F - become: true - register: clone_results - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - name: Print out the clone_results - ansible.builtin.debug: - msg: "repmgr {{ clone_results }}" - - # try to clean up so can run standby clone ? - # - name: Recursively remove directory - # ansible.builtin.file: - # path: /mnt/ssd-b/16/main - # state: absent - # become: true - # - name: Create a main directory if it does not exist - # ansible.builtin.file: - # path: /mnt/ssd-b/16/main - # state: directory - # mode: '0700' - # become: true - # become_user: postgres - - # https://www.postgresql.org/docs/current/app-pgbasebackup.html - # NOTE: this part is not idempotent, so if a db is already in the dir, it will fail - # hence the stuff above that means this should not be run on a database with data!!! 
- # not needed if using repmgr, since clone calls this - # - name: Run pg_basebackup to initialize the replica / standby - # ansible.builtin.shell: export PGPASSWORD="{{ replicator_password }}" && pg_basebackup --pgdata=/mnt/ssd-b/16/main -R --slot=replication_slot --user=replicator --host={{ primary_host }} --port=5432 - # args: - # executable: /bin/bash - # become: true - # become_user: postgres - # register: basebackup_results - - # - name: Print out the basebackup_results - # debug: msg="backup {{ basebackup_results }}" - - - name: Restart service postgres - ansible.builtin.systemd_service: - name: postgresql - state: restarted - become: true - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - - name: Waits for port 5432 to be available, don't check for initial 10 seconds - ansible.builtin.wait_for: - host: 0.0.0.0 - port: 5432 - delay: 10 - state: started - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - # https://www.repmgr.org/docs/current/quickstart-register-standby.html - - name: Run repmgr to register the standby - ansible.builtin.command: repmgr -f /etc/repmgr.conf standby register - become: true - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - register: register_standby_results - - name: Print out the register_standby_results - ansible.builtin.debug: - msg: "repmgr {{ register_standby_results }}" - - # run some sql... to confirm clone? - - name: Do some sql to test for the existence of lard...? - community.postgresql.postgresql_query: - db: lard - query: select count(*) from timeseries - register: query_results - become: true - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - name: Print out the query - ansible.builtin.debug: - msg: "Query {{ query_results }}" diff --git a/ansible/roles/primarystandbysetup/tasks/main.yml b/ansible/roles/primarystandbysetup/tasks/main.yml deleted file mode 100644 index d1d6f048..00000000 --- a/ansible/roles/primarystandbysetup/tasks/main.yml +++ /dev/null @@ -1,7 +0,0 @@ -# roles/primarystandbysetup/tasks/main.yml -- name: Turn a vm into the primary - import_tasks: create-primary.yml - -# note, may in the future want to make multiple standbys -- name: Turn a vm into the standby - import_tasks: create-standby.yml diff --git a/ansible/roles/rejoin/default/main.yml b/ansible/roles/rejoin/default/main.yml new file mode 100644 index 00000000..7c7866b6 --- /dev/null +++ b/ansible/roles/rejoin/default/main.yml @@ -0,0 +1,2 @@ +--- +rejoin_primary_ip: diff --git a/ansible/roles/rejoin/tasks/main.yml b/ansible/roles/rejoin/tasks/main.yml index 82fad6c4..65ed311e 100644 --- a/ansible/roles/rejoin/tasks/main.yml +++ b/ansible/roles/rejoin/tasks/main.yml @@ -1,3 +1,53 @@ -# roles/rejoin/tasks/main.yml -- name: Rejoin an old primary to cluster as standby - import_tasks: rejoin_old_primary.yml +--- +- name: Stop postgres service + ansible.builtin.systemd_service: + name: postgresql + state: stopped + become: true + +- name: Dry run of rejoin + ansible.builtin.command: > + repmgr node rejoin + -f /etc/repmgr.conf -d 'host='{{ rejoin_primary_ip }}' user=repmgr dbname=repmgr connect_timeout=2' + --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose --dry-run + become: true + become_user: postgres + register: dry_run_results + changed_when: false # dry run does not change status + +- name: Print out the rejoin_dry_run_results + ansible.builtin.debug: + msg: "repmgr {{ dry_run_results }}" + +# TODO: add changed_when to fix lint? 
Need to figure out what the output of the command looks like +# Or is it always changed_when: true? +- name: Rejoin old primary as standby + ansible.builtin.command: > + repmgr node rejoin + -f /etc/repmgr.conf -d 'host='{{ rejoin_primary_ip }}' user=repmgr dbname=repmgr connect_timeout=2' + --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose + become: true + become_user: postgres + register: node_rejoin_results + changed_when: true + +- name: Print out the rejoin_results + ansible.builtin.debug: + msg: "repmgr {{ node_rejoin_results }}" + +- name: Start service postgres + ansible.builtin.systemd_service: + name: postgresql + state: started + become: true + +- name: Check cluster + ansible.builtin.command: repmgr -f /etc/repmgr.conf cluster show + become: true + become_user: postgres + register: status_results + changed_when: false # cluster show does not modify status of the host + +- name: Print out the status_results + ansible.builtin.debug: + msg: "repmgr {{ status_results }}" diff --git a/ansible/roles/rejoin/tasks/rejoin_old_primary.yml b/ansible/roles/rejoin/tasks/rejoin_old_primary.yml deleted file mode 100644 index e28d92ba..00000000 --- a/ansible/roles/rejoin/tasks/rejoin_old_primary.yml +++ /dev/null @@ -1,39 +0,0 @@ ---- - - name: stop service postgres - ansible.builtin.systemd_service: - name: postgresql - state: stopped - become: true - - - name: Dry run of rejoin - ansible.builtin.command: repmgr node rejoin -f /etc/repmgr.conf -d 'host='{{ primary_ip }}' user=repmgr dbname=repmgr connect_timeout=2' --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose --dry-run - become: true - become_user: postgres - register: rejoin_dry_run_results - - name: Print out the rejoin_dry_run_results - ansible.builtin.debug: - msg: "repmgr {{ rejoin_dry_run_results }}" - - - name: Rejoin old primary as standby - ansible.builtin.command: repmgr node rejoin -f /etc/repmgr.conf -d 'host='{{ primary_ip }}' user=repmgr dbname=repmgr connect_timeout=2' --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose - become: true - become_user: postgres - register: rejoin_results - - name: Print out the rejoin_results - ansible.builtin.debug: - msg: "repmgr {{ rejoin_results }}" - - - name: start service postgres - ansible.builtin.systemd_service: - name: postgresql - state: started - become: true - - - name: Check cluster - ansible.builtin.command: repmgr -f /etc/repmgr.conf cluster show - become: true - become_user: postgres - register: status_results - - name: Print out the status_results - ansible.builtin.debug: - msg: "repmgr {{ status_results }}" \ No newline at end of file diff --git a/ansible/roles/ssh/default/main.yml b/ansible/roles/ssh/default/main.yml new file mode 100644 index 00000000..78126382 --- /dev/null +++ b/ansible/roles/ssh/default/main.yml @@ -0,0 +1,3 @@ +ssh_user_key_list: + - name: + key: diff --git a/ansible/roles/ssh/tasks/localhost.yml b/ansible/roles/ssh/tasks/localhost.yml new file mode 100644 index 00000000..ff377ea1 --- /dev/null +++ b/ansible/roles/ssh/tasks/localhost.yml @@ -0,0 +1,12 @@ +--- +- name: Add hostkey to localhost `known_hosts` + ansible.builtin.known_hosts: + name: "{{ ansible_host }}" + state: present + # NOTE: requires `gather_facts: true` + key: > + "{{ ansible_host }}" + ecdsa-sha2-nistp256 + "{{ ansible_ssh_host_key_ecdsa_public }}" + hash_host: true + delegate_to: localhost diff --git a/ansible/roles/ssh/tasks/main.yml b/ansible/roles/ssh/tasks/main.yml deleted file mode 100644 index 1f968d65..00000000 --- 
a/ansible/roles/ssh/tasks/main.yml +++ /dev/null @@ -1,3 +0,0 @@ -# roles/ssh/tasks/main.yml -- name: Share the ssh keys one way between 2 particular VMs - import_tasks: share-ssh-keys.yml diff --git a/ansible/roles/ssh/tasks/postgres.yml b/ansible/roles/ssh/tasks/postgres.yml new file mode 100644 index 00000000..8fd33b19 --- /dev/null +++ b/ansible/roles/ssh/tasks/postgres.yml @@ -0,0 +1,65 @@ +--- +# find the other vms, that are not currently being iterated over +- name: Set host fact other_vms + ansible.builtin.set_fact: + other_vms: "{{ (ansible_play_hosts_all | difference([inventory_hostname])) }}" + +- name: List other VMs + ansible.builtin.debug: + msg: "{{ item }}" + loop: "{{ other_vms }}" + +- name: Create user postgres + ansible.builtin.user: + name: postgres + generate_ssh_key: true + ssh_key_bits: 2048 + ssh_key_file: .ssh/id_rsa + force: true + register: ssh_keys + become: true + +# Another way to generate a ssh key... +# - name: Force regenerate an OpenSSH keypair if it already exists +# community.crypto.openssh_keypair: +# path: .ssh/id_rsa +# force: true +# owner: postgres # should be this user's key +# register: ssh_keys +# become: true + +- name: List generated SSH key + ansible.builtin.debug: + msg: "{{ ssh_keys.ssh_public_key }}" + +- name: Add the key to authorized_key on the other vm + ansible.posix.authorized_key: + user: postgres + state: present + key: "{{ ssh_keys.ssh_public_key }}" + become: true + delegate_to: "{{ item }}" + loop: "{{ other_vms }}" + +- name: Get the host key + ansible.builtin.set_fact: + # NOTE: requires `gather_facts: true` + hostkey: "{{ ansible_ssh_host_key_ecdsa_public }}" + +- name: List host key + ansible.builtin.debug: + msg: "{{ hostkey }}" + +- name: List vm ip + ansible.builtin.debug: + msg: "{{ ansible_host }}" + +- name: Add the vm to known_hosts on the other vm + ansible.builtin.known_hosts: + path: ~postgres/.ssh/known_hosts # need this for the postgres user + name: "{{ ansible_host }}" + key: "{{ ansible_host }} ecdsa-sha2-nistp256 {{ hostkey }}" + state: present + become: true + delegate_to: "{{ item }}" + loop: "{{ other_vms }}" diff --git a/ansible/roles/ssh/tasks/share-ssh-keys.yml b/ansible/roles/ssh/tasks/share-ssh-keys.yml deleted file mode 100644 index 389f4b15..00000000 --- a/ansible/roles/ssh/tasks/share-ssh-keys.yml +++ /dev/null @@ -1,60 +0,0 @@ ---- - # find the other vm, that is not currently being iterated over (this will need to be changed if more than 2) - - name: Setting host facts for other_vm - ansible.builtin.set_fact: - other_vm: '{{ (ansible_play_hosts_all | difference([inventory_hostname])) | first }}' - - - name: List other vm - ansible.builtin.debug: - msg: "{{ other_vm }}" - - - name: Create user postgres - ansible.builtin.user: - name: postgres - generate_ssh_key: true - ssh_key_bits: 2048 - ssh_key_file: .ssh/id_rsa - force: true - register: ssh_keys - become: true - # Another way to generate a ssh key... 
- # - name: Force regenerate an OpenSSH keypair if it already exists - # community.crypto.openssh_keypair: - # path: .ssh/id_rsa - # force: true - # owner: postgres # should be this user's key - # register: ssh_keys - # become: true - - - name: List generated SSH key - ansible.builtin.debug: - msg: "{{ ssh_keys.ssh_public_key }}" - - - name: Add the key to authorized_key on the other vm - ansible.posix.authorized_key: - user: postgres - state: present - key: '{{ ssh_keys.ssh_public_key }}' - become: true - delegate_to: '{{ other_vm }}' - - - name: Get the host key - ansible.builtin.set_fact: - hostkey: '{{ ansible_ssh_host_key_ecdsa_public }}' - - - name: List host key - ansible.builtin.debug: - msg: "{{ hostkey }}" - - - name: List vm ip - ansible.builtin.debug: - msg: "{{ vm_ip }}" - - - name: Add the vm to known_hosts on the other vm - ansible.builtin.known_hosts: - path: ~postgres/.ssh/known_hosts # need this for the postgres user - name: '{{ vm_ip }}' - key: '{{ vm_ip }} ecdsa-sha2-nistp256 {{ hostkey }}' - state: present - become: true - delegate_to: '{{ other_vm }}' \ No newline at end of file diff --git a/ansible/roles/ssh/tasks/users.yml b/ansible/roles/ssh/tasks/users.yml new file mode 100644 index 00000000..a1c46cef --- /dev/null +++ b/ansible/roles/ssh/tasks/users.yml @@ -0,0 +1,10 @@ +--- +- name: Add users' keys to authorized_keys + ansible.posix.authorized_key: + # username on the remote host whose authorized keys are being modified + user: ubuntu + state: present + key: "{{ item.key }}" + loop: "{{ ssh_user_key_list }}" + loop_control: + label: "adding {{ item.name }} key to authorized_keys" diff --git a/ansible/roles/switchover/default/main.yml b/ansible/roles/switchover/default/main.yml new file mode 100644 index 00000000..696c0315 --- /dev/null +++ b/ansible/roles/switchover/default/main.yml @@ -0,0 +1,2 @@ +--- +switchover_primary: diff --git a/ansible/roles/switchover/tasks/main.yml b/ansible/roles/switchover/tasks/main.yml index 0fab67d6..3bc533ed 100644 --- a/ansible/roles/switchover/tasks/main.yml +++ b/ansible/roles/switchover/tasks/main.yml @@ -1,4 +1,61 @@ -# roles/switchover/tasks/main.yml -- name: Switchover - import_tasks: switchover.yml +--- +# TODO: maybe restart in separate task? So we only have standby code here? +# assume the db is already there and synched, +# so now want to turn the primary into a standby / replica +# and want to turn the current standby into the primary +- name: Restart service postgres (primary) + ansible.builtin.systemd_service: + name: postgresql + state: restarted + become: true + delegate_to: "{{ switchover_primary }}" + +# try to avoid issue: https://github.com/EnterpriseDB/repmgr/issues/703 +- name: Restart service postgres (standby) + ansible.builtin.systemd_service: + name: postgresql + state: restarted + become: true + +# can now just do this with repmgr +# https://www.repmgr.org/docs/current/preparing-for-switchover.html +# need the two instances to be able to ssh to each other! +# siblings-follow only really needed if you have multiple standbys...
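Since `repmgr standby switchover` runs commands on the peer node over SSH, it is worth confirming the trust set up by the ssh role before the dry run below. A minimal manual check, assuming the keys were shared between the nodes' `postgres` users as above (`<other-node>` is a placeholder for the peer's address):

```terminal
sudo -u postgres ssh <other-node> true
sudo -u postgres repmgr -f /etc/repmgr.conf cluster show
```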
+- name: Dry run of switching the standby and primary + ansible.builtin.command: repmgr standby switchover -f /etc/repmgr.conf --siblings-follow --dry-run + become: true + become_user: postgres + register: switchover_dry_run_results + changed_when: false + +- name: Print out the switchover_dry_run_results + ansible.builtin.debug: + msg: "repmgr {{ switchover_dry_run_results }}" + +# see preparing for switchover if things go wrong despite dry run, there is +# mention of --force-rewind which would use pg_rewind to try to fix divergent timelines... + +## https://www.repmgr.org/docs/current/switchover-execution.html +## https://www.repmgr.org/docs/current/switchover-troubleshooting.html +- name: Switch the standby and primary + ansible.builtin.command: repmgr standby switchover -f /etc/repmgr.conf --siblings-follow + become: true + become_user: postgres + register: switchover_results + changed_when: true + +- name: Print out the switchover_results + ansible.builtin.debug: + msg: "repmgr {{ switchover_results }}" + +- name: Check cluster + ansible.builtin.command: repmgr -f /etc/repmgr.conf cluster show + become: true + become_user: postgres + register: status_results + changed_when: false + +- name: Print out the status_results + ansible.builtin.debug: + msg: "repmgr {{ status_results }}" diff --git a/ansible/roles/switchover/tasks/switchover.yml b/ansible/roles/switchover/tasks/switchover.yml deleted file mode 100644 index 1573d7a1..00000000 --- a/ansible/roles/switchover/tasks/switchover.yml +++ /dev/null @@ -1,60 +0,0 @@ -# assume the db is already there and synched, so now want to turn into a standby / replica -# and want to turn the current standby into the primary ---- - - name: Restart service postgres (primary) - ansible.builtin.systemd_service: - name: postgresql - state: restarted - become: true - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - # try to avoid issue: https://github.com/EnterpriseDB/repmgr/issues/703 - - name: Restart service postgres (standby) - ansible.builtin.systemd_service: - name: postgresql - state: restarted - become: true - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - # can now just do this with repmgr - # https://www.repmgr.org/docs/current/preparing-for-switchover.html - # need the two instances to be able to ssh to each other! - # siblings-follow only really needed if have multiple standbys... - - name: Dry run of switching the standby and primary - ansible.builtin.command: repmgr standby switchover -f /etc/repmgr.conf --siblings-follow --dry-run - become: true - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - register: switchover_dry_run_results - - name: Print out the switchover_dry_run_results - ansible.builtin.debug: - msg: "repmgr {{ switchover_dry_run_results }}" - ## see preparing for switchover if things go wrong despite dry run, there is mention of --force-rewind - ## which would use pg_rewind to try to fix divergent timelines... 
- - ## https://www.repmgr.org/docs/current/switchover-execution.html - ## https://www.repmgr.org/docs/current/switchover-troubleshooting.html - - name: Switch the standby and primary - ansible.builtin.command: repmgr standby switchover -f /etc/repmgr.conf --siblings-follow - become: true - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - register: switchover_results - - name: Print out the switchover_results - ansible.builtin.debug: - msg: "repmgr {{ switchover_results }}" - - - name: Check cluster - ansible.builtin.command: repmgr -f /etc/repmgr.conf cluster show - become: true - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - register: status_results - - name: Print out the status_results - ansible.builtin.debug: - msg: "repmgr {{ status_results }}" diff --git a/ansible/roles/vm/tasks/create-project-vm.yml b/ansible/roles/vm/tasks/create-project-vm.yml deleted file mode 100644 index 408d14cf..00000000 --- a/ansible/roles/vm/tasks/create-project-vm.yml +++ /dev/null @@ -1,89 +0,0 @@ ---- - - name: Create VM - openstack.cloud.server: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - availability_zone: '{{ availability_zone }}' - name: '{{ name_stuff }}' - image: '{{ ostack_image }}' - flavor: '{{ ostack_flavor }}' - network: '{{ ostack_network_name }}' - key_name: '{{ ostack_key_name }}' - security_groups: '{{ security_groups_list }}' - state: '{{ ostack_state }}' - # do not give ip, since want to assign a specific one in next step (so as to reuse the ones we have) - auto_ip: false - register: server - - - name: Print out the server information - ansible.builtin.debug: - msg: "Server {{ lookup('ansible.builtin.dict', server) }}" - - - name: Attach floating ip address - openstack.cloud.floating_ip: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - server: '{{ server.server.id }}' - reuse: true - network: public - floating_ip_address: '{{ vm_ip }}' - wait: true - timeout: 60 - - - name: Create Volume - openstack.cloud.volume: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - availability_zone: '{{ availability_zone }}' - name: '{{ name_stuff }}' - volume_type: '{{ volume_type }}' - size: '{{ volume_size }}' - register: volume - - - name: Print out the volume information - ansible.builtin.debug: - msg: "Volume {{ lookup('ansible.builtin.dict', volume) }}" - - - name: Attach a volume to a compute host - openstack.cloud.server_volume: - state: present - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - volume: '{{ volume.volume.id }}' - server: '{{ server.server.id }}' - device: /dev/vdb - - - name: Create port for ipalias and set security groups - openstack.cloud.port: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - state: present - name: 'ipalias-{{ name_stuff }}' - network: '{{ ipalias_network_name }}' - security_groups: '{{ security_groups_list }}' - - - name: Get port info - openstack.cloud.port_info: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - port: 'ipalias-{{ name_stuff }}' - register: ipalias_port - - - name: Print out the port information - ansible.builtin.debug: - msg: "Port {{ lookup('ansible.builtin.dict', ipalias_port) }}" - - - name: Add the ipalias network to server - ansible.builtin.command: # noqa no-changed-when - argv: - - openstack - - --os-cloud - - '{{ ostack_cloud }}' - - --os-region-name - - '{{ ostack_region }}' - - server - - add - - port - - '{{ server.server.id }}' - - 'ipalias-{{ 
name_stuff }}' - when: ipalias_port.ports.0.device_id | length <=0 diff --git a/ansible/roles/vm/tasks/main.yml b/ansible/roles/vm/tasks/main.yml deleted file mode 100644 index 589fc0ef..00000000 --- a/ansible/roles/vm/tasks/main.yml +++ /dev/null @@ -1,4 +0,0 @@ -# roles/vms/tasks/main.yml -- name: Create a VM - import_tasks: create-project-vm.yml - diff --git a/ansible/roles/vm/vars/main.yml b/ansible/roles/vm/vars/main.yml deleted file mode 100644 index d079f5bd..00000000 --- a/ansible/roles/vm/vars/main.yml +++ /dev/null @@ -1,12 +0,0 @@ - # VM config - ostack_flavor: m1.xxlarge - ostack_image: met-jammy-latest - ostack_state: present - ostack_network_name: lard - security_groups_list: - - default - - ssh_usernet - - postgres - - ping - volume_type: __DEFAULT__ - volume_size: 900 \ No newline at end of file diff --git a/ansible/roles/vm_format/tasks/format-mount-disk.yml b/ansible/roles/vm_format/tasks/format-mount-disk.yml deleted file mode 100644 index 5917fa77..00000000 --- a/ansible/roles/vm_format/tasks/format-mount-disk.yml +++ /dev/null @@ -1,45 +0,0 @@ ---- - - name: Create /mnt/ssd-data - ansible.builtin.file: - path: /mnt/ssd-data - state: directory - owner: ubuntu # change to postgres? - group: ubuntu # change to postgres? - mode: 'u=rw,g=rws,o=r' - become: true - - - name: Create ext4 filesystem on {{ mount_point }} - community.general.filesystem: - dev: '{{ mount_point }}' - fstype: ext4 - become: true - - - name: Read device information (always use unit when probing) - community.general.parted: - device: '{{ mount_point }}' - unit: MiB - register: sdb_info - become: true - - - name: Print out the device information - ansible.builtin.debug: - msg: "Partitions {{ sdb_info.partitions }}" - - # this also changes the fstab so it's still there when rebooted! - - name: Mount the disk from {{ mount_point }} - ansible.posix.mount: - path: /mnt/ssd-data - src: '{{ mount_point }}' - fstype: ext4 - state: mounted - become: true - - - name: Fetch the UUID of {{ mount_point }} - ansible.builtin.command: blkid --match-tag UUID --output value '{{ mount_point }}' - changed_when: false - register: blkid_cmd - become: true - - - name: Print out the UUID - ansible.builtin.debug: - msg: "UUID {{ blkid_cmd.stdout }}" diff --git a/ansible/roles/vm_format/tasks/install-postgres.yml b/ansible/roles/vm_format/tasks/install-postgres.yml deleted file mode 100644 index 58642405..00000000 --- a/ansible/roles/vm_format/tasks/install-postgres.yml +++ /dev/null @@ -1,136 +0,0 @@ ---- - - name: Add postgres apt key by id from a keyserver - ansible.builtin.apt_key: - url: https://www.postgresql.org/media/keys/ACCC4CF8.asc - state: present - become: true - - - name: Add postgres repository into sources list - ansible.builtin.apt_repository: - repo: deb https://apt.postgresql.org/pub/repos/apt jammy-pgdg main - state: present - become: true - - - name: Install a list of packages - ansible.builtin.apt: - pkg: - - nano - - postgresql-16 - - postgresql-16-repmgr # https://www.repmgr.org/docs/current/install-requirements.html - - pip # needed for installing psycopg2 - - acl # needed for becoming unprivileged user (such as postgres) - update_cache: true - become: true - - - name: Install psycopg2 python package # dependency for postgres ansible stuff? - ansible.builtin.pip: - name: psycopg2-binary - become: true - - - name: Install openstacksdk python package - ansible.builtin.pip: - name: openstacksdk - become: true - - # make it so the data is actually kept on the ssd mount...
- - ### synch the postgres stuff over to new directory, but stop postgres first - - name: Stop service postgres, if running - ansible.builtin.systemd_service: - name: postgresql - state: stopped - become: true - - - name: Run rsync - ansible.builtin.command: rsync -av /var/lib/postgresql/ /mnt/ssd-b/ - become: true - - ## change where data is stored and open up network wise - - name: Comment out original data_directory - ansible.builtin.replace: - dest: /etc/postgresql/16/main/postgresql.conf - regexp: '^data_directory' - replace: '#data_directory' - become: true - - - name: Modify postgresql config - ansible.builtin.blockinfile: - dest: /etc/postgresql/16/main/postgresql.conf - block: | - data_directory = '/mnt/ssd-b/16/main' - listen_addresses = '*' - become: true - - # probably want to restrict this once we know what will connect? - # but the security group rules should take care of limiting to met ranges - - name: Change hba conf to allow connections - community.postgresql.postgresql_pg_hba: - dest: /etc/postgresql/16/main/pg_hba.conf - contype: host - address: all # can we put met internal ip range(s)? - method: md5 - # users and database default to all - become: true - - # make sure these changes take effect - - name: Start up postgres service again - ansible.builtin.systemd_service: - name: postgresql - state: started - become: true - - # REPMGR - - name: Create repmgr user # https://www.repmgr.org/docs/current/quickstart-repmgr-user-database.html - community.postgresql.postgresql_user: - name: repmgr - password: '{{ repmgr_password }}' - role_attr_flags: SUPERUSER - become: true - become_user: postgres - - - name: Create a repmgr database, with owner repmgr - community.postgresql.postgresql_db: - name: repmgr - owner: repmgr - become: true - become_user: postgres - - - name: Change hba conf to allow repmgr to connect for replication - community.postgresql.postgresql_pg_hba: - dest: /etc/postgresql/16/main/pg_hba.conf - databases: replication - contype: host - users: repmgr - address: all - # address: '{{ standby_host }}' - method: trust - become: true - - - name: Change hba conf to allow repmgr to connect to the repmgr db - community.postgresql.postgresql_pg_hba: - dest: /etc/postgresql/16/main/pg_hba.conf - databases: repmgr - contype: host - users: repmgr - address: all - # address: '{{ standby_host }}' - method: trust - become: true - - - name: Restart postgres - ansible.builtin.systemd_service: - name: postgresql - state: restarted - become: true - - - name: Allow the postgres user to run /bin/systemctl restart, stop, start postgres - community.general.sudoers: - name: postgresql - user: postgres - commands: - - /bin/systemctl restart postgresql.service - - /bin/systemctl stop postgresql.service - - /bin/systemctl start postgresql.service - - /bin/systemctl reload postgresql.service - nopassword: true - become: true diff --git a/ansible/roles/vm_format/tasks/main.yml b/ansible/roles/vm_format/tasks/main.yml deleted file mode 100644 index 36d09ebb..00000000 --- a/ansible/roles/vm_format/tasks/main.yml +++ /dev/null @@ -1,9 +0,0 @@ -# roles/vm_format/tasks/main.yml -- name: netplan - import_tasks: netplan.yml - -- name: Format and mount the disk - import_tasks: format-mount-disk.yml - -- name: Install postgres - import_tasks: install-postgres.yml \ No newline at end of file diff --git a/ansible/roles/vm_format/tasks/netplan.yml b/ansible/roles/vm_format/tasks/netplan.yml deleted file mode 100644 index 118cb065..00000000 --- a/ansible/roles/vm_format/tasks/netplan.yml +++ 
/dev/null @@ -1,61 +0,0 @@ ---- - - name: Get port info - openstack.cloud.port_info: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - port: 'ipalias-{{ name_stuff }}' - register: ipalias_port - delegate_to: localhost - - - name: Print out the port information - ansible.builtin.debug: - msg: "Port {{ lookup('ansible.builtin.dict', ipalias_port) }}" - delegate_to: localhost - - - name: IP alias netplan configuration - ansible.builtin.set_fact: - netplan_config: - network: - version: 2 - ethernets: - ens6: - dhcp4: true - dhcp4-overrides: - use-routes: false - match: - macaddress: '{{ ipalias_port.ports.0.mac_address }}' - set-name: ens6 - routes: - - to: 0.0.0.0/0 - via: '{{ ipalias_ostack_network_cidr | ansible.utils.ipaddr("net") | ansible.utils.ipaddr("1") | ansible.utils.ipaddr("address") }}' - table: 102 - routing-policy: - - from: '{{ ipalias_ostack_network_cidr }}' - table: 102 - become: true - - - name: Copy out ipalias netplan config - ansible.builtin.copy: - content: '{{ netplan_config | to_nice_yaml }}' - dest: /etc/netplan/90-ansible-ipalias.yaml - mode: '0644' - register: netplan_config - become: true - - - name: Print out netplan config - ansible.builtin.debug: - msg: "Netplan {{ netplan_config }}" - - - name: Apply netplan - ansible.builtin.command: sudo netplan apply - async: 45 - poll: 0 - -# https://gitlab.met.no/ansible-roles/ipalias/-/blob/master/tasks/netplan.yml?ref_type=heads -# this times out and then the servers are unreachable? -# - name: Reboot server to apply new netplan config, without hitting netplan bug -# ansible.builtin.reboot: # noqa no-handler - # reboot_timeout: 3600 -# when: netplan_config is changed -# become: true - \ No newline at end of file diff --git a/ansible/roles/vm_format/vars/main.yml b/ansible/roles/vm_format/vars/main.yml deleted file mode 100644 index b998a5ae..00000000 --- a/ansible/roles/vm_format/vars/main.yml +++ /dev/null @@ -1,18 +0,0 @@ -$ANSIBLE_VAULT;1.1;AES256 -62373161613862343930306664363533666462303234343834336261636564626236633939393335 -3432396263316238336231346531643965306361386265620a636633646437306565303839333733 -37373533366266313034653465323365396230616136653362313435366264383532373735613436 -6639336335363766370a386536616365653437643865623238353338666666323132646565393439 -39626633353230373562343932363236393834623530663836363732653366633838613738646238 -32363330356337626638373335613466383132396164323334313335666234646130316662626438 -32346665386365363064633335316265643332663331656661613262353330633036656334393436 -66373332376239626666653666333637663337303637353162336530633637303037666631343961 -66383438643832653831303563623232643034616663303336613263653037376363653765386638 -37616332383163376536393732323439613963353339613737653433383562383038626337306563 -66366232346433636330353435306664613537663630646434303635346365346462336662336334 -65383233343634373235383236303434623138633966663864633463363432376635356339363961 -37363263633534633866343937386635366661613939373832653466303635653063343839306466 -33393966373739616133346432343332383330353332653938396433303364376331326334643236 -35646566313563303765303764366663326639323264383831383862653731313031633739313036 -33316664313061393934663763663435646138303930386335393832373230386338363538326263 -65333663396132396535346338393765366331663238396538633163383263616161 diff --git a/ansible/switchover.yml b/ansible/switchover.yml index 48c7ec6b..336d925f 100644 --- a/ansible/switchover.yml +++ b/ansible/switchover.yml @@ -1,19 +1,30 @@ +--- - name: Switch the
primary and standby / replica - hosts: localhost - vars: - ostack_cloud: lard - ostack_region: Ostack2-EXT + hosts: "{{ standby }}" gather_facts: false - pre_tasks: - - name: find primary ip from inventory - ansible.builtin.set_fact: - primary_ip: '{{ item }}' - with_inventory_hostnames: '{{ name_primary }}' - - name: find standby ip from inventory - ansible.builtin.set_fact: - standby_ip: '{{ item }}' - with_inventory_hostnames: '{{ name_standby }}' + remote_user: ubuntu + vars: + primary: # provide via cmd + standby: # provide via cmd + roles: - # ensure the names are passed in the right way around for the current state! - - role: switchover - - role: movefloatingip \ No newline at end of file + - role: switchover + switchover_primary: "{{ primary }}" + +- name: Move floating IP to new primary + hosts: localhost + gather_facts: false + vars: + primary: # provide via cmd + standby: # provide via cmd + fip: # provide via cmd + + tasks: + - name: Perform IP switchover + ansible.builtin.include_role: + name: ostack + tasks_from: move_floating_ip.yml + vars: + ostack_primary: "{{ primary }}" + ostack_standby: "{{ standby }}" + ostack_floating_ip: "{{ fip }}" diff --git a/deploy.sh b/deploy.sh deleted file mode 100644 index 5d0a8c2c..00000000 --- a/deploy.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -if ! cargo build --workspace --release; then - exit 1 -fi - -pushd ansible || exit - -ansible-playbook -i inventory.yml deploy.yml --ask-vault-pass - -popd || exit diff --git a/fake_data_generator/Cargo.toml b/fake_data_generator/Cargo.toml index 45787828..ceb87680 100644 --- a/fake_data_generator/Cargo.toml +++ b/fake_data_generator/Cargo.toml @@ -12,3 +12,6 @@ rand_distr.workspace = true serde.workspace = true tokio.workspace = true tokio-postgres.workspace = true + +[[bin]] name = "generate_partitions" diff --git a/fake_data_generator/src/bin/generate_partitions.rs b/fake_data_generator/src/bin/generate_partitions.rs new file mode 100644 index 00000000..8d76f55a --- /dev/null +++ b/fake_data_generator/src/bin/generate_partitions.rs @@ -0,0 +1,47 @@ +use std::{fs::File, io::Write}; + +use chrono::{DateTime, TimeZone, Utc}; +use std::io::BufWriter; + +fn create_table_partitions( + table: &str, + boundaries: &[DateTime<Utc>], + writer: &mut BufWriter<File>, +) -> Result<(), std::io::Error> { + // .windows(2) gives a 2-wide sliding view of the vector, so we can see + // both bounds relevant to a partition + for window in boundaries.windows(2) { + let start_time = window[0]; + let end_time = window[1]; + + let line = format!( + "CREATE TABLE IF NOT EXISTS {}_y{}_to_y{} PARTITION OF public.{}\nFOR VALUES FROM ('{}') TO ('{}');\n", + table, + start_time.format("%Y"), + end_time.format("%Y"), + table, + start_time.format("%Y-%m-%d %H:%M:%S+00"), + end_time.format("%Y-%m-%d %H:%M:%S+00") + ); + writer.write_all(line.as_bytes())?; + } + + Ok(()) +} + +fn main() -> Result<(), std::io::Error> { + let outfile = File::create("../db/partitions_generated.sql")?; + let mut writer = BufWriter::new(outfile); + + // create a vector of the boundaries between partitions + let partition_boundary_years: Vec<DateTime<Utc>> = [1950, 2000, 2010] + .into_iter() + .chain(2015..=2030) + .map(|y| Utc.with_ymd_and_hms(y, 1, 1, 0, 0, 0).unwrap()) + .collect(); + + create_table_partitions("data", &partition_boundary_years, &mut writer)?; + create_table_partitions("nonscalar_data", &partition_boundary_years, &mut writer)?; + + Ok(()) +}
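For reference, the generator is run from the `fake_data_generator` directory with `cargo run --bin generate_partitions`, which writes the statements to `../db/partitions_generated.sql`. The first boundary pair (1950, 2000), for example, expands to:

```sql
CREATE TABLE IF NOT EXISTS data_y1950_to_y2000 PARTITION OF public.data
FOR VALUES FROM ('1950-01-01 00:00:00+00') TO ('2000-01-01 00:00:00+00');
```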