Skip to content

Commit

Permalink
Ensure workers and miq_worker rows match
Browse files Browse the repository at this point in the history
  • Loading branch information
agrare committed Mar 18, 2024
1 parent 4b80ae2 commit bcd5b98
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 8 deletions.
12 changes: 8 additions & 4 deletions app/models/miq_server/worker_management/kubernetes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@ def sync_from_system
# we only have to sync the list of pods and deployments once
ensure_kube_monitors_started if my_server_is_primary?

# Before syncing the workers check for any orphaned worker rows that don't have
# a current pod and delete them
cleanup_orphaned_worker_rows

# Update worker deployments with updated settings such as cpu/memory limits
sync_deployment_settings
end
Expand Down Expand Up @@ -54,6 +50,14 @@ def cleanup_orphaned_worker_rows
end
end

# Finds pods that have no matching miq_workers row, comparing the keys of
# current_pods against the system_uid recorded on each worker row.
#
# Removal is not implemented yet: the orphaned pods are only enumerated.
# Returns nil when every pod has a worker row.
def cleanup_orphaned_workers
  pod_keys      = current_pods.keys
  tracked_uids  = miq_workers.pluck(:system_uid)
  orphaned_pods = pod_keys - tracked_uids
  return if orphaned_pods.empty?

  # TODO: destroy orphaned pods
  orphaned_pods.each { |_pod| }
end

def cleanup_failed_workers
super

Expand Down
14 changes: 14 additions & 0 deletions app/models/miq_server/worker_management/monitor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ def monitor_workers
# Cache a list of the native objects backing the miq_workers (e.g.: pods, services, or processes)
sync_from_system

# Cleanup any worker rows that don't have running workers
cleanup_orphaned_worker_rows

# Cleanup any workers that don't have corresponding miq_workers rows
cleanup_orphaned_workers

sync_monitor

# Sync the workers after sync'ing the child worker settings
Expand Down Expand Up @@ -49,6 +55,14 @@ def sync_workers
sync_stopping_workers
end

# Abstract hook called from monitor_workers to remove worker rows that have
# no running worker behind them.
#
# Raises NotImplementedError unless a subclass overrides it.
def cleanup_orphaned_worker_rows
  raise(
    NotImplementedError,
    "cleanup_orphaned_worker_rows must be implemented in a subclass"
  )
end

# Abstract hook called from monitor_workers to remove running workers that
# have no corresponding miq_workers row.
#
# Raises NotImplementedError unless a subclass overrides it.
def cleanup_orphaned_workers
  raise(
    NotImplementedError,
    "cleanup_orphaned_workers must be implemented in a subclass"
  )
end

def cleanup_failed_workers
check_pending_stop
clean_worker_records
Expand Down
28 changes: 26 additions & 2 deletions app/models/miq_server/worker_management/process.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
class MiqServer::WorkerManagement::Process < MiqServer::WorkerManagement
def sync_from_system
require "sys/proctable"
self.miq_processes = Sys::ProcTable.ps.select { |proc| proc.ppid == my_server.pid }
@miq_processes_by_pid = Sys::ProcTable.ps.select { |proc| proc.ppid == my_server.pid }.index_by(&:pid)
end

def sync_starting_workers
Expand All @@ -12,6 +12,22 @@ def sync_stopping_workers
MiqWorker.find_all_stopping.to_a
end

# Destroys miq_workers rows whose pid is not among the pids returned by
# miq_pids, logging the affected pids first.
#
# Returns nil when no such rows exist.
def cleanup_orphaned_worker_rows
  rows_without_process = miq_workers.where.not(:pid => miq_pids)
  return if rows_without_process.empty?

  stale_pids = rows_without_process.map(&:pid)
  _log.warn("Removing orphaned worker rows without corresponding processes: #{stale_pids.inspect}")
  rows_without_process.destroy_all
end

# Kills child processes that have no corresponding miq_workers row.
#
# Compares the pids returned by miq_pids against the pids recorded in
# miq_workers and sends SIGKILL to every pid without a row.
#
# Returns nil when there is nothing to kill.
def cleanup_orphaned_workers
  orphaned_workers = miq_pids - miq_workers.pluck(:pid)
  return if orphaned_workers.empty?

  _log.warn("Removing orphaned processes without corresponding worker rows: #{orphaned_workers.inspect}")
  orphaned_workers.each do |pid|
    ::Process.kill(9, pid) # 9 == SIGKILL
  rescue Errno::ESRCH
    # The process exited after the pid list was captured. There is nothing
    # left to kill, and the remaining orphans must still be handled.
  end
end

def monitor_workers
super

Expand Down Expand Up @@ -74,5 +90,13 @@ def validate_worker(worker)

private

attr_accessor :miq_processes
attr_reader :miq_processes_by_pid

# The entries held in miq_processes_by_pid, without their pid keys.
def miq_processes
  miq_processes_by_pid.each_value.to_a
end

# The pids that key miq_processes_by_pid.
def miq_pids
  miq_processes_by_pid.each_key.to_a
end
end
6 changes: 6 additions & 0 deletions app/models/miq_server/worker_management/systemd.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ def sync_stopping_workers
end
end

# No-op: performs no cleanup of worker rows and returns nil.
# NOTE(review): presumably overrides the monitor.rb hook of the same name,
# which raises NotImplementedError, so that monitor_workers can run here.
# Confirm whether orphaned rows need cleaning up for systemd-managed workers.
def cleanup_orphaned_worker_rows
end

# No-op: performs no cleanup of running workers and returns nil.
# NOTE(review): presumably overrides the monitor.rb hook of the same name,
# which raises NotImplementedError, so that monitor_workers can run here.
# Confirm whether orphaned workers need cleaning up under systemd.
def cleanup_orphaned_workers
end

def cleanup_failed_workers
super

Expand Down
3 changes: 1 addition & 2 deletions spec/models/miq_server/worker_management/kubernetes_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,8 @@

context "#sync_from_system" do
context "#ensure_kube_monitors_started" do
it "podified, ensures pod monitor started and orphaned rows are removed" do
it "podified, ensures pod monitor started" do
expect(server.worker_manager).to receive(:ensure_kube_monitors_started)
expect(server.worker_manager).to receive(:cleanup_orphaned_worker_rows)
server.worker_manager.sync_from_system
end
end
Expand Down

0 comments on commit bcd5b98

Please sign in to comment.