Skip to content

Commit

Permalink
fix: capture errors that shouldnt fail the transaction
Browse files Browse the repository at this point in the history
We introduced a ResolutionError to mark errors during the new plan
network resolution shouldnt be raised, which causes the whole transation
to fail. Instead, we capture them and fail the deployment of the
specific tasks that caused them.
  • Loading branch information
jhonasiv committed Dec 6, 2024
1 parent 513e480 commit b03fd08
Show file tree
Hide file tree
Showing 10 changed files with 168 additions and 42 deletions.
4 changes: 2 additions & 2 deletions lib/syskit/cli/doc/gen.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def self.save_profile_model(target_path, profile_m)
# @param [Actions::Profile::Definition] profile_def
def self.save_profile_definition(target_path, profile_def)
begin
task = compute_system_network(
task, = compute_system_network(
profile_def,
validate_abstract_network: false,
validate_generated_network: false
Expand Down Expand Up @@ -266,7 +266,7 @@ def self.compute_system_network(model, main_plan = Roby::Plan.new, **options)
main_plan.add(original_task = model.as_plan)
engine = Syskit::NetworkGeneration::Engine.new(main_plan)
planning_task = original_task.planning_task
mapping = engine.compute_system_network([planning_task], **options)
mapping, = engine.compute_system_network([planning_task], **options)

if engine.work_plan.respond_to?(:commit_transaction)
engine.work_plan.commit_transaction
Expand Down
2 changes: 1 addition & 1 deletion lib/syskit/gui/component_network_view.rb
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def render(model,
begin
if method == :compute_system_network
tic = Time.now
@task = compute_system_network(model, plan)
@task, = compute_system_network(model, plan)
timing = Time.now - tic
elsif method == :compute_deployed_network
tic = Time.now
Expand Down
2 changes: 1 addition & 1 deletion lib/syskit/network_generation/async.rb
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def apply
engine.discard_work_plan
false
elsif future.fulfilled?
required_instances = future.value
required_instances, resolution_errors = future.value
begin
engine.apply_system_network_to_plan(
required_instances, **@apply_system_network_options
Expand Down
28 changes: 23 additions & 5 deletions lib/syskit/network_generation/engine.rb
Original file line number Diff line number Diff line change
Expand Up @@ -710,14 +710,14 @@ def compute_system_network(
system_network_generator = SystemNetworkGenerator.new(
work_plan, event_logger: event_logger, merge_solver: merge_solver
)
toplevel_tasks = system_network_generator.generate(
toplevel_tasks, resolution_errors = system_network_generator.generate(
instance_requirements,
garbage_collect: garbage_collect,
validate_abstract_network: validate_abstract_network,
validate_generated_network: validate_generated_network
)

Hash[requirement_tasks.zip(toplevel_tasks)]
[Hash[requirement_tasks.zip(toplevel_tasks)], resolution_errors]
end

# Computes the system network, that is the network that fullfills
Expand Down Expand Up @@ -752,7 +752,7 @@ def resolve_system_network(
compute_policies: true
)

required_instances = compute_system_network(
required_instances, resolution_errors = compute_system_network(
requirement_tasks,
garbage_collect: garbage_collect,
validate_abstract_network: validate_abstract_network,
Expand All @@ -768,7 +768,7 @@ def resolve_system_network(
)
end
end
required_instances
[required_instances, resolution_errors]
end

# Generate the deployment according to the current requirements, and
Expand Down Expand Up @@ -806,7 +806,7 @@ def resolve(
validate_deployed_network: true,
validate_final_network: true
)
required_instances = resolve_system_network(
required_instances, resolution_errors = resolve_system_network(
requirement_tasks,
garbage_collect: garbage_collect,
validate_abstract_network: validate_abstract_network,
Expand All @@ -823,11 +823,29 @@ def resolve(
garbage_collect: garbage_collect,
validate_final_network: validate_final_network
)

resolution_errors = replace_resolution_errors_tasks(resolution_errors)
resolution_errors.each do |error|
plan.execution_engine.add_error(error)
end
rescue Exception => e # rubocop:disable Lint/RescueException
handle_resolution_exception(e, on_error: on_error)
raise
end

# Replace the failed tasks in resolution errors for the tasks found in the
# real plan
#
# @param [Array] resolution_errors all the resolution errors that should have
# their failed tasks replaced
# @return [Array] the newly generated resolution errors
def replace_resolution_errors_tasks(resolution_errors)
(resolution_errors || []).map do |original|
replacement = merge_solver.replacement_for(original.failed_task)
original.replace_failed_task(replacement)
end
end

def apply_system_network_to_plan(
required_instances, compute_deployments: true,
garbage_collect: true, validate_final_network: true
Expand Down
23 changes: 23 additions & 0 deletions lib/syskit/network_generation/exceptions.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# frozen_string_literal: true

require "roby/standard_errors"

module Syskit
module NetworkGeneration
class ResolutionError < Roby::LocalizedError
def replace_failed_task(replacement)
ResolutionError.new(replacement, original_exceptions, message)
end

def initialize(failed_task, original_exceptions = [], message = "")
super(failed_task)
@message = message
report_exceptions_from(original_exceptions)
end

def pretty_print(pp)
each_original_exception { |exception| exception.pretty_print(pp) }
end
end
end
end
59 changes: 45 additions & 14 deletions lib/syskit/network_generation/system_network_generator.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

require "syskit/network_generation/exceptions"

module Syskit
module NetworkGeneration
# Generate a plan from a set of {InstanceRequirement} objects
Expand Down Expand Up @@ -241,17 +243,26 @@ def compute_system_network(instance_requirements, garbage_collect: true,
end
log_timepoint "postprocessing"

resolution_errors = validate_network(
validate_abstract_network: validate_abstract_network,
validate_generated_network: validate_generated_network
)
remove_tasks_from_plan_due_to_resolution_errors(resolution_errors)
[@toplevel_tasks, resolution_errors]
end

def validate_network(validate_abstract_network: true,
validate_generated_network: true)
if validate_abstract_network
self.validate_abstract_network
log_timepoint "validate_abstract_network"
end

if validate_generated_network
self.validate_generated_network
resolution_errors = self.validate_generated_network
log_timepoint "validate_generated_network"
end

@toplevel_tasks
resolution_errors || []
end

def toplevel_tasks_to_requirements
Expand All @@ -261,6 +272,14 @@ def toplevel_tasks_to_requirements
.each_with_object({}) { |(t, ir), h| (h[t] ||= []) << ir }
end

def remove_tasks_from_plan_due_to_resolution_errors(resolution_errors)
resolution_errors.each do |resolution_error|
failed_task = resolution_error.failed_task
@toplevel_tasks.delete(failed_task)
plan.remove_task(failed_task)
end
end

# Verifies that the task allocation is complete
#
# @param [Roby::Plan] plan the plan on which we are working
Expand All @@ -270,11 +289,12 @@ def self.verify_task_allocation(
plan, components: plan.find_local_tasks(AbstractComponent)
)
still_abstract = components.find_all(&:abstract?)
return if still_abstract.empty?

raise TaskAllocationFailed.new(self, still_abstract),
still_abstract.map do |task|
exception = TaskAllocationFailed.new(self, [task])
ResolutionError.new(task, exception,
"could not find implementation for the following abstract " \
"tasks: #{still_abstract}"
"tasks: #{still_abstract}")
end
end

# Verifies that there are no multiple output - single input
Expand Down Expand Up @@ -322,25 +342,31 @@ def self.verify_device_allocation(plan, toplevel_tasks_to_requirements = {})
t.model.each_master_driver_service
.any? { |srv| !t.find_device_attached_to(srv) }
end
unless missing_devices.empty?
raise DeviceAllocationFailed.new(plan, missing_devices),
"could not allocate devices for the following tasks: " \
"#{missing_devices}"
resolution_errors = missing_devices.map do |dev|
allocation_error = DeviceAllocationFailed.new(plan, dev)
resolution_errors <<
ResolutionError.new(dev, allocation_error,
"could not allocate a device for the task: #{dev}")
end

devices = {}
components.each do |task|
task.each_master_device do |dev|
device_name = dev.full_name
if (old_task = devices[device_name])
raise ConflictingDeviceAllocation.new(
allocation_error = ConflictingDeviceAllocation.new(
dev, task, old_task, toplevel_tasks_to_requirements
)
resolution_errors <<
ResolutionError.new(old_task, allocation_error)
resolution_errors <<
ResolutionError.new(task, allocation_error)
else
devices[device_name] = task
end
end
end
resolution_errors
end

# Validates the network generated by {#compute_system_network}
Expand All @@ -354,9 +380,14 @@ def validate_abstract_network

# Validates the network generated by {#compute_system_network}
def validate_generated_network
self.class.verify_task_allocation(plan)
self.class.verify_device_allocation(plan, toplevel_tasks_to_requirements)
resolution_errors = self.class.verify_task_allocation(plan)
device_allocation_errors = self.class.verify_device_allocation(
plan, toplevel_tasks_to_requirements
)
resolution_errors.concat(device_allocation_errors)
super if defined? super

resolution_errors.flatten
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/syskit/test/network_manipulation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def syskit_generate_network(*to_instanciate, add_missions: true)
end
task_mapping = plan.in_transaction do |trsc|
engine = NetworkGeneration::Engine.new(plan, work_plan: trsc)
mapping = engine.compute_system_network(
mapping, = engine.compute_system_network(
tasks_to_instanciate.map(&:planning_task),
validate_generated_network: false
)
Expand Down
21 changes: 21 additions & 0 deletions test/network_generation/test_async.rb
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,27 @@ def assert_future_fulfilled(future)
assert subject.finished?
assert_raises(error_t) { subject.apply }
end

it "goes through with the plan even if some tasks could not be resolved" do
srv_m = Syskit::DataService.new_submodel
plan.add(task = Models::Placeholder.create_for([srv_m]).new)

resolution_errors = [ResolutionError.new(task, RuntimeError)]
requirements = Set[flexmock]
resolution = subject.prepare(requirements)
engine = flexmock(resolution.engine, :strict)
engine.should_receive(:resolve_system_network)
.and_return([[], resolution_errors])
engine.should_receive(:apply_system_network_to_plan).once
flexmock(engine).should_receive(:replace_resolution_errors_tasks)
.once.pass_thru
flexmock(subject.plan.execution_engine).should_receive(:add_error)
.with(*resolution_errors)
resolution.execute
subject.join
assert subject.finished?
subject.apply
end
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion test/network_generation/test_engine.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def work_plan
it "saves the mapping from requirement task in real_plan to instanciated task in work_plan" do
flexmock(requirements).should_receive(:instanciate)
.and_return(instanciated_task = simple_component_model.new)
mapping = syskit_engine.compute_system_network([planning_task])
mapping, = syskit_engine.compute_system_network([planning_task])
assert_equal instanciated_task, mapping[planning_task]
end
end
Expand Down
Loading

0 comments on commit b03fd08

Please sign in to comment.