Herb-AI · ReubenJ · Apr 18, 2024 · Apr 4, 2024 · Apr 5, 2024 · Apr 5, 2024
diff --git a/src/HerbSearch.jl b/src/HerbSearch.jl
@@ -36,6 +36,8 @@ include("genetic_functions/crossover.jl")
 include("genetic_functions/select_parents.jl")
 include("genetic_search_iterator.jl")
 
+include("random_iterator.jl")
+
 export 
   count_expressions,
   ProgramIterator,
@@ -48,6 +50,8 @@ export
   heuristic_random,
   heuristic_smallest_domain,
 
+  derivation_heuristic,
+
   synth,
   SynthResult,
   optimal_program,
@@ -56,6 +60,7 @@ export
   FixedShapedIterator,
 
   TopDownIterator,
+  RandomSearchIterator,
   BFSIterator,
   DFSIterator,
   MLFSIterator,

diff --git a/src/fixed_shaped_iterator.jl b/src/fixed_shaped_iterator.jl
@@ -2,18 +2,16 @@ Base.@doc """
     @programiterator FixedShapedIterator()
 
 Enumerates all programs that extend from the provided fixed shaped tree.
-The [Solver](@ref) is required to be in a state without any [Hole](@ref)s 
+The [Solver](@ref) is required to be in a state without any [Hole](@ref)s.
+
+!!! warning "This iterator is used as a baseline for the constraint propagation thesis. After the thesis, this iterator can (and should) be deleted."
 """ FixedShapedIterator
 @programiterator FixedShapedIterator()
 
 """
     priority_function(::FixedShapedIterator, g::AbstractGrammar, tree::AbstractRuleNode, parent_value::Union{Real, Tuple{Vararg{Real}}})
 
 Assigns a priority value to a `tree` that needs to be considered later in the search. Trees with the lowest priority value are considered first.
-
-- `g`: The grammar used for enumeration
-- `tree`: The tree that is about to be stored in the priority queue
-- `parent_value`: The priority value of the parent [`State`](@ref)
 """
 function priority_function(
     ::FixedShapedIterator, 
@@ -41,10 +39,10 @@ Describes the iteration for a given [`TopDownIterator`](@ref) over the grammar.
 """
 function Base.iterate(iter::FixedShapedIterator)
     # Priority queue with number of nodes in the program
-    pq :: PriorityQueue{State, Union{Real, Tuple{Vararg{Real}}}} = PriorityQueue()
+    pq :: PriorityQueue{SolverState, Union{Real, Tuple{Vararg{Real}}}} = PriorityQueue()
 
     solver = iter.solver
-    @assert !contains_variable_shaped_hole(get_tree(iter.solver)) "A FixedShapedIterator cannot iterate partial programs with VariableShapedHoles"
+    @assert !contains_variable_shaped_hole(get_tree(iter.solver)) "A FixedShapedIterator cannot iterate partial programs with Holes"
 
     if isfeasible(solver)
         enqueue!(pq, get_state(solver), priority_function(iter, get_grammar(solver), get_tree(solver), 0))
@@ -85,7 +83,7 @@ function _find_next_complete_tree(
             # The maximum depth is reached
             continue
         elseif hole_res isa HoleReference
-            # Uniform Hole was found
+            # UniformHole was found
             # TODO: problem. this 'hole' is tied to a target state. it should be state independent
             (; hole, path) = hole_res
 

diff --git a/src/program_iterator.jl b/src/program_iterator.jl
@@ -15,8 +15,7 @@ abstract type ProgramIterator end
 
 Base.IteratorSize(::ProgramIterator) = Base.SizeUnknown()
 
-#TODO: currently, ProgramIterator will not create `StateFixedShapedHole` yet, but this should be possible
-Base.eltype(::ProgramIterator) = Union{RuleNode, StateFixedShapedHole}
+Base.eltype(::ProgramIterator) = Union{RuleNode, StateHole}
 
 """
     @programiterator

diff --git a/src/random_iterator.jl b/src/random_iterator.jl
@@ -0,0 +1,117 @@
+"""
+    rand_with_constraints!(solver::Solver, path::Vector{Int})
+
+Randomly fill all holes in the subtree at `path` in the tree of `solver` and return
+`get_tree(solver)`.
+
+The subtree gets a depth budget of `get_max_depth(solver) - length(path)`: the nodes on the
+way down to `path` already count towards the maximum depth of the whole program.
+
+Throws an error if some hole cannot be filled with a rule that keeps the solver feasible.
+"""
+function rand_with_constraints!(solver::Solver, path::Vector{Int})
+    skeleton = get_node_at_location(solver, path)
+    grammar = get_grammar(solver)
+    remaining_depth = get_max_depth(solver) - length(path)
+    @debug "The maximum depth is $(remaining_depth). $(get_max_depth(solver))"
+    return _rand_with_constraints!(skeleton, solver, path, mindepth_map(grammar), remaining_depth)
+end
+
+"""
+    _rand_with_constraints!(skeleton::RuleNode, solver::Solver, path::Vector{Int}, dmap::AbstractVector{Int}, remaining_depth::Int=10)
+
+Visit the children of the already decided node `skeleton` located at `path`, so that holes
+further down the tree get filled. `path` is extended while visiting a child and restored
+afterwards. Returns `get_tree(solver)`.
+"""
+function _rand_with_constraints!(skeleton::RuleNode, solver::Solver, path::Vector{Int}, dmap::AbstractVector{Int}, remaining_depth::Int=10)
+    @debug "The depth RuleNode left: $remaining_depth"
+
+    for (i, child) ∈ enumerate(skeleton.children)
+        push!(path, i)
+        _rand_with_constraints!(child, solver, path, dmap, remaining_depth - 1)
+        pop!(path)
+    end
+    return get_tree(solver)
+end
+
+"""
+    _rand_with_constraints!(hole::AbstractHole, solver::Solver, path::Vector{Int}, dmap::AbstractVector{Int}, remaining_depth::Int=10)
+
+Fill the hole at `path` with a random rule and then recursively fill the children of the new node.
+
+Candidate rules are the rules in the domain of the hole whose minimal depth (`dmap`) does not
+exceed `remaining_depth`. They are tried in random order until one keeps the solver feasible;
+the solver is rolled back after every failed attempt. Returns `get_tree(solver)` and throws
+an error when no candidate is feasible.
+"""
+function _rand_with_constraints!(hole::AbstractHole, solver::Solver, path::Vector{Int}, dmap::AbstractVector{Int}, remaining_depth::Int=10)
+    @debug "The depth hole left: $remaining_depth"
+
+    # Look the hole up in the solver again instead of trusting the `hole` argument.
+    hole = get_hole_at_location(solver, path)
+
+    # TODO : probabilistic grammars support
+    candidate_rules = filter(r -> dmap[r] ≤ remaining_depth, findall(hole.domain))
+    state = save_state!(solver)
+    @assert !isfilled(hole)
+
+    shuffle!(candidate_rules)
+    found_feasible = false
+    for rule_index ∈ candidate_rules
+        remove_all_but!(solver, path, rule_index)
+        if isfeasible(solver)
+            found_feasible = true
+            break
+        end
+        # Roll back the failed attempt and save the state again for the next attempt.
+        load_state!(solver, state)
+        state = save_state!(solver)
+    end
+
+    if !found_feasible
+        error("rand with constraints failed because there are no feasible rules to use")
+    end
+
+    subtree = get_node_at_location(solver, path)
+    for (i, child) ∈ enumerate(subtree.children)
+        push!(path, i)
+        _rand_with_constraints!(child, solver, path, dmap, remaining_depth - 1)
+        pop!(path)
+    end
+    return get_tree(solver)
+end
+
+
+# Iterator that yields randomly generated programs. `path` is the location in the tree of
+# the solver below which the holes are filled.
+@programiterator RandomSearchIterator(
+    path::Vector{Int} = Vector{Int}()
+    # TODO: Maybe limit number of iterations
+)
+
+Base.IteratorSize(::RandomSearchIterator) = Base.SizeUnknown()
+Base.eltype(::RandomSearchIterator) = RuleNode
+
+"""
+    Base.iterate(iter::RandomSearchIterator)
+
+Save the state of the solver, generate a first random program and return it together with
+the saved state.
+"""
+function Base.iterate(iter::RandomSearchIterator)
+    solver_state = save_state!(iter.solver) #TODO: if this is the last iteration, don't save the state
+    return rand_with_constraints!(iter.solver, iter.path), solver_state
+end
+
+"""
+    Base.iterate(iter::RandomSearchIterator, solver_state::SolverState)
+
+Restore the solver to `solver_state`, save that state again for the next iteration and
+return a new random program together with the saved state.
+"""
+function Base.iterate(iter::RandomSearchIterator, solver_state::SolverState)
+    load_state!(iter.solver, solver_state)
+    solver_state = save_state!(iter.solver) #TODO: if this is the last iteration, don't save the state
+    return rand_with_constraints!(iter.solver, iter.path), solver_state
+end
diff --git a/src/search_procedure.jl b/src/search_procedure.jl
@@ -41,9 +41,11 @@ function synth(
         # Evaluate the expression
         score = evaluate(problem, expr, symboltable, shortcircuit=shortcircuit, allow_evaluation_errors=allow_evaluation_errors)
         if score == 1
+            candidate_program = freeze_state(candidate_program)
             return (candidate_program, optimal_program)
         elseif score >= best_score
             best_score = score
+            candidate_program = freeze_state(candidate_program)
             best_program = candidate_program
         end
 

diff --git a/src/stochastic_functions/propose.jl b/src/stochastic_functions/propose.jl
@@ -4,7 +4,6 @@ These subprograms are supposed to replace the subprogram at neighbourhood node l
 It is the responsibility of the caller to make this replacement.
 """
 
-
 """
     random_fill_propose(current_program::RuleNode, neighbourhood_node_loc::NodeLoc, grammar::AbstractGrammar, max_depth::Int, dmap::AbstractVector{Int}, dict::Union{Nothing,Dict{String,Any}})
 
@@ -17,28 +16,10 @@ Returns a list with only one proposed, completely random, subprogram.
 - `dmap::AbstractVector{Int} : the minimum possible depth to reach for each rule`
 - `dict::Dict{String, Any}`: the dictionary with additional arguments; not used.
 """
-function random_fill_propose(current_program::RuleNode, neighbourhood_node_loc::NodeLoc, grammar::AbstractGrammar, max_depth::Int, dmap::AbstractVector{Int}, dict::Union{Nothing,Dict{String,Any}})
-    # it can change the current_program for fast replacing of the node
-    # find the symbol of subprogram
-    subprogram = get(current_program, neighbourhood_node_loc)
-    neighbourhood_symbol = return_type(grammar, subprogram)
-
-    # find the depth of subprogram 
-    current_depth = node_depth(current_program, subprogram)
-    # this is depth that we can still generate without exceeding max_depth
-    remaining_depth = max_depth - current_depth + 1
-
-    if remaining_depth == 0
-        # can't expand more => return current program 
-        @warn "Can't extend program because we reach max_depth $(rulenode2expr(current_program, grammar))"
-        return [current_program]
-    end
-
-    # generate completely random expression (subprogram) with remaining_depth
-    replacement = rand(RuleNode, grammar, neighbourhood_symbol, dmap, remaining_depth)
-
-    return [replacement]
-end
+function random_fill_propose(solver::Solver, path::Vector{Int}, dict::Union{Nothing,Dict{String,Any}}; num_proposals::Int=5)
+    # Lazily yields at most `num_proposals` random programs; `dict` is not used. The start symbol is a placeholder (see its name), the iterator works on `solver`.
+    return Iterators.take(RandomSearchIterator(get_grammar(solver), :ThisIsIgnored, solver=solver, path=path), num_proposals)
+end
 
 """
     enumerate_neighbours_propose(enumeration_depth::Int64)
@@ -48,19 +29,11 @@ The return function is a function that produces a list with all the subprograms
 - `enumeration_depth::Int64`: the maximum enumeration depth.
 """
 function enumerate_neighbours_propose(enumeration_depth::Int64)
-    return (current_program::RuleNode, neighbourhood_node_loc::NodeLoc, grammar::AbstractGrammar, max_depth::Int, dmap::AbstractVector{Int}, dict::Union{Nothing,Dict{String,Any}}) -> begin
-        # it can change the current_program for fast replacing of the node
-        # find the symbol of subprogram
-        subprogram = get(current_program, neighbourhood_node_loc)
-        neighbourhood_symbol = return_type(grammar, subprogram)
-
-        # find the depth of subprogram
-        current_depth = node_depth(current_program, subprogram)
-        # this is depth that we can still generate without exceeding max_depth
-        remaining_depth = max_depth - current_depth + 1  
-        depth_left = min(remaining_depth, enumeration_depth)
-
-        return BFSIterator(grammar, neighbourhood_symbol, max_depth=depth_left)  
+    return (solver::Solver, path::Vector{Int}, dict::Union{Nothing,Dict{String,Any}}) -> begin
+        #TODO: Fix the ProgramIterator (macro)
+        # Make sure it doesn't overwrite (grammar, sym, max_depth, max_size) of the Solver.
+        # Ideally this line should be: BFSIterator(solver).
+        return BFSIterator(get_grammar(solver), :ThisIsIgnored, solver=solver, max_depth=get_max_depth(solver), max_size=get_max_size(solver))
     end
 end
 

diff --git a/src/stochastic_iterator.jl b/src/stochastic_iterator.jl
@@ -57,9 +57,29 @@ Base.eltype(::StochasticSearchIterator) = RuleNode
 
 function Base.iterate(iter::StochasticSearchIterator)
     grammar, max_depth = iter.grammar, iter.max_depth
+
+
+    #TODO: instantiating the solver should be in the program iterator macro
+    if isnothing(iter.solver)
+        iter.solver = GenericSolver(iter.grammar, iter.sym)
+    end
+
+    #TODO: these attributes should be part of the solver, not of the iterator
+    solver = iter.solver
+    solver.max_size = iter.max_size
+    solver.max_depth = iter.max_depth
+
+
     # sample a random node using start symbol and grammar
     dmap = mindepth_map(grammar)
-    sampled_program = rand(RuleNode, grammar, iter.sym, max_depth)
+    sampled_program = rand(RuleNode, grammar, iter.sym, max_depth) #TODO: replace iter.sym with a domain of valid rules
+    substitute!(solver, Vector{Int}(), sampled_program)
+    while !isfeasible(solver)
+        #TODO: prevent infinite loops here. Check max_time and/or max_enumerations.
+        sampled_program = rand(RuleNode, grammar, iter.sym, max_depth) #TODO: replace iter.sym with a domain of valid rules
+        substitute!(solver, Vector{Int}(), sampled_program)
+    end
+
 
     return (sampled_program, IteratorState(sampled_program, iter.initial_temperature,dmap))  
 end
@@ -76,16 +96,16 @@ The algorithm that constructs the iterator of StochasticSearchIterator. It has t
 4. accept the new program by modifying the next_program or reject the new program
 5. return the new next_program
 """
-function Base.iterate(iter::StochasticSearchIterator, current_state::IteratorState)
-    grammar, examples = iter.grammar, iter.spec
-    current_program = current_state.current_program
+function Base.iterate(iter::StochasticSearchIterator, iterator_state::IteratorState)
+    grammar, examples, solver = iter.grammar, iter.spec, iter.solver
+    current_program = get_tree(solver)#iterator_state.current_program
 
     current_cost = calculate_cost(iter, current_program)
 
-    new_temperature = temperature(iter, current_state.current_temperature)
+    new_temperature = temperature(iter, iterator_state.current_temperature)
 
     # get the neighbour node location 
-    neighbourhood_node_location, dict = neighbourhood(iter, current_state.current_program)
+    neighbourhood_node_location, dict = neighbourhood(iter, current_program)
 
     # get the subprogram pointed by node-location
     subprogram = get(current_program, neighbourhood_node_location)
@@ -94,42 +114,52 @@ function Base.iterate(iter::StochasticSearchIterator, current_state::IteratorSta
     @info "Start: $(rulenode2expr(current_program, grammar)), subexpr: $(rulenode2expr(subprogram, grammar)), cost: $current_cost
             temp $new_temperature"
 
+    # remove the rule node by substituting it with a hole of the same symbol
+    original_node = get(current_program, neighbourhood_node_location)
+    path = get_path(current_program, original_node)
+    original_state = save_state!(solver)
+
+    remove_node!(solver, path)
+
     # propose new programs to consider. They are programs to put in the place of the nodelocation
-    possible_replacements = propose(iter, current_program, neighbourhood_node_location, current_state.dmap, dict)
+    # propose should give full programs
+    possible_programs = propose(iter, path, dict)
+
+    # try to improve the program using any of the possible replacements
+    improved_program = try_improve_program!(iter, possible_programs, neighbourhood_node_location, new_temperature, current_cost)
+
+    if isnothing(improved_program)
+        load_state!(solver, original_state)
+    else 
+        new_state!(solver, improved_program)
+    end
+
+    @assert isfeasible(solver)
+    @assert !contains_hole(get_tree(solver))
 
-    next_program = get_next_program(iter, current_program, possible_replacements, neighbourhood_node_location, new_temperature, current_cost)
-    next_state = IteratorState(next_program,new_temperature,current_state.dmap)
-    return (next_program, next_state)
+    next_state = IteratorState(get_tree(solver), new_temperature,iterator_state.dmap)
+    return (get_tree(solver), next_state)
 end
 
 
-function get_next_program(iter::StochasticSearchIterator, current_program::RuleNode, possible_replacements, neighbourhood_node_location::NodeLoc, new_temperature, current_cost)
-    next_program = deepcopy(current_program)
-    possible_program = current_program
-    for possible_replacement in possible_replacements
-        # replace node at node_location with possible_replacement 
-        if neighbourhood_node_location.i == 0
-            possible_program = possible_replacement
-        else
-            # update current_program with the subprogram generated
-            neighbourhood_node_location.parent.children[neighbourhood_node_location.i] = possible_replacement
-        end
+function try_improve_program!(iter::StochasticSearchIterator, possible_programs, neighbourhood_node_location::NodeLoc, new_temperature, current_cost)
+    best_program = nothing
+    for possible_program in possible_programs
         program_cost = calculate_cost(iter, possible_program)
-        if accept(iter, current_cost, program_cost, new_temperature) 
-            next_program = deepcopy(possible_program)
+        if accept(iter, current_cost, program_cost, new_temperature)
+            best_program = freeze_state(possible_program)
             current_cost = program_cost
         end
     end
-    return next_program
-
+    return best_program
 end
 
 """
     _calculate_cost(program::RuleNode, cost_function::Function, spec::AbstractVector{IOExample}, grammar::AbstractGrammar, evaluation_function::Function)
 
 Returns the cost of the `program` using the examples and the `cost_function`. It first convert the program to an expression and evaluates it on all the examples.
 """
-function _calculate_cost(program::RuleNode, cost_function::Function, spec::AbstractVector{IOExample}, grammar::AbstractGrammar, evaluation_function::Function)
+function _calculate_cost(program::Union{RuleNode, StateHole}, cost_function::Function, spec::AbstractVector{IOExample}, grammar::AbstractGrammar, evaluation_function::Function)
     results = Tuple{<:Number,<:Number}[]
 
     expression = rulenode2expr(program, grammar)
@@ -144,11 +174,11 @@ function _calculate_cost(program::RuleNode, cost_function::Function, spec::Abstr
 end
 
 """
-    calculate_cost(iter::T, program::RuleNode) where T <: StochasticSearchIterator
+    calculate_cost(iter::T, program::Union{RuleNode, StateHole}) where T <: StochasticSearchIterator
 
 Wrapper around [`_calculate_cost`](@ref).
 """
-calculate_cost(iter::T, program::RuleNode) where T <: StochasticSearchIterator = _calculate_cost(program, iter.cost_function, iter.spec, iter.grammar, iter.evaluation_function)
+calculate_cost(iter::T, program::Union{RuleNode, StateHole}) where T <: StochasticSearchIterator = _calculate_cost(program, iter.cost_function, iter.spec, iter.grammar, iter.evaluation_function)
 
 neighbourhood(iter::T, current_program::RuleNode) where T <: StochasticSearchIterator = constructNeighbourhood(current_program, iter.grammar)
 
@@ -170,7 +200,7 @@ The temperature value of the algorithm remains constant over time.
     evaluation_function::Function = execute_on_input, 
 ) <: StochasticSearchIterator
 
-propose(iter::MHSearchIterator, current_program::RuleNode, neighbourhood_node_loc::NodeLoc, dmap::AbstractVector{Int}, dict::Union{Nothing,Dict{String,Any}}) = random_fill_propose(current_program, neighbourhood_node_loc, iter.grammar, iter.max_depth, dmap, dict)
+propose(iter::MHSearchIterator, path::Vector{Int}, dict::Union{Nothing,Dict{String,Any}}) = random_fill_propose(iter.solver, path, dict)
 
 temperature(::MHSearchIterator, current_temperature::Real) = const_temperature(current_temperature)
 
@@ -196,7 +226,7 @@ The temperature value of the algorithm remains constant over time.
     evaluation_function::Function = execute_on_input
 ) <: StochasticSearchIterator
 
-propose(iter::VLSNSearchIterator, current_program::RuleNode, neighbourhood_node_loc::NodeLoc, dmap::AbstractVector{Int}, dict::Union{Nothing,Dict{String,Any}}) = enumerate_neighbours_propose(iter.vlsn_neighbourhood_depth)(current_program, neighbourhood_node_loc, iter.grammar, iter.max_depth, dmap, dict)
+propose(iter::VLSNSearchIterator, path::Vector{Int}, dict::Union{Nothing,Dict{String,Any}}) = enumerate_neighbours_propose(iter.vlsn_neighbourhood_depth)(iter.solver, path, dict)
 
 temperature(::VLSNSearchIterator, current_temperature::Real) = const_temperature(current_temperature)
 
@@ -223,7 +253,7 @@ but takes into account the tempeerature too.
     evaluation_function::Function = execute_on_input
 ) <: StochasticSearchIterator
 
-propose(iter::SASearchIterator, current_program::RuleNode, neighbourhood_node_loc::NodeLoc, dmap::AbstractVector{Int}, dict::Union{Nothing,Dict{String,Any}}) = random_fill_propose(current_program, neighbourhood_node_loc, iter.grammar, iter.max_depth, dmap, dict)
+propose(iter::SASearchIterator, path::Vector{Int}, dict::Union{Nothing,Dict{String,Any}}) = random_fill_propose(iter.solver, path, dict)
 
 temperature(iter::SASearchIterator, current_temperature::Real) = decreasing_temperature(iter.temperature_decreasing_factor)(current_temperature)