From 2b54e84e6cd84ab7e7129a7df71d24376c9d3878 Mon Sep 17 00:00:00 2001 From: Timur Ildarovich Mukminov Date: Mon, 6 May 2024 12:35:09 +0200 Subject: [PATCH 01/11] Update grammar --- src/HerbSearch.jl | 4 ++- src/getting_started.jl | 30 ++++++++++++++++ src/probe/probe_iterator.jl | 72 ++++++++++++++++++++++++++++++++----- 3 files changed, 96 insertions(+), 10 deletions(-) create mode 100644 src/getting_started.jl diff --git a/src/HerbSearch.jl b/src/HerbSearch.jl index 796acab..1f93c0d 100644 --- a/src/HerbSearch.jl +++ b/src/HerbSearch.jl @@ -82,5 +82,7 @@ export misclassification, validate_iterator, sample, - rand + rand, + probe, + guided_search end # module HerbSearch diff --git a/src/getting_started.jl b/src/getting_started.jl new file mode 100644 index 0000000..6d6a1f9 --- /dev/null +++ b/src/getting_started.jl @@ -0,0 +1,30 @@ + +using HerbGrammar, HerbSpecification, HerbSearch + +my_replace(x,y,z) = replace(x,y => z, count = 1) + +grammar = @pcsgrammar begin + 0.188 : S = arg + 0.188 : S = "" + 0.188 : S = "<" + 0.188 : S = ">" + 0.188 : S = my_replace(S,S,S) + 0.059 : S = S * S +end + +examples = [ + IOExample(Dict(:arg => "a < 4 and a > 0"), "a 4 and a 0") # <- e0 with correct space + # IOExample(Dict(:arg => "a < 4 and a > 0"), "a 4 and a 0") # <- e0 with incorrect space + IOExample(Dict(:arg => ""), "open and close") # <- e1 + IOExample(Dict(:arg => " to number"), "Change string to a number") + ] + +iter = HerbSearch.GuidedSearchIterator(grammar, :S, examples, SymbolTable(grammar)) +# @profview program = @time probe(examples, iter, identity, identity, 3600, 10000) +for i in 1:6 + print(iter.grammar.log_probabilities[i]) +end +program = @time probe(examples, iter, 3600, 10000) + + +rulenode2expr(program, grammar) diff --git a/src/probe/probe_iterator.jl b/src/probe/probe_iterator.jl index 03f0c7c..0fcd45f 100644 --- a/src/probe/probe_iterator.jl +++ b/src/probe/probe_iterator.jl @@ -4,15 +4,17 @@ struct ProgramCache cost::Int end -function 
probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, select::Function, update!::Function, max_time::Int, iteration_size::Int) +function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_time::Int, iteration_size::Int) start_time = time() # store a set of all the results of evaluation programs eval_cache = Set() state = nothing symboltable = SymbolTable(iterator.grammar) + len_partial = 0 # start next iteration while there is time left while time() - start_time < max_time i = 1 + updated = false # partial solutions stores not only the program but also evaluation info psol_with_eval_cache = Vector{ProgramCache}() next = state === nothing ? iterate(iterator) : iterate(iterator, state) @@ -49,25 +51,77 @@ function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, select: next = iterate(iterator, state) i += 1 end - + # println(i) # check if program iterator is exhausted if next === nothing return nothing end - partial_sols = select(psol_with_eval_cache) # select promising partial solutions + partial_sols = selectpsol_largest_subset(psol_with_eval_cache) # select promising partial solutions + println(length(partial_sols)) + if len_partial < length(partial_sols) + len_partial = length(partial_sols) + updated = true + end # # update probabilites if any promising partial solutions - # if !isempty(partial_sols) - # update!(iterator.grammar, partial_sols, eval_cache) # update probabilites - # # restart iterator - # eval_cache = Set() - # state = nothing - # end + if !isempty(partial_sols) && updated == true + update_grammar(iterator.grammar, partial_sols, examples) # update probabilites + # restart iterator + eval_cache = Set() + state = nothing + end end return nothing end +function update_grammar(grammar::ContextSensitiveGrammar, PSols_with_eval_cache::Vector{ProgramCache}, examples::Vector{<:IOExample}) + for rule_index in eachindex(grammar.rules) # iterate for each rule_index + highest_correct_nr = 0 + for psol in 
PSols_with_eval_cache + program = psol.program + len_correct_examples = length(psol.correct_examples) + # Asume this works + # check if the program tree has rule_index somewhere inside it using a recursive function + if contains_rule(program, rule_index) && len_correct_examples > highest_correct_nr + highest_correct_nr = len_correct_examples + end + end + fitnes = highest_correct_nr / length(examples) + println("Highest correct examples: $(highest_correct_nr)") + println("Fitness $(fitnes)") + p_uniform = 1 / length(grammar.rules) + + # compute (log2(p_u) ^ (1 - fit)) = (1-fit) * log2(p_u) + log_prob = ((1 - fitnes) * log(2, p_uniform)) #/Z figure out the Z + grammar.log_probabilities[rule_index] = log_prob + end + for i in 1:6 + print(grammar.log_probabilities[i]) + end + println() + for i in 1:6 + print(2 ^ (-1* grammar.log_probabilities[i])) + end + println() +end + +# I will asume this works +function contains_rule(program::RuleNode, rule_index::Int) + if program.ind == rule_index # if the rule is good return true + return true + else + for child in program.children + if contains_rule(child, rule_index) # if a child has that rule then return true + return true + end + end + return false # if no child has that rule return false + end +end + + + """ selectpsol_largest_subset(partial_sols::Vector{ProgramCache}) From 9f0b093492000415e3e460ca39416c86455d18fe Mon Sep 17 00:00:00 2001 From: Timur Ildarovich Mukminov Date: Mon, 6 May 2024 13:01:05 +0200 Subject: [PATCH 02/11] fixed function definition --- src/getting_started.jl | 2 +- src/probe/probe_iterator.jl | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/getting_started.jl b/src/getting_started.jl index 6d6a1f9..3ef0198 100644 --- a/src/getting_started.jl +++ b/src/getting_started.jl @@ -24,7 +24,7 @@ iter = HerbSearch.GuidedSearchIterator(grammar, :S, examples, SymbolTable(gramma for i in 1:6 print(iter.grammar.log_probabilities[i]) end -program = @time probe(examples, iter, 
3600, 10000) +program = @time probe(examples, iter, identity, identity, 3600, 10000) rulenode2expr(program, grammar) diff --git a/src/probe/probe_iterator.jl b/src/probe/probe_iterator.jl index 5ebdb80..e7400f2 100644 --- a/src/probe/probe_iterator.jl +++ b/src/probe/probe_iterator.jl @@ -10,13 +10,12 @@ struct ProgramCache cost::Int end -function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_time::Int, iteration_size::Int) +function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, select::Function, update!::Function, max_time::Int, iteration_size::Int) start_time = time() # store a set of all the results of evaluation programs eval_cache = Set() state = nothing symboltable = SymbolTable(iterator.grammar) - len_partial = 0 # all partial solutions that were found so far all_selected_psols = Set{RuleNode}() # start next iteration while there is time left @@ -58,7 +57,7 @@ function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_ti next = iterate(iterator, state) i += 1 end - # println(i) + # check if program iterator is exhausted if next === nothing return nothing @@ -68,7 +67,7 @@ function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_ti if !isempty(partial_sols) push!(all_selected_psols, map(x -> x.program, partial_sols)...) 
end - # # update probabilites if any promising partial solutions + # update probabilites if any promising partial solutions if !isempty(partial_sols) # && updated == true update_grammar(iterator.grammar, partial_sols, examples) # update probabilites # restart iterator @@ -93,8 +92,8 @@ function update_grammar(grammar::ContextSensitiveGrammar, PSols_with_eval_cache: end end fitnes = highest_correct_nr / length(examples) - println("Highest correct examples: $(highest_correct_nr)") - println("Fitness $(fitnes)") + # println("Highest correct examples: $(highest_correct_nr)") + # println("Fitness $(fitnes)") p_uniform = 1 / length(grammar.rules) # compute (log2(p_u) ^ (1 - fit)) = (1-fit) * log2(p_u) From 9490534586f8a358eb4d25345d672fd880b2b166 Mon Sep 17 00:00:00 2001 From: Nicolae Filat Date: Mon, 6 May 2024 13:39:41 +0200 Subject: [PATCH 03/11] Add the select and update functions using as separate functions --- src/probe/probe_iterator.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/probe/probe_iterator.jl b/src/probe/probe_iterator.jl index 5ebdb80..2e8c5ba 100644 --- a/src/probe/probe_iterator.jl +++ b/src/probe/probe_iterator.jl @@ -10,6 +10,9 @@ struct ProgramCache cost::Int end +select(partial_sols::Vector{HerbSearch.ProgramCache}) = HerbSearch.selectpsol_largest_subset(partial_sols) +update!(grammar::ContextSensitiveGrammar, PSols_with_eval_cache::Vector{ProgramCache}, examples::Vector{<:IOExample}) = update_grammar(grammar,PSols_with_eval_cache, examples) + function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_time::Int, iteration_size::Int) start_time = time() # store a set of all the results of evaluation programs @@ -70,7 +73,7 @@ function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_ti end # # update probabilites if any promising partial solutions if !isempty(partial_sols) # && updated == true - update_grammar(iterator.grammar, partial_sols, examples) # update probabilites + 
update!(iterator.grammar, partial_sols, examples) # update probabilites # restart iterator eval_cache = Set() state = nothing From 01106c68370fa40d5e0c356c9ae1f7640bfc82c0 Mon Sep 17 00:00:00 2001 From: Nicolae Filat Date: Mon, 6 May 2024 13:57:02 +0200 Subject: [PATCH 04/11] Fix collecting iterators --- src/probe/probe_iterator.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/probe/probe_iterator.jl b/src/probe/probe_iterator.jl index 2e8c5ba..1c53140 100644 --- a/src/probe/probe_iterator.jl +++ b/src/probe/probe_iterator.jl @@ -175,7 +175,7 @@ function selectpsol_first_cheapest(partial_sols::Vector{ProgramCache}) end end # get the cheapest programs that satisfy unique subsets of examples - return values(mapping) + return collect(values(mapping)) end """ @@ -202,7 +202,7 @@ function selectpsol_all_cheapest(partial_sols::Vector{ProgramCache}) end end # get all cheapest programs that satisfy unique subsets of examples - return Iterators.flatten(values(mapping)) + return collect(Iterators.flatten(values(mapping))) end @programiterator GuidedSearchIterator( From 96647d0c7907062d50c94bf192db20d399c78217 Mon Sep 17 00:00:00 2001 From: Timur Ildarovich Mukminov Date: Mon, 6 May 2024 15:20:09 +0200 Subject: [PATCH 05/11] added for loops for printing --- src/getting_started.jl | 10 +++++----- src/probe/probe_iterator.jl | 12 +++++++++--- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/getting_started.jl b/src/getting_started.jl index 3ef0198..5b61f8b 100644 --- a/src/getting_started.jl +++ b/src/getting_started.jl @@ -9,13 +9,13 @@ grammar = @pcsgrammar begin 0.188 : S = "<" 0.188 : S = ">" 0.188 : S = my_replace(S,S,S) - 0.059 : S = S * S + 0.06 : S = S * S end examples = [ IOExample(Dict(:arg => "a < 4 and a > 0"), "a 4 and a 0") # <- e0 with correct space - # IOExample(Dict(:arg => "a < 4 and a > 0"), "a 4 and a 0") # <- e0 with incorrect space - IOExample(Dict(:arg => ""), "open and close") # <- e1 + # IOExample(Dict(:arg 
=> ""), "open and close") # <- e1 + IOExample(Dict(:arg => "<<<"), "") IOExample(Dict(:arg => " to number"), "Change string to a number") ] @@ -24,7 +24,7 @@ iter = HerbSearch.GuidedSearchIterator(grammar, :S, examples, SymbolTable(gramma for i in 1:6 print(iter.grammar.log_probabilities[i]) end -program = @time probe(examples, iter, identity, identity, 3600, 10000) - +program = @time probe(examples, iter, 3600, 100000) rulenode2expr(program, grammar) + diff --git a/src/probe/probe_iterator.jl b/src/probe/probe_iterator.jl index 29655a1..5cc271b 100644 --- a/src/probe/probe_iterator.jl +++ b/src/probe/probe_iterator.jl @@ -13,7 +13,7 @@ end select(partial_sols::Vector{HerbSearch.ProgramCache}) = HerbSearch.selectpsol_largest_subset(partial_sols) update!(grammar::ContextSensitiveGrammar, PSols_with_eval_cache::Vector{ProgramCache}, examples::Vector{<:IOExample}) = update_grammar(grammar,PSols_with_eval_cache, examples) -function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, select::Function, update!::Function, max_time::Int, iteration_size::Int) +function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_time::Int, iteration_size::Int) start_time = time() # store a set of all the results of evaluation programs eval_cache = Set() @@ -83,6 +83,7 @@ function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, select: end function update_grammar(grammar::ContextSensitiveGrammar, PSols_with_eval_cache::Vector{ProgramCache}, examples::Vector{<:IOExample}) + sum = 0 for rule_index in eachindex(grammar.rules) # iterate for each rule_index highest_correct_nr = 0 for psol in PSols_with_eval_cache @@ -100,15 +101,20 @@ function update_grammar(grammar::ContextSensitiveGrammar, PSols_with_eval_cache: p_uniform = 1 / length(grammar.rules) # compute (log2(p_u) ^ (1 - fit)) = (1-fit) * log2(p_u) + sum+=p_uniform^(1-fitnes) log_prob = ((1 - fitnes) * log(2, p_uniform)) #/Z figure out the Z grammar.log_probabilities[rule_index] = 
log_prob end + for rule_index in eachindex(grammar.rules) + grammar.log_probabilities[rule_index] = grammar.log_probabilities[rule_index] - log(2, sum) + end + println(map(x -> rulenode2expr(x.program, grammar), PSols_with_eval_cache)) for i in 1:6 - print(grammar.log_probabilities[i]) + print("$(grammar.log_probabilities[i]) ") end println() for i in 1:6 - print(2 ^ (-1* grammar.log_probabilities[i])) + print("$(2 ^ (grammar.log_probabilities[i])) ") end println() end From c8dc8aac06188f20bf55515d11ac350597a71c55 Mon Sep 17 00:00:00 2001 From: Timur Ildarovich Mukminov Date: Mon, 6 May 2024 17:39:53 +0200 Subject: [PATCH 06/11] Fix selection Co-authored-by: Nicolae Filat, --- src/getting_started.jl | 7 ++----- src/probe/probe_iterator.jl | 38 +++++++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/getting_started.jl b/src/getting_started.jl index 5b61f8b..375bb6c 100644 --- a/src/getting_started.jl +++ b/src/getting_started.jl @@ -20,11 +20,8 @@ examples = [ ] iter = HerbSearch.GuidedSearchIterator(grammar, :S, examples, SymbolTable(grammar)) -# @profview program = @time probe(examples, iter, identity, identity, 3600, 10000) -for i in 1:6 - print(iter.grammar.log_probabilities[i]) -end -program = @time probe(examples, iter, 3600, 100000) +@profview program = @time probe(examples, iter, 40, 10000) +# program = @time probe(examples, iter, 3600, 10000) rulenode2expr(program, grammar) diff --git a/src/probe/probe_iterator.jl b/src/probe/probe_iterator.jl index 5cc271b..3f3a49c 100644 --- a/src/probe/probe_iterator.jl +++ b/src/probe/probe_iterator.jl @@ -4,13 +4,17 @@ Stores the evaluation cost and the program in a structure. 
This """ -struct ProgramCache +mutable struct ProgramCache program::RuleNode correct_examples::Vector{Int} cost::Int end +function Base.:(==)(a::ProgramCache, b::ProgramCache) + return a.program == b.program +end +Base.hash(a::ProgramCache) = hash(a.program) -select(partial_sols::Vector{HerbSearch.ProgramCache}) = HerbSearch.selectpsol_largest_subset(partial_sols) +select(partial_sols::Vector{ProgramCache}, all_selected_psols::Set{ProgramCache}) = HerbSearch.selectpsol_largest_subset(partial_sols, all_selected_psols) update!(grammar::ContextSensitiveGrammar, PSols_with_eval_cache::Vector{ProgramCache}, examples::Vector{<:IOExample}) = update_grammar(grammar,PSols_with_eval_cache, examples) function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_time::Int, iteration_size::Int) @@ -20,7 +24,7 @@ function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_tim state = nothing symboltable = SymbolTable(iterator.grammar) # all partial solutions that were found so far - all_selected_psols = Set{RuleNode}() + all_selected_psols = Set{ProgramCache}() # start next iteration while there is time left while time() - start_time < max_time i = 1 @@ -65,17 +69,30 @@ function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_tim if next === nothing return nothing end - # select promising partial solutions that did not appear before - partial_sols = filter(x -> x.program ∉ all_selected_psols, select(psol_with_eval_cache)) + # select promising partial solutions that did not appear before + # if (isempty(all_selected_psols)) + # push!(all_selected_psols, psol_with_eval_cache...) + # end + partial_sols = filter(x -> x ∉ all_selected_psols, select(psol_with_eval_cache, all_selected_psols)) if !isempty(partial_sols) - push!(all_selected_psols, map(x -> x.program, partial_sols)...) 
- end - # update probabilites if any promising partial solutions - if !isempty(partial_sols) # && updated == true + print(rulenode2expr(partial_sols[1].program, iterator.grammar)) + push!(all_selected_psols, partial_sols...) + # update probabilites if any promising partial solutions update!(iterator.grammar, partial_sols, examples) # update probabilites # restart iterator eval_cache = Set() state = nothing + + #for loop to update all_selected_psols + new_all_selected = Set{ProgramCache}() + for prog_with_cache ∈ all_selected_psols + program = prog_with_cache.program + new_cost = calculate_program_cost(program, iterator.grammar) + prog_with_cache.cost = new_cost + # program_cache = ProgramCache(program, prog_with_cache.correct_examples, cost) + # push!(new_all_selected, program_cache) + end + # all_selected_psols = new_all_selected end end @@ -141,10 +158,11 @@ end This scheme selects a single cheapest program (first enumerated) that satisfies the largest subset of examples encountered so far across all partial_sols. """ -function selectpsol_largest_subset(partial_sols::Vector{ProgramCache}) +function selectpsol_largest_subset( partial_sols::Vector{ProgramCache}, all_selected_psols::Set{ProgramCache}) if isempty(partial_sols) return Vector{ProgramCache}() end + push!(partial_sols, all_selected_psols...) 
largest_subset_length = 0 cost = typemax(Int) best_sol = partial_sols[begin] From 6a0f80ad3d31a9c3feb7e8bfc4aa81aee216f3c5 Mon Sep 17 00:00:00 2001 From: Nicolae Filat Date: Mon, 13 May 2024 13:07:06 +0200 Subject: [PATCH 07/11] Remove probe double export --- src/HerbSearch.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/HerbSearch.jl b/src/HerbSearch.jl index 1f93c0d..8db3759 100644 --- a/src/HerbSearch.jl +++ b/src/HerbSearch.jl @@ -57,7 +57,7 @@ export optimal_program, suboptimal_program, - FixedShapedIterator, #TODO: deprecated after the cp thesis + FixedShapedIterator, UniformIterator, next_solution!, @@ -83,6 +83,5 @@ export validate_iterator, sample, rand, - probe, guided_search end # module HerbSearch From 9d7aac6f0ce4d22dbed1d8b7e8d645cfc37d6f48 Mon Sep 17 00:00:00 2001 From: Nicolae Filat Date: Mon, 13 May 2024 13:09:55 +0200 Subject: [PATCH 08/11] Remove commented lines and unused `new_all_selected` array --- src/probe/probe_iterator.jl | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/probe/probe_iterator.jl b/src/probe/probe_iterator.jl index 3f3a49c..0ca4df6 100644 --- a/src/probe/probe_iterator.jl +++ b/src/probe/probe_iterator.jl @@ -69,10 +69,6 @@ function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_tim if next === nothing return nothing end - # select promising partial solutions that did not appear before - # if (isempty(all_selected_psols)) - # push!(all_selected_psols, psol_with_eval_cache...) 
- # end partial_sols = filter(x -> x ∉ all_selected_psols, select(psol_with_eval_cache, all_selected_psols)) if !isempty(partial_sols) print(rulenode2expr(partial_sols[1].program, iterator.grammar)) @@ -83,16 +79,12 @@ function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_tim eval_cache = Set() state = nothing - #for loop to update all_selected_psols - new_all_selected = Set{ProgramCache}() + #for loop to update all_selected_psols with new costs for prog_with_cache ∈ all_selected_psols program = prog_with_cache.program new_cost = calculate_program_cost(program, iterator.grammar) prog_with_cache.cost = new_cost - # program_cache = ProgramCache(program, prog_with_cache.correct_examples, cost) - # push!(new_all_selected, program_cache) end - # all_selected_psols = new_all_selected end end From ba3e392c1a842470b6df63fed3ae7259c46fe30f Mon Sep 17 00:00:00 2001 From: Nicolae Filat Date: Mon, 13 May 2024 13:28:15 +0200 Subject: [PATCH 09/11] Add `all_selected_psols` parameters to the other select functions. Run reformat on the code. --- src/probe/probe_iterator.jl | 134 ++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/src/probe/probe_iterator.jl b/src/probe/probe_iterator.jl index 0ca4df6..5e0241c 100644 --- a/src/probe/probe_iterator.jl +++ b/src/probe/probe_iterator.jl @@ -5,17 +5,17 @@ Stores the evaluation cost and the program in a structure. 
This """ mutable struct ProgramCache - program::RuleNode + program::RuleNode correct_examples::Vector{Int} cost::Int end function Base.:(==)(a::ProgramCache, b::ProgramCache) - return a.program == b.program + return a.program == b.program end Base.hash(a::ProgramCache) = hash(a.program) -select(partial_sols::Vector{ProgramCache}, all_selected_psols::Set{ProgramCache}) = HerbSearch.selectpsol_largest_subset(partial_sols, all_selected_psols) -update!(grammar::ContextSensitiveGrammar, PSols_with_eval_cache::Vector{ProgramCache}, examples::Vector{<:IOExample}) = update_grammar(grammar,PSols_with_eval_cache, examples) +select(partial_sols::Vector{ProgramCache}, all_selected_psols::Set{ProgramCache}) = HerbSearch.selectpsol_largest_subset(partial_sols, all_selected_psols) +update!(grammar::ContextSensitiveGrammar, PSols_with_eval_cache::Vector{ProgramCache}, examples::Vector{<:IOExample}) = update_grammar(grammar, PSols_with_eval_cache, examples) function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_time::Int, iteration_size::Int) start_time = time() @@ -24,7 +24,7 @@ function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_tim state = nothing symboltable = SymbolTable(iterator.grammar) # all partial solutions that were found so far - all_selected_psols = Set{ProgramCache}() + all_selected_psols = Set{ProgramCache}() # start next iteration while there is time left while time() - start_time < max_time i = 1 @@ -41,7 +41,7 @@ function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_tim for (example_index, example) ∈ enumerate(examples) output = execute_on_input(symboltable, expr, example.in) push!(eval_observation, output) - + if output == example.out push!(correct_examples, example_index) end @@ -76,7 +76,7 @@ function probe(examples::Vector{<:IOExample}, iterator::ProgramIterator, max_tim # update probabilites if any promising partial solutions update!(iterator.grammar, partial_sols, examples) # update 
probabilites # restart iterator - eval_cache = Set() + eval_cache = Set() state = nothing #for loop to update all_selected_psols with new costs @@ -96,47 +96,47 @@ function update_grammar(grammar::ContextSensitiveGrammar, PSols_with_eval_cache: for rule_index in eachindex(grammar.rules) # iterate for each rule_index highest_correct_nr = 0 for psol in PSols_with_eval_cache - program = psol.program + program = psol.program len_correct_examples = length(psol.correct_examples) - # Asume this works # check if the program tree has rule_index somewhere inside it using a recursive function - if contains_rule(program, rule_index) && len_correct_examples > highest_correct_nr - highest_correct_nr = len_correct_examples + if contains_rule(program, rule_index) && len_correct_examples > highest_correct_nr + highest_correct_nr = len_correct_examples end - end + end fitnes = highest_correct_nr / length(examples) - # println("Highest correct examples: $(highest_correct_nr)") - # println("Fitness $(fitnes)") p_uniform = 1 / length(grammar.rules) - + # compute (log2(p_u) ^ (1 - fit)) = (1-fit) * log2(p_u) - sum+=p_uniform^(1-fitnes) - log_prob = ((1 - fitnes) * log(2, p_uniform)) #/Z figure out the Z + sum += p_uniform^(1 - fitnes) + log_prob = ((1 - fitnes) * log(2, p_uniform)) grammar.log_probabilities[rule_index] = log_prob end + total_sum = 0 for rule_index in eachindex(grammar.rules) grammar.log_probabilities[rule_index] = grammar.log_probabilities[rule_index] - log(2, sum) + total_sum += 2^(grammar.log_probabilities[rule_index]) end - println(map(x -> rulenode2expr(x.program, grammar), PSols_with_eval_cache)) - for i in 1:6 - print("$(grammar.log_probabilities[i]) ") - end - println() - for i in 1:6 - print("$(2 ^ (grammar.log_probabilities[i])) ") - end - println() + @assert abs(total_sum - 1) <= 1e-4 "Total sum is $(total_sum) " end -# I will asume this works +""" + contains_rule(program::RuleNode, rule_index::Int) + +Check if a given `RuleNode` contains has used a derivation 
rule with the specified `rule_index` + +# Arguments +- `program::RuleNode`: The `RuleNode` to check. +- `rule_index::Int`: The index of the rule to check for. + +""" function contains_rule(program::RuleNode, rule_index::Int) if program.ind == rule_index # if the rule is good return true - return true - else - for child in program.children - if contains_rule(child, rule_index) # if a child has that rule then return true - return true - end + return true + else + for child in program.children + if contains_rule(child, rule_index) # if a child has that rule then return true + return true + end end return false # if no child has that rule return false end @@ -145,14 +145,14 @@ end """ - selectpsol_largest_subset(partial_sols::Vector{ProgramCache}) + selectpsol_largest_subset(partial_sols::Vector{ProgramCache}}, all_selected_psols::Set{ProgramCache})) This scheme selects a single cheapest program (first enumerated) that satisfies the largest subset of examples encountered so far across all partial_sols. """ -function selectpsol_largest_subset( partial_sols::Vector{ProgramCache}, all_selected_psols::Set{ProgramCache}) +function selectpsol_largest_subset(partial_sols::Vector{ProgramCache}, all_selected_psols::Set{ProgramCache}) if isempty(partial_sols) - return Vector{ProgramCache}() + return Vector{ProgramCache}() end push!(partial_sols, all_selected_psols...) largest_subset_length = 0 @@ -170,14 +170,14 @@ function selectpsol_largest_subset( partial_sols::Vector{ProgramCache}, all_sele end """ - selectpsol_first_cheapest(partial_sols::Vector{ProgramCache}) + selectpsol_first_cheapest(partial_sols::Vector{ProgramCache}}, all_selected_psols::Set{ProgramCache})) This scheme selects a single cheapest program (first enumerated) that satisfies a unique subset of examples. 
""" -function selectpsol_first_cheapest(partial_sols::Vector{ProgramCache}) +function selectpsol_first_cheapest(partial_sols::Vector{ProgramCache}, all_selected_psol::Set{ProgramCache}) # maps subset of examples to the cheapest program - mapping = Dict{Vector{Int}, ProgramCache}() + mapping = Dict{Vector{Int},ProgramCache}() for sol ∈ partial_sols examples = sol.correct_examples if !haskey(mapping, examples) @@ -194,13 +194,13 @@ function selectpsol_first_cheapest(partial_sols::Vector{ProgramCache}) end """ - selectpsol_all_cheapest(partial_sols::Vector{ProgramCache}) + selectpsol_all_cheapest(partial_sols::Vector{ProgramCache}, all_selected_psol::Set{ProgramCache}) This scheme selects all cheapest programs that satisfies a unique subset of examples. """ -function selectpsol_all_cheapest(partial_sols::Vector{ProgramCache}) +function selectpsol_all_cheapest(partial_sols::Vector{ProgramCache}, all_selected_psol::Set{ProgramCache}) # maps subset of examples to the cheapest program - mapping = Dict{Vector{Int}, Vector{ProgramCache}}() + mapping = Dict{Vector{Int},Vector{ProgramCache}}() for sol ∈ partial_sols examples = sol.correct_examples if !haskey(mapping, examples) @@ -224,7 +224,7 @@ end spec::Vector{<:IOExample}, symboltable::SymbolTable ) -@kwdef mutable struct GuidedSearchState +@kwdef mutable struct GuidedSearchState level::Int64 bank::Vector{Vector{RuleNode}} eval_cache::Set @@ -232,10 +232,10 @@ end end function Base.iterate(iter::GuidedSearchIterator) iterate(iter, GuidedSearchState( - level = -1, - bank = [], - eval_cache = Set(), - programs = [] + level=-1, + bank=[], + eval_cache=Set(), + programs=[] )) end @@ -261,16 +261,16 @@ function Base.iterate(iter::GuidedSearchIterator, state::GuidedSearchState) output = execute_on_input(iter.symboltable, expr, example.in) push!(eval_observation, output) end - + if eval_observation in state.eval_cache # program already cached continue end - push!(state.bank[state.level + 1], prog) # add program to bank + 
push!(state.bank[state.level+1], prog) # add program to bank push!(state.eval_cache, eval_observation) # add result to cache - return(prog, state) # return program + return (prog, state) # return program end - + # current level has been exhausted, go to next level return iterate(iter, state) end @@ -278,19 +278,19 @@ end @programiterator ProbeSearchIterator( spec::Vector{<:IOExample}, cost_function::Function, - level_limit = 8 -) + level_limit=8 +) -@kwdef mutable struct ProbeSearchState +@kwdef mutable struct ProbeSearchState level::Int64 bank::Vector{Vector{RuleNode}} eval_cache::Set - partial_sols::Vector{RuleNode} + partial_sols::Vector{RuleNode} end function calculate_rule_cost_prob(rule_index, grammar) log_prob = grammar.log_probabilities[rule_index] - return convert(Int64,round(-log_prob)) + return convert(Int64, round(-log_prob)) end function calculate_rule_cost_size(rule_index, grammar) @@ -303,7 +303,7 @@ calculate_rule_cost(rule_index::Int, grammar::ContextSensitiveGrammar) = calcula calculate_program_cost(program::RuleNode, grammar::ContextSensitiveGrammar) Calculates the cost of a program by summing up the cost of the children and the cost of the rule """ -function calculate_program_cost(program::RuleNode, grammar::ContextSensitiveGrammar) +function calculate_program_cost(program::RuleNode, grammar::ContextSensitiveGrammar) cost_children = sum([calculate_program_cost(child, grammar) for child ∈ program.children], init=0) cost_rule = calculate_rule_cost(program.ind, grammar) return cost_children + cost_rule @@ -330,14 +330,14 @@ function newprograms(grammar, level, bank) # create a list of nr_children iterators iterators = [] for i ∈ 1:nr_children - push!(iterators, 1:(level - rule_cost)) + push!(iterators, 1:(level-rule_cost)) end options = Iterators.product(iterators...) 
# TODO : optimize options generation for costs ∈ options if sum(costs) == level - rule_cost # julia indexes from 1 that is why I add 1 here - bank_indexed = [bank[cost + 1] for cost ∈ costs] + bank_indexed = [bank[cost+1] for cost ∈ costs] cartesian_product = Iterators.product(bank_indexed...) for program_options ∈ cartesian_product # TODO: check if the right types are good @@ -355,11 +355,11 @@ end function Base.iterate(iter::ProbeSearchIterator) iterate(iter, ProbeSearchState( - level = 0, - bank = Vector(), - eval_cache = Set(), - partial_sols = Vector() - ) + level=0, + bank=Vector(), + eval_cache=Set(), + partial_sols=Vector() + ) ) end @@ -370,7 +370,7 @@ function Base.iterate(iter::ProbeSearchIterator, state::ProbeSearchState) symboltable = SymbolTable(iter.grammar) while state.level <= start_level + iter.level_limit # add another level to the bank that is empty - push!(state.bank,[]) + push!(state.bank, []) new_programs = newprograms(iter.grammar, state.level, state.bank) if time() - start_time >= 10 @warn "Probe took more than 10 seconds to run..." 
@@ -397,8 +397,8 @@ function Base.iterate(iter::ProbeSearchIterator, state::ProbeSearchState) elseif nr_correct_examples >= 1 push!(state.partial_sols, program) end - push!(state.bank[state.level + 1], program) - push!(state.eval_cache, eval_observation) + push!(state.bank[state.level+1], program) + push!(state.eval_cache, eval_observation) end state.level = state.level + 1 end From 5f6b01359c3743fa13b800a9e87bc7ff4f80168e Mon Sep 17 00:00:00 2001 From: Nicolae Filat Date: Mon, 13 May 2024 14:54:43 +0200 Subject: [PATCH 10/11] Fix for visualizing merge --- src/HerbSearch.jl | 2 +- src/probe/{probe.jl => probe_iterator.jl} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/probe/{probe.jl => probe_iterator.jl} (100%) diff --git a/src/HerbSearch.jl b/src/HerbSearch.jl index 11deb2b..ed364e5 100644 --- a/src/HerbSearch.jl +++ b/src/HerbSearch.jl @@ -37,7 +37,7 @@ include("genetic_functions/select_parents.jl") include("genetic_search_iterator.jl") include("random_iterator.jl") -include("probe/probe.jl") +include("probe/probe_iterator.jl") export ProgramIterator, diff --git a/src/probe/probe.jl b/src/probe/probe_iterator.jl similarity index 100% rename from src/probe/probe.jl rename to src/probe/probe_iterator.jl From aeead909916b371ee4fd258ff92b3f1ccda627f6 Mon Sep 17 00:00:00 2001 From: Nicolae Filat Date: Mon, 13 May 2024 15:01:48 +0200 Subject: [PATCH 11/11] Remove duplicate lines --- src/probe/probe_iterator.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/probe/probe_iterator.jl b/src/probe/probe_iterator.jl index e1354ab..d27674f 100644 --- a/src/probe/probe_iterator.jl +++ b/src/probe/probe_iterator.jl @@ -156,7 +156,6 @@ satisfies the largest subset of examples encountered so far across all partial_s function selectpsol_largest_subset(partial_sols::Vector{ProgramCache}, all_selected_psols::Set{ProgramCache}) if isempty(partial_sols) return Vector{ProgramCache}() - return Vector{ProgramCache}() end push!(partial_sols, all_selected_psols...) 
largest_subset_length = 0 @@ -182,7 +181,6 @@ satisfies a unique subset of examples. function selectpsol_first_cheapest(partial_sols::Vector{ProgramCache}, all_selected_psol::Set{ProgramCache}) # maps subset of examples to the cheapest program mapping = Dict{Vector{Int},ProgramCache}() - mapping = Dict{Vector{Int},ProgramCache}() for sol ∈ partial_sols examples = sol.correct_examples if !haskey(mapping, examples) @@ -206,7 +204,6 @@ This scheme selects all cheapest programs that satisfies a unique subset of exam function selectpsol_all_cheapest(partial_sols::Vector{ProgramCache}, all_selected_psol::Set{ProgramCache}) # maps subset of examples to the cheapest program mapping = Dict{Vector{Int},Vector{ProgramCache}}() - mapping = Dict{Vector{Int},Vector{ProgramCache}}() for sol ∈ partial_sols examples = sol.correct_examples if !haskey(mapping, examples) @@ -229,7 +226,6 @@ end function calculate_rule_cost_prob(rule_index, grammar) log_prob = grammar.log_probabilities[rule_index] return convert(Int64, round(-log_prob)) - return convert(Int64, round(-log_prob)) end function calculate_rule_cost_size(rule_index, grammar)