From c00799dd5e084f3c9894e23ce990de8ecf5de0f2 Mon Sep 17 00:00:00 2001
From: Pierre Villemot <pierre.villemot@ocamlpro.com>
Date: Wed, 14 Aug 2024 14:26:54 +0200
Subject: [PATCH] Fix issue 929

This PR fixes the issue #929 about model arrays.
Our implementation of `ArrayEx` is complete but we do not split on
literals `a = b` where `a` and `b` are any arrays of the problem. Doing
these splits during the search proof would be a bad idea because we do
not need them to be complete. In the example of the issue #929, we
obtain an equality of the form `(store a1 x y) = (store a2 x y)` and
we know that `(select a1 x) = (select a2 x)`. We never try to decide `a1
= a2`.

Notice that I update the set of arrays by tracking literals `Eq` of
origin `Subst`. We know that this design is fragile because it could
happen that substitutions are not sent to theories. The worst that can
happen is to produced wrong models for arrays in very tricky cases.
A better implementation consists in using our new global domains system to
track `selects` in `Adt_rel` and use the keys of this map instead of
`arrays`. I did not opt to this implementation to keep this PR simple
and atomic.
---
 src/lib/reasoners/arrays_rel.ml           |  69 +++++++++++--
 src/lib/reasoners/uf.ml                   | 114 +++++++++++++---------
 tests/issues/555.models.expected          |   2 +-
 tests/models/array/array1.models.expected |   2 +-
 4 files changed, 128 insertions(+), 59 deletions(-)

diff --git a/src/lib/reasoners/arrays_rel.ml b/src/lib/reasoners/arrays_rel.ml
index 2551f2947..b27839cf8 100644
--- a/src/lib/reasoners/arrays_rel.ml
+++ b/src/lib/reasoners/arrays_rel.ml
@@ -31,6 +31,7 @@ module A  = Xliteral
 module L  = List
 
 module X = Shostak.Combine
+module SX = Shostak.SXH
 module Ex = Explanation
 
 module LR = Uf.LX
@@ -91,6 +92,8 @@ type t =
    new_terms : E.Set.t;
    size_splits : Numbers.Q.t;
    cached_relevant_terms : (G.t * S.t TBS.t) H.t;
+   arrays : SX.t;
+   (* Set of all class representatives of arrays. *)
   }
 
 
@@ -103,6 +106,7 @@ let empty uf =
    new_terms = E.Set.empty;
    size_splits = Numbers.Q.one;
    cached_relevant_terms = H.create 1024;
+   arrays = SX.empty
   }, Uf.domains uf
 
 (*BISECT-IGNORE-BEGIN*)
@@ -415,17 +419,40 @@ let new_equalities env eqs la class_of =
   implied_consequences env eqs la
 
 (* choisir une egalite sur laquelle on fait un case-split *)
-let two = Numbers.Q.from_int 2
-
-let case_split env _uf ~for_model:_ =
+let two = Numbers.Q.from_int 3
+
+let split_arrays ~for_model env =
+  if for_model then
+    match SX.choose env.arrays with
+    | exception Not_found -> None
+    | a1 ->
+      match SX.find_first (fun a -> not @@ X.equal a a1) env.arrays with
+      | exception Not_found -> None
+      | a2 ->
+        Some (LR.neg @@ LR.mk_eq a1 a2)
+  else
+    None
+
+let (let*) = Option.bind
+
+let split_indices env =
+  let* cs = LR.Set.choose_opt env.split in
+  Some (LR.neg cs)
+
+let next_split ~for_model env =
+  match split_indices env with
+  | None -> split_arrays ~for_model env
+  | cs -> cs
+
+let case_split env _uf ~for_model =
   (*if Numbers.Q.compare
     (Numbers.Q.mult two env.size_splits) (max_split ()) <= 0  ||
     Numbers.Q.sign  (max_split ()) < 0 then*)
-  try
-    let a = LR.neg (LRset.choose env.split) in
-    Debug.case_split a;
-    [LR.view a, true, Th_util.CS (Th_util.Th_arrays, two)]
-  with Not_found ->
+  match next_split ~for_model env with
+  | Some cs ->
+    Debug.case_split cs;
+    [LR.view cs, true, Th_util.CS (Th_util.Th_arrays, two)]
+  | None ->
     Debug.case_split_none ();
     []
 
@@ -442,12 +469,28 @@ let count_splits env la =
   in
   {env with size_splits = nb}
 
+let is_array r =
+  match X.type_info r with
+  | Ty.Tfarray _ -> true
+  | _ -> false
+
 let assume env uf la =
   let are_eq = Uf.are_equal uf ~added_terms:true in
   let are_neq = Uf.are_distinct uf in
   let class_of = Uf.class_of uf in
   let env = count_splits env la in
 
+  (* Update the set of representatives of arrays. *)
+  let env =
+    List.fold_left
+      (fun env l ->
+         match l with
+         | Xliteral.Eq (r1, r2), _, _, Th_util.Subst when is_array r2 ->
+           { env with arrays = SX.remove r1 env.arrays |> SX.add r2 }
+         | _ -> env
+      ) env la
+  in
+
   (* instantiation des axiomes des tableaux *)
   Debug.assume la;
   let env = new_terms env la in
@@ -462,7 +505,15 @@ let assume env uf la =
   env, Uf.domains uf, { Sig_rel.assume = l; remove = [] }
 
 let query _ _ _ = None
-let add env uf _ _ = env, Uf.domains uf, []
+
+let add env uf r _ =
+  match X.type_info r with
+  | Ty.Tfarray _ ->
+    let rr, _ = Uf.find_r uf r in
+    let arrays = SX.add rr env.arrays in
+    { env with arrays }, Uf.domains uf, []
+  | _ ->
+    env, Uf.domains uf, []
 
 let new_terms env = env.new_terms
 let instantiate ~do_syntactic_matching:_ _ env _ _ = env, []
diff --git a/src/lib/reasoners/uf.ml b/src/lib/reasoners/uf.ml
index 46f330070..4e9e83b6f 100644
--- a/src/lib/reasoners/uf.ml
+++ b/src/lib/reasoners/uf.ml
@@ -1134,35 +1134,57 @@ let terms env =
 
 (* Helper functions used by the caches during the computation of the model. *)
 module Cache = struct
-  let store_array_get arrays_cache (t : Expr.t) (i : Expr.t) (v : Expr.t) =
-    match Hashtbl.find_opt arrays_cache t with
-    | Some values ->
-      Hashtbl.replace values i v
-    | None ->
+  type t = {
+    arrays : (Expr.t, unit) Hashtbl.t;
+    (** Set of all the array for which we need to generate a model value.
+        Notice that this set also contains embedded arrays. *)
+
+    selects: (X.r, (Expr.t, Expr.t) Hashtbl.t) Hashtbl.t;
+    (** Contains all the accesses to arrays. *)
+
+    abstracts: (r, Expr.t) Hashtbl.t;
+    (** Contains all the abstract values generated. This cache is necessary
+        to ensure we don't generate twice an abstract value for a given symbol. *)
+  }
+
+  let mk () = {
+    arrays = Hashtbl.create 17;
+    selects = Hashtbl.create 17;
+    abstracts = Hashtbl.create 17;
+  }
+
+  let add_array cache env (t : Expr.t) =
+    Hashtbl.add cache.arrays t ();
+    let r, _ = find env t in
+    match Hashtbl.find cache.selects r with
+    | exception Not_found ->
+      Hashtbl.add cache.selects r (Hashtbl.create 17)
+    | _ -> ()
+
+  let update_select cache env (t : Expr.t) (i : Expr.t) (v : Expr.t) =
+    let r, _ = find env t in
+    match Hashtbl.find cache.selects r with
+    | exception Not_found ->
       let values = Hashtbl.create 17 in
       Hashtbl.add values i v;
-      Hashtbl.add arrays_cache t values
+      Hashtbl.add cache.selects r values
+    | values ->
+      Hashtbl.replace values i v
 
-  let get_abstract_for abstracts_cache env (t : Expr.t) =
+  let retrieve_selects cache env (t : Expr.t) =
     let r, _ = find env t in
-    match Hashtbl.find_opt abstracts_cache r with
-    | Some abstract -> abstract
-    | None ->
+    Hashtbl.find cache.selects r
+
+  let get_abstract_for cache env (t : Expr.t) =
+    let r, _ = find env t in
+    match Hashtbl.find cache.abstracts r with
+    | exception Not_found ->
       let abstract = Expr.mk_abstract (Expr.type_info t) in
-      Hashtbl.add abstracts_cache r abstract;
+      Hashtbl.add cache.abstracts r abstract;
       abstract
+    | abstract -> abstract
 end
 
-type cache = {
-  array_selects: (Expr.t, (Expr.t, Expr.t) Hashtbl.t)
-      Hashtbl.t;
-  (** Stores all the get accesses to arrays. *)
-
-  abstracts: (r, Expr.t) Hashtbl.t;
-  (** Stores all the abstract values generated. This cache is necessary
-      to ensure we don't generate twice an abstract value for a given symbol. *)
-}
-
 let is_destructor = function
   | Sy.Op (Destruct _) -> true
   | _ -> false
@@ -1179,8 +1201,9 @@ let is_destructor = function
      Alt-Ergo cannot produces a constant value for them. This function creates
      a new abstract value in this case. *)
 let compute_concrete_model_of_val cache =
-  let store_array_select = Cache.store_array_get cache.array_selects
-  and get_abstract_for = Cache.get_abstract_for cache.abstracts
+  let add_array = Cache.add_array cache
+  and get_abstract_for = Cache.get_abstract_for cache
+  and update_select = Cache.update_select cache
   in fun env t ((mdl, mrepr) as acc) ->
     let { E.f; xs; ty; _ } = E.term_view t in
     (* TODO: We have to filter out destructors here as we don't consider
@@ -1208,23 +1231,18 @@ let compute_concrete_model_of_val cache =
         let ret_rep, mrepr = model_repr_of_term t env mrepr in
         match f, arg_vals, ty with
         | Sy.Name _, [], Ty.Tfarray _ ->
-          begin
-            match Hashtbl.find_opt cache.array_selects t with
-            | Some _ -> acc
-            | None ->
-              (* We have to add an abstract array in case there is no
-                 constraint on its values. *)
-              Hashtbl.add cache.array_selects t (Hashtbl.create 17);
-              acc
-          end
+          add_array env t;
+          acc
 
         | Sy.Op Sy.Set, _, _ -> acc
 
         | Sy.Op Sy.Get, [a; i], _ ->
-          begin
-            store_array_select a i ret_rep;
-            acc
-          end
+          (* We need to save the array [a] as it can be an embedded array
+             that is not declared by the user but should appear in some
+             values of the model. *)
+          add_array env a;
+          update_select env a i ret_rep;
+          acc
 
         | Sy.Name { hs = id; _ }, _, _ ->
           let value =
@@ -1246,8 +1264,9 @@ let compute_concrete_model_of_val cache =
       end
 
 let extract_concrete_model cache =
-  let compute_concrete_model_of_val = compute_concrete_model_of_val cache in
-  let get_abstract_for = Cache.get_abstract_for cache.abstracts
+  let compute_concrete_model_of_val = compute_concrete_model_of_val cache
+  and get_abstract_for = Cache.get_abstract_for cache
+  and retrieve_selects = Cache.retrieve_selects cache
   in fun ~prop_model ~declared_ids env ->
     let terms, suspicious = terms env in
     let model, mrepr =
@@ -1255,10 +1274,11 @@ let extract_concrete_model cache =
         terms (ModelMap.empty ~suspicious declared_ids, ME.empty)
     in
     let model =
-      Hashtbl.fold (fun t vals mdl ->
+      Hashtbl.fold (fun t () mdl ->
           (* We produce a fresh identifiant for abstract value in order to
              prevent any capture. *)
           let abstract = get_abstract_for env t in
+          let vals = retrieve_selects env t in
           let ty = Expr.type_info t in
           let arr_val =
             Hashtbl.fold (fun i v arr_val ->
@@ -1272,7 +1292,7 @@ let extract_concrete_model cache =
             | Sy.Name { hs; _ } -> hs, false
             | _ ->
               (* We only store array declarations as keys in the cache
-                 [array_selects]. *)
+                 [cache.selects]. *)
               assert false
           in
           let mdl =
@@ -1280,23 +1300,21 @@ let extract_concrete_model cache =
               ModelMap.add (id, [], ty) [] arr_val mdl
             else
               (* Internal identifiers can occur here if we need to generate
-                 a model term for an embedded array but this array isn't itself
-                 declared by the user -- see the [embedded-array] test . *)
+                 a model term for an embedded array but this array is not
+                 itself declared by the user -- see the [embedded-array]
+                 test. *)
               mdl
           in
           (* We need to update the model [mdl] in order to substitute all the
              occurrences of the array identifier [id] by an appropriate model
              term. This cannot be performed while computing the model with
-             `compute_concrete_model_of_val` because we need to first iterate
+             [compute_concrete_model_of_val] because we need to first iterate
              on all the union-find environment to collect array values. *)
           ModelMap.subst id arr_val mdl
-        ) cache.array_selects model
+        ) cache.arrays model
     in
     { Models.propositional = prop_model; model; term_values = mrepr }
 
 let extract_concrete_model ~prop_model ~declared_ids =
-  let cache : cache = {
-    array_selects = Hashtbl.create 17;
-    abstracts = Hashtbl.create 17;
-  }
+  let cache = Cache.mk ()
   in fun env -> extract_concrete_model cache ~prop_model ~declared_ids env
diff --git a/tests/issues/555.models.expected b/tests/issues/555.models.expected
index f5b97ee0b..8a798f249 100644
--- a/tests/issues/555.models.expected
+++ b/tests/issues/555.models.expected
@@ -4,5 +4,5 @@ unknown
   (define-fun x () Int 0)
   (define-fun y () Int 0)
   (define-fun a1 () (Array Int Int) (store (as @a4 (Array Int Int)) 0 0))
-  (define-fun a2 () (Array Int Int) (store (as @a5 (Array Int Int)) 0 0))
+  (define-fun a2 () (Array Int Int) (store (as @a4 (Array Int Int)) 0 0))
 )
diff --git a/tests/models/array/array1.models.expected b/tests/models/array/array1.models.expected
index f5b97ee0b..8a798f249 100644
--- a/tests/models/array/array1.models.expected
+++ b/tests/models/array/array1.models.expected
@@ -4,5 +4,5 @@ unknown
   (define-fun x () Int 0)
   (define-fun y () Int 0)
   (define-fun a1 () (Array Int Int) (store (as @a4 (Array Int Int)) 0 0))
-  (define-fun a2 () (Array Int Int) (store (as @a5 (Array Int Int)) 0 0))
+  (define-fun a2 () (Array Int Int) (store (as @a4 (Array Int Int)) 0 0))
 )