-
Notifications
You must be signed in to change notification settings - Fork 10
/
lis.ml
2137 lines (2035 loc) · 81.7 KB
/
lis.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(*
Copyright 2013 Sébastien Ferré, IRISA, Université de Rennes 1, [email protected]
This file is part of Sparklis.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*)
open Js_of_ocaml
open Js
(* indices *)
type 'a forest = 'a tree list
and 'a tree = Node of 'a * 'a forest
let js_forest_map (m : 'a Jsutils.js_map) : 'a forest Jsutils.js_map =
Jsutils.js_map
(`List (`Record [| "item", Jsutils.js_custom_spec m; (* singleton sum is like a record *)
"children", `Rec "self" |]))
let rec forest_filter_map (f : 'a -> 'b option) : 'a forest -> 'b forest =
function
| [] -> []
| tree::forest1 -> tree_filter_map f tree @ forest_filter_map f forest1
and tree_filter_map f : 'a tree -> 'b forest =
function
| Node (item, children) ->
match f item with
| None -> forest_filter_map f children
| Some item' -> [Node (item', forest_filter_map f children)]
class ['a,'b] index ?(parents : ('a -> 'a list) option) () =
object (self)
val mutable organized : bool = false
val mutable h : ('a, 'b * 'a list ref (* children *) * 'a list ref (* parents *)) Hashtbl.t = Hashtbl.create 101
val mutable roots : 'a list = []
val mutable leaves : 'a list = []
method mem (elt : 'a) : bool =
Hashtbl.mem h elt
method get (elt : 'a) : 'b option =
try
let v, _, _ = Hashtbl.find h elt in
Some v
with Not_found -> None
method add (elt, info : 'a * 'b) : unit =
assert (not organized);
Hashtbl.add h elt (info, ref [], ref [])
method remove (elt : 'a) : unit =
assert (not organized);
Hashtbl.remove h elt
method private organize : unit = (* WARNING: must be called after all additions *)
match organized, parents with
| _, None -> ()
| true, _ -> ()
| false, Some parents ->
let add_child k_parent k_child : bool = (* returns true if parent exists *)
try
let _v, ref_children, _ref_parents = Hashtbl.find h k_parent in
ref_children := k_child::!ref_children;
true
with Not_found -> false (* absent parents are ignored *)
in
Hashtbl.iter
(fun k_child (_,_,ref_parents) ->
let l_parents = parents k_child in
let present_parents =
List.filter
(fun k_parent ->
let present = add_child k_parent k_child in
present)
l_parents in
ref_parents := present_parents)
h;
Hashtbl.iter
(fun k (_, ref_children, ref_parents) ->
if !ref_children = [] then leaves <- k::leaves;
if !ref_parents = [] then roots <- k::roots)
h;
organized <- true
method is_empty : bool = (Hashtbl.length h = 0)
method length : int = Hashtbl.length h
method fold : 'c. ('c -> 'a * 'b -> 'c) -> 'c -> 'c =
fun f init -> Hashtbl.fold (fun k (v,_,_) res -> f res (k,v)) h init
method iter : ('a * 'b -> unit) -> unit =
fun f -> Hashtbl.iter (fun k (v,_,_) -> f (k,v)) h
method filter_map_list : 'c. ('a * 'b -> 'c option) -> 'c list =
fun f ->
Hashtbl.fold
(fun k (v,_,_) res ->
match f (k,v) with
| Some x -> x::res
| None -> res)
h []
method filter_map_forest : 'c. inverse:bool -> ('a * 'b -> 'c option) -> 'c forest =
fun ~inverse f ->
self#organize;
if organized then
let rec aux ancestors (keys : 'a list) =
Common.mapfilter
(fun k ->
if List.mem k ancestors (* there is a loop through k *)
then None
else
let k, (v, ref_children, ref_parents) =
try k, Hashtbl.find h k
with Not_found -> assert false in
match f (k,v) with
| Some x ->
let node_children =
if inverse
then aux (k::ancestors) !ref_parents
else aux (k::ancestors) !ref_children in
Some (Node (x, node_children))
| None -> None)
keys
in
aux [] (if inverse then leaves else roots)
else (* no tree organization *)
Hashtbl.fold
(fun k (v,_,_) res ->
match f (k,v) with
| Some x -> Node (x, []) :: res
| None -> res)
h []
method sample_list : 'c. int -> ('a * 'b -> 'c) -> int * 'c list =
fun max f ->
let _, n, res =
Hashtbl.fold
(fun k (v,_,_) (max,n,res as acc) ->
if max <= 0 then acc else (max-1, n+1, f (k,v) :: res))
h (max,0,[]) in
n, res
end
let empty_index () = new index ()
let singleton_index elt_info =
let index = new index () in
index#add elt_info;
index
class ['a] int_index = ['a,int] index ()
class ['a,'b] nested_int_index = ['a, int * 'b int_index] index ()
type freq_unit = Results | Entities | Concepts | Modifiers
let js_freq_unit_map : freq_unit Jsutils.js_map =
Jsutils.js_map
(`Enum [| "Results"; "Entities"; "Concepts"; "Modifiers"|])
type freq = { value : int; max_value : int option; partial : bool; unit : freq_unit }
let js_freq_map : freq Jsutils.js_map =
let open Jsutils in
js_map
(`Record [| "value", `Int;
"maximum", `Option `Int;
"partialCount", `Bool;
"unit", js_custom_spec js_freq_unit_map |])
class incr_freq_index = [Lisql.increment, freq option] index ()
type incr_freq_forest = (Lisql.increment * freq option) forest
let js_incr_freq_forest_map : incr_freq_forest Jsutils.js_map =
let open Jsutils in
js_forest_map
(js_map (`Record [| "suggestion", js_custom_spec Lisql.js_increment_map; (* Tuple and Record have same internal repr *)
"frequency", `Option (js_custom_spec js_freq_map) |]))
type suggestions =
{ partial : bool;
forest : incr_freq_forest }
let js_suggestions_map : suggestions Jsutils.js_map =
let open Jsutils in
js_map
(`Record [| "partial", `Bool;
"forest", js_custom_spec js_incr_freq_forest_map |])
(* increment hierarchies*)
let term_hierarchy_of_focus focus =
match Lisql.hierarchy_of_focus focus with
| None -> Ontology.no_relation
| Some (id,pred,args,argo) ->
Ontology.sparql_relations#get_hierarchy
~mode:`Tabled
~froms:Sparql_endpoint.config_default_graphs#froms
~property_path:(Lisql2sparql.path_pred_args_argo pred args argo)
let increment_parents (term_hierarchy : Rdf.uri Ontology.relation) = function
| Lisql.IncrTerm (Rdf.URI uri) -> List.map (fun u -> Lisql.IncrTerm (Rdf.URI u)) (term_hierarchy#info uri)
| Lisql.IncrType uri -> List.map (fun u -> Lisql.IncrType u) (Ontology.config_class_hierarchy#value#info uri)
| Lisql.IncrRel (uri,xwd) -> List.map (fun u -> Lisql.IncrRel (u,xwd)) (Ontology.config_property_hierarchy#value#info uri)
| _ -> []
class incr_freq_tree_index th = [Lisql.increment, freq option] index ~parents:(increment_parents th) ()
(* configuration *)
let config_intentional_init_concepts = new Config.boolean_input ~key:"intentional_init_concepts" ~input_selector:"#input-intentional-init-concepts" ~default:true ()
let config_nary_relations = new Config.boolean_input ~key:"nary_relations" ~input_selector:"#input-nary-relations" ~default:false ()
let config_incr_sim = new Config.boolean_input ~key:"incr_sim" ~input_selector:"#input-incr-sim" ~default:false ()
let config_concept_profile = new Config.string_input ~key:"concept_profile" ~input_selector:"#input-concept-profile" ~default:"" ()
let config_regexp_hidden_URIs = new Config.string_input ~key:"regexp_hidden_URIs" ~input_selector:"#input-regexp-hidden-uris" ~default:"" ()
let config_max_results = new Config.integer_input ~key:"max_results" ~input_selector:"#input-max-results" ~min:1 ~default:200 ()
let config_max_increment_samples = new Config.integer_input ~key:"max_increment_samples" ~input_selector:"#input-max-increment-samples" ~min:1 ~default:200 ()
let config_max_classes = new Config.integer_input ~key:"max_classes" ~input_selector:"#input-max-classes" ~min:0 ~default:200 ()
let config_max_properties = new Config.integer_input ~key:"max_properties" ~input_selector:"#input-max-properties" ~min:0 ~default:200 ()
let config_avoid_lengthy_queries = new Config.boolean_input ~key:"avoid_lengthy_queries" ~input_selector:"#input-avoid-lengthy-queries" ~default:false ()
let config_wikidata_hide_ID_properties = new Config.boolean_input ~key:"wikidata_hide_ID_properties" ~input_selector:"#input-wikidata-hide-ID-properties" ~default:true ()
let regexp_sep = Regexp.regexp "[,;][ ]*"
let formula_concept_profile_term (tx : _ Sparql.any_term) : Sparql.formula =
let profile = config_concept_profile#value in
if profile = ""
then Sparql.True
else
let uris = List.filter ((<>) "") (Regexp.split regexp_sep profile) in
if uris = []
then Sparql.True
else Sparql.(Pattern
(union
(List.map
(fun u -> rdf_type tx (term_uri u))
uris)))
let formula_concept_profile (v : string) : Sparql.formula =
formula_concept_profile_term (Sparql.var v)
let formula_hidden_URIs_term (tx : _ Sparql.any_term) : Sparql.formula =
match config_regexp_hidden_URIs#value with
| "" -> Sparql.True
| re -> Sparql.(Filter (log_not (log_and [expr_func "BOUND" [tx]; expr_regex (expr_func "str" [tx]) re])))
let formula_hidden_URIs (v : string) : Sparql.formula =
formula_hidden_URIs_term (Sparql.var v)
let pattern_hidden_URIs (v : string) : Sparql.pattern =
Sparql.pattern_of_formula (formula_hidden_URIs v)
(* private intermediate functions, used to produce index *)
let nested_hashtbl_of_results_varterm_list
(keys_vt : Rdf.term list) (nested_vt_opt : Rdf.term option)
results : (Rdf.term option list, int ref * (Rdf.term option, int ref) Hashtbl.t) Hashtbl.t =
let open Sparql_endpoint in
let get vt : binding -> Rdf.term option =
match vt with
| Rdf.Var v ->
if List.mem_assoc v results.vars
then
let i = List.assoc v results.vars in
(fun binding -> binding.(i))
else (fun binding -> None)
| t ->
(fun binding -> Some t) in
let get_keys =
let l_get_key = List.map get keys_vt in
(fun binding -> List.map ((|>) binding) l_get_key) in
let get_nested =
match nested_vt_opt with
| None -> (fun binding -> None)
| Some nested_vt ->
let get_nested = get nested_vt in
(fun binding -> get_nested binding)
in
let ht = Hashtbl.create 1000 in
List.iter
(fun binding ->
let keys = get_keys binding in
let cpt1, nested_ht =
try Hashtbl.find ht keys
with Not_found ->
let data = ref 0, Hashtbl.create 3 in
Hashtbl.add ht keys data;
data in
incr cpt1;
let nested = get_nested binding in
let cpt2 =
try Hashtbl.find nested_ht nested
with Not_found ->
let cpt2 = ref 0 in
Hashtbl.add nested_ht nested cpt2;
cpt2 in
incr cpt2)
results.bindings;
ht
let int_index_of_nested_hashtbl ~mapfilter ht =
let index = new int_index in
Hashtbl.iter
(fun k cpt ->
match mapfilter k with
| None -> ()
| Some k -> index#add (k, !cpt))
ht;
index
let nested_int_index_of_hashtbl ~mapfilter ~nested_mapfilter ht =
let index = new nested_int_index in
Hashtbl.iter
(fun k (cpt1,nested_ht) ->
match mapfilter k with
| None -> ()
| Some k -> index#add (k, (!cpt1, int_index_of_nested_hashtbl ~mapfilter:nested_mapfilter nested_ht)))
ht;
index
let int_index_of_hashtbl ~mapfilter ht =
let index = new int_index in
Hashtbl.iter
(fun k (cpt,_) ->
match mapfilter k with
| None -> ()
| Some k -> index#add (k, !cpt))
ht;
index
let index_of_results_varterm ?(filter = fun (_ : Rdf.term) -> true) vt results : Rdf.term int_index =
match vt with
| Rdf.Var _ ->
let ht = nested_hashtbl_of_results_varterm_list [vt] None results in
int_index_of_hashtbl
~mapfilter:(function [Some key] when filter key -> Some key | _ -> None)
ht
| t -> singleton_index (t,1)
(* distinct count of values for some results column *)
let count_of_results_varterm vt results : int =
match vt with
| Rdf.Var _ ->
let ht = nested_hashtbl_of_results_varterm_list [vt] None results in
Hashtbl.length ht
| t -> 1
let index_of_results_varterm_list ?(filter = fun (_ : Rdf.term) -> true) keys_vt results : Rdf.term option list int_index =
if List.exists Rdf.term_is_var keys_vt
then
let ht = nested_hashtbl_of_results_varterm_list keys_vt None results in
int_index_of_hashtbl
~mapfilter:(fun keys -> if List.for_all (function None -> true | Some key -> filter key) keys then Some keys else None)
ht
else singleton_index (List.map (fun t -> Some t) keys_vt, 1)
let nested_index_of_results_varterm ?(filter = fun _ -> true) key_vt nested_vt_opt results : (Rdf.term, Rdf.term) nested_int_index =
match key_vt, nested_vt_opt with
| Rdf.Var _, _
| _, Some (Rdf.Var _) ->
let ht = nested_hashtbl_of_results_varterm_list [key_vt] nested_vt_opt results in
nested_int_index_of_hashtbl
~mapfilter:(function [Some key] when filter key -> Some key | _ -> None)
~nested_mapfilter:(function Some nested when filter nested -> Some nested | _ -> None)
ht
| t, None -> singleton_index (t, (1,empty_index ()))
| t, Some nested_t -> singleton_index (t, (1,singleton_index (nested_t,1)))
let nested_index_of_results_varterm_list ~(mapfilter : Rdf.term option -> 'a option) keys_vt nested_vt_opt results : ('a list, 'a) nested_int_index =
if List.exists Rdf.term_is_var keys_vt
then begin
let ht = nested_hashtbl_of_results_varterm_list keys_vt nested_vt_opt results in
nested_int_index_of_hashtbl
~mapfilter:(fun keys -> Common.mapforall mapfilter keys)
~nested_mapfilter:(fun nested_opt -> mapfilter nested_opt)
ht end
else begin
let nested_index =
match nested_vt_opt with
| None -> empty_index ()
| Some nested_vt ->
( match nested_vt with
| Rdf.Var _ -> index_of_results_varterm ~filter:(fun t -> mapfilter (Some t) <> None) nested_vt results
| nested_t -> singleton_index (nested_t,1) ) in
match Common.mapforall (fun key_vt -> mapfilter (Some key_vt)) keys_vt with
| Some keys -> singleton_index (keys, (1, nested_index))
| None -> empty_index ()
end
let index_of_results_varterm_list_count (keys_vt : Rdf.term list) (var_count : Rdf.var) results : Rdf.term option list int_index =
let open Sparql_endpoint in
let get vt : binding -> Rdf.term option =
match vt with
| Rdf.Var v ->
if List.mem_assoc v results.vars
then
let i = List.assoc v results.vars in
(fun binding -> try binding.(i) with _ -> assert false)
else (fun binding -> None)
| t ->
(fun binding -> Some t) in
let get_keys =
let l_get_key = List.map get keys_vt in
(fun binding -> List.map ((|>) binding) l_get_key) in
let index = new int_index in
try
let i_count = try List.assoc var_count results.vars with _ -> -1 in
List.iter
(fun binding ->
let keys = get_keys binding in
let count =
if i_count < 0
then 1
else
match binding.(i_count) with
| Some (Rdf.Number (f,s,dt)) -> (try int_of_string s with _ -> 1)
| Some (Rdf.TypedLiteral (s,dt)) -> (try int_of_string s with _ -> 1)
| _ -> 1 in
index#add (keys, count))
results.bindings;
index
with Not_found ->
Jsutils.firebug "index_of_results_varterm_list_count: missing variables";
index
(* extraction of the extension and indexes *)
let enqueue_term = function
| Rdf.URI uri ->
Ontology.enqueue_entity uri;
Lexicon.enqueue_entity uri
| Rdf.TypedLiteral (_,dt) ->
Lexicon.enqueue_class dt
| _ -> ()
let enqueue_term_opt = function
| None -> ()
| Some t -> enqueue_term t
let enqueue_binding_terms binding =
Array.iter
(function
| Some t -> enqueue_term t
| None -> ())
binding
let sync_terms k =
Lexicon.sync_entities
(fun () ->
Ontology.sync_entities
(fun () -> k ()))
let sync_concepts k =
Ontology.sync_concepts
(fun () ->
Lexicon.sync_concepts
(fun () -> k ()))
let page_of_results
(offset : int) (limit : int)
(geolocs : (Sparql.term * (Rdf.var * Rdf.var)) list)
(results : Sparql_endpoint.results)
(k : Sparql_endpoint.results (* subset of results *) -> unit) : unit =
let open Sparql_endpoint in
let rec aux offset limit acc = function
| [] -> acc
| binding::l ->
if offset > 0 then aux (offset-1) limit acc l
else if limit > 0 then begin
enqueue_binding_terms binding;
aux offset (limit-1) (binding :: acc) l end
else acc
in
let partial_vars =
List.filter
(fun (v,i) -> not (List.exists (fun (_,(vlat,vlong)) -> v=vlat || v=vlong) geolocs))
results.vars in
let partial_bindings = List.rev (aux offset limit [] results.bindings) in
let partial_results = { results with vars = partial_vars; bindings = partial_bindings } in
sync_terms (* datatypes and entities *)
(fun () -> k partial_results)
let list_of_results_column (var : Rdf.var) results : Rdf.term list =
let open Sparql_endpoint in
try
let i = List.assoc var results.vars in
List.fold_left
(fun res binding ->
match binding.(i) with
| None -> res
| Some t -> t::res)
[] results.bindings
with Not_found ->
Jsutils.firebug ("list_of_results_column: missing variable " ^ var);
[]
(* use of dependencies between vars to structure query results *)
type results_shape =
| Unit
| Concat of results_shape list (* INV: at least 2 elements *)
| Map of Rdf.var * results_shape
| Descr of Rdf.term * results_shape
let rec string_of_results_shape = function
| Unit -> "[]"
| Concat lsh ->
String.concat " " (List.map (fun sh -> "< " ^ string_of_results_shape sh ^ " >") lsh)
| Map (x,Unit) -> "?" ^ x
| Map (x,sh) -> "?" ^ x ^ ", " ^ string_of_results_shape sh
| Descr (t,sh) -> Rdf.string_of_term t ^ ", " ^ string_of_results_shape sh
module FMDeps =
Find_merge.Set
(struct type t = Rdf.term let compare = Stdlib.compare end)
let results_shape_of_deps
(deps : Lisql2sparql.deps)
(geolocs : (Sparql.term * (Rdf.var * Rdf.var)) list)
(lx : Rdf.var list) : results_shape =
let deps =
let rec transform_dep = function
(* return dep made of variables only, and in-order term-var dependencies *)
(* term-var deps allow to insert Desc (term, ...) above the var *)
| [] -> [], []
| [t] ->
if Rdf.term_is_var t
then [t], []
else [], []
| t1::(t2::r2 as r1) ->
let dep_xs1, deps_tx1 = transform_dep r1 in
if Rdf.term_is_var t1
then t1::dep_xs1, deps_tx1
else (* t1 is not a var *)
if Rdf.term_is_var t2
then dep_xs1, [t1;t2]::deps_tx1 (* adding [term;var] dep *)
else dep_xs1, deps_tx1
in
List.fold_left
(fun deps dep ->
let dep_xs, deps_tx = transform_dep dep in
if dep_xs = []
then deps
else dep_xs :: deps_tx @ deps)
[] deps in
let lx = (* excluding geolocation variables *)
List.filter
(fun x -> not (List.exists (fun (_,(vlat,vlong)) -> x=vlat || x=vlong) geolocs))
lx in
let rec shape_of_deps deps lx : results_shape =
let fm_of_deps deps =
List.fold_left
(fun fm dep ->
match dep with
| [] -> fm
| _ -> snd (FMDeps.merge dep fm))
FMDeps.empty deps
in
match lx with
| [] -> Unit
| x1::lxr ->
let fm = fm_of_deps deps in
match list_shape_of_fm fm x1 lxr deps with
| [] -> Unit
| [sh] -> sh
| lsh -> Concat lsh
and list_shape_of_fm fm x1 lxr deps =
let tx1 = Rdf.Var x1 in
let terms_with_t1 = FMDeps.merged_with tx1 fm in
let t1 = (* looking for an adjacent non-var term of tx1, if any *)
List.fold_left
(fun res dep ->
if List.mem tx1 dep
then
List.fold_right
(fun t res2 ->
if t = tx1 then res (* ignoring terms after tx1 in dep *)
else if Rdf.term_is_var t then res2 (* ignoring other terms *)
else t)
dep res
else res)
tx1 deps in
let lxr = if t1 = tx1 then lxr else x1::lxr in
let lx1, lxr =
List.partition
(fun x -> List.mem (Rdf.Var x) terms_with_t1)
lxr in
let deps1, depsr =
List.partition
(function
| [] -> false
| t::_ -> List.mem t terms_with_t1)
deps in
let deps1 = (* removing t1 from dependencies *)
List.map
(fun dep1 ->
List.filter
(fun t -> t <> t1)
dep1)
deps1 in
let shape1 =
match t1 with
| Rdf.Var x1 -> Map (x1, shape_of_deps deps1 lx1)
| _ -> Descr (t1, shape_of_deps deps1 lx1) in
let lshaper =
match lxr with
| [] -> []
| x2::lxr -> list_shape_of_fm fm x2 lxr depsr in
shape1::lshaper
in
shape_of_deps deps lx
type shape_data =
[ `Unit
| `Concat of shape_data list
| `MapN of [`KeyVar | `KeyTerm] * Rdf.var option list * (Rdf.term option list * shape_data) list ]
let shape_data_of_results
(shape : results_shape)
(results : Sparql_endpoint.results)
(k : Rdf.var option list -> shape_data -> unit) : unit =
(* [f] stands for 'functional depth' *)
let open Sparql_endpoint in
let var_index = results.vars in (* assoc var -> binding index *)
let make_concat = function
| [] -> `Unit
| [d] -> d
| ld -> `Concat ld in
let rec aux shape bindings =
(* result: var list, solution count, functional depth, shape_data *)
match shape with
| Unit -> [], 1, 0, `Unit
| Concat lsh ->
let lv, c, f, ld =
List.fold_right
(fun sh (lv,c,f,ld) ->
let lvi, ci, fi, di = aux sh bindings in
let f = if fi = List.length lvi then fi+f else fi in
lvi@lv, ci*c, f, di::ld)
lsh ([],1,0,[]) in
lv, c, f, `Concat ld
| Map (v,sh) ->
let i =
try List.assoc v var_index
with Not_found -> assert false in
let rank = ref 0 in
let ht = Hashtbl.create 201 in
bindings
|> List.iter
(fun binding ->
incr rank;
let t_opt = binding.(i) in
try
let first_rank, ref_t_bindings = Hashtbl.find ht t_opt in
ref_t_bindings := binding::!ref_t_bindings
with Not_found ->
Hashtbl.add ht t_opt (!rank, ref [binding]));
let lv1, c, f1, rank_rows =
Hashtbl.fold
(fun t_opt (first_rank,ref_t_bindings) (lv,c,f1,rank_rows) ->
let lvi, ci, fi, di = aux sh (List.rev !ref_t_bindings) in (* all [lvi] are the same *)
enqueue_term_opt t_opt;
lvi, c+ci, min fi f1, (first_rank,t_opt,di)::rank_rows)
ht ([],0,max_int,[]) in
let lv = Some v::lv1 in
let f = if List.length rank_rows = 1 then 1+f1 else 0 in
let ranked_rows = List.sort Stdlib.compare rank_rows in
(* TODO : optimize when f=0 and f=1 *)
let d = mapn_of_rows `KeyVar lv f1 ranked_rows in
lv, c, f, d
| Descr (t,sh) ->
let lv1, c1, f1, d1 = aux sh bindings in
enqueue_term t;
let lv, c, f, ranked_rows = None::lv1, c1, 0 (*1+f1*), [(1, Some t, d1)] in
let d = mapn_of_rows `KeyTerm lv f1 ranked_rows in
lv, c, f, d
and mapn_of_rows key lv f1 ranked_rows : shape_data =
let lv_a, _ = Common.split_list_at lv (1+f1) in
let rows =
List.map
(fun (_,t_opt,di) ->
let lt_a, fd_b = row_of_data f1 di in
match fd_b with
| `D d_b -> t_opt :: lt_a, d_b
| `F _ -> assert false)
ranked_rows in
`MapN (key, lv_a, rows)
and row_of_data (f : int) (d : shape_data) : Rdf.term option list * [`F of int | `D of shape_data] =
if f = 0
then [], `D d
else
match d with
| `Unit -> [], `F f
| `Concat [] -> [], `F f
| `Concat (di::ldi) ->
let lt1, fd1 = row_of_data f di in
( match fd1 with
| `D d1 -> lt1, `D (make_concat (d1::ldi))
| `F f1 ->
let lt2, fd2 = row_of_data f1 (make_concat ldi) in
lt1 @ lt2, fd2 )
| `MapN (key,lv,[lt1,d1]) ->
let n = List.length lv in
if f < n
then
let lv_a, lv_b = Common.split_list_at lv f in
let lt1_a, lt1_b = Common.split_list_at lt1 f in
lt1_a, `D (`MapN (`KeyVar, lv_b, [lt1_b,d1]))
else (* f >= n *)
let lt2, fd2 = row_of_data (f-n) d1 in
lt1 @ lt2, fd2
| `MapN _ -> assert false
in
let lv, _c, _f, data = aux shape results.bindings in
sync_terms
(fun () -> k lv data)
(* slidewhow *)
type slide_data = { media_uri : Rdf.uri;
binding_fields : (string (* var name *) * Rdf.term option) list }
let slides_of_results results (k : slide_data list -> unit) : unit =
let open Sparql_endpoint in
let rev_l =
List.fold_left
(fun rev_l binding ->
Array.fold_left
(fun rev_l term_opt ->
match term_opt with
| Some (Rdf.URI uri)
when Rdf.uri_is_image uri || Rdf.uri_is_video uri ->
enqueue_binding_terms binding;
let data = { media_uri = uri;
binding_fields =
List.map
(fun (v,i) -> (v, binding.(i)))
results.vars;
} in
data::rev_l
| _ -> rev_l)
rev_l binding)
[] results.bindings
in
sync_terms
(fun () -> k (List.rev rev_l))
(* geolocations *)
let geolocations_of_results (geolocs : (Sparql.term * (Rdf.var * Rdf.var)) list) results (k : (float * float * Rdf.term) list -> unit) : unit =
let open Sparql_endpoint in
let l =
List.fold_left
(fun data ((t : Sparql.term), (v_lat,v_long)) ->
try
let i_lat = List.assoc v_lat results.vars in
let i_long = List.assoc v_long results.vars in
let get_term_opt =
let s = (t :> string) in
assert (s <> "");
if s.[0] = '?' then
let v = String.sub s 1 (String.length s - 1) in
let i_name = List.assoc v results.vars in
(fun binding -> binding.(i_name))
else
(fun binding -> None) in (* TODO *)
List.fold_left
(fun data binding ->
match binding.(i_lat), binding.(i_long), get_term_opt binding with
| Some (Rdf.Number (lat,_,_)), Some (Rdf.Number (long,_,_)), Some term ->
enqueue_term term;
(lat,long,term)::data
| _ -> data)
data results.bindings
with Not_found ->
Jsutils.firebug ("Missing geoloc vars in results: " ^ v_lat ^ ", " ^ v_long);
data)
[] geolocs in
sync_terms
(fun () -> k l)
(* external search *)
let ajax_external_search_constr ~endpoint (search : Lisql.search) (k : (Lisql.constr, exn) Result.t -> unit) : unit =
match search with
| WikidataSearch [] -> k (Result.Ok Lisql.True)
| WikidataSearch kwds ->
let query = String.concat "+" kwds in
(* limit = 10 (* TEST 30 *) in *)
Jsutils.Wikidata.ajax_entity_search
`Q query 30
(function
| Result.Ok lq ->
Jsutils.Wikidata.ajax_entity_search (* TODO: launch two call in parallel *)
`P query 10
(function
| Result.Ok lp ->
let ltq =
List.map
(fun q -> Rdf.URI (Rdf.wikidata_entity q))
lq in
let ltp =
List.map
(fun p ->
if config_nary_relations#value (* see Lisq2sparql.WhichProp/Pred *)
then Rdf.URI (Rdf.wikidata_prop p)
else Rdf.URI (Rdf.wikidata_prop_direct p))
lp in
let lt = ltq @ ltp in
k (Result.Ok (Lisql.ExternalSearch (search, Some lt)))
| (Result.Error _ as err)-> k err)
| (Result.Error _ as err) -> k err)
| TextQuery [] -> k (Result.Ok Lisql.True)
| TextQuery kwds ->
let lucene = Jsutils.lucene_query_of_kwds kwds in
if lucene = ""
then k (Result.Error (Failure "The Lucene query derived from keywords is empty"))
else (
let sparql =
let x = "x" in
Sparql.(select
~distinct:true
~projections:[`Bare, x]
~limit:config_max_increment_samples#value
(text_query (var x) lucene)) in
Sparql_endpoint.(ajax_in (* TODO: change this function for promises *)
[] (new ajax_pool)
endpoint (sparql :> string)
(fun sparql results ->
let lt =
List.fold_left
(fun lt binding ->
match binding with
| [| Some t |] -> t::lt
| _ -> lt)
[] results.bindings in
k (Result.Ok (Lisql.ExternalSearch (search, Some lt))))
(fun code ->
k (Result.Error (Failure ("The SPARQL query for retrieving entities matching a Lucene query failed with code " ^ string_of_int code))))))
(* hooks for Sparklis extension *)
let hook_suggestions : (freq_unit * suggestions) -> ((suggestions, exn) Result.t -> unit) -> unit =
let open Jsutils in
let js_unit_suggestions_map =
js_map
(`Record [| "type", js_custom_spec js_freq_unit_map;
"suggestions", js_custom_spec js_suggestions_map |]) in
fun unit_suggestions k ->
Config.apply_hook_data "suggestions"
Config.sparklis_extension##.hookSuggestions
js_unit_suggestions_map
unit_suggestions
(fun (_, suggestions) -> k (Result.Ok suggestions))
(* LIS navigation places *)
class place (endpoint : string) (focus : Lisql.focus) =
let ids, focus_descr, s_annot = Lisql_annot.annot_focus focus in
let query, path = Lisql.elt_s_path_of_focus focus in
object (self)
(* essential state *)
val endpoint = endpoint
method endpoint = endpoint
method focus : Lisql.focus = focus (* focus-centric query representation *)
method query_annot : Lisql_annot.annot Lisql.elt_s = s_annot (* annotated query *)
method query : unit Lisql.elt_s = query (* query *)
method path : Lisql.path = path (* focus path *)
method query_ids : Lisql_annot.Ids.t = ids (* defined entity ids in query *)
method focus_entity : Lisql_annot.focus_term = focus_descr#term (* entity at focus, if any *)
(* derived state *)
val mutable term_hierarchy : Rdf.uri Ontology.relation = Ontology.no_relation
val mutable id_labelling = new Lisql2nl.id_labelling []
method id_labelling = id_labelling
val mutable s_sparql : Lisql2sparql.s_sparql =
Lisql2sparql.({
state = new Lisql2sparql.state (new Lisql2nl.id_labelling []);
focus_term_opt = None;
focus_graph_opt = None;
focus_pred_args_opt = None;
deps = [];
query_opt = None;
query_count_opt = None;
query_class_opt = None;
query_prop_opt = None;
query_pred_opt = None;
query_arg_opt = None;
seq_view = 0, Lisql_annot.Unit })
method focus_term_opt = s_sparql.Lisql2sparql.focus_term_opt
method focus_graph_opt = s_sparql.Lisql2sparql.focus_graph_opt
method focus_pred_args_opt = s_sparql.Lisql2sparql.focus_pred_args_opt
method private init =
begin
term_hierarchy <- term_hierarchy_of_focus focus;
id_labelling <- Lisql2nl.id_labelling_of_s_annot Lisql2nl.config_lang#grammar s_annot;
s_sparql <- Lisql2sparql.s_annot id_labelling focus_descr s_annot
(*
Jsutils.firebug ("focus_term_opt = " ^ match s_sparql.Lisql2sparql.focus_term_opt with None -> "(none)" | Some t -> Rdf.string_of_term t);
Jsutils.firebug ("focus_graph_opt = " ^ match s_sparql.Lisql2sparql.focus_graph_opt with None -> "(none)" | Some t -> Rdf.string_of_term t);
Jsutils.firebug ("unconstrained focus = " ^ if focus_descr#unconstrained then "yes" else "no")
*)
end
initializer self#init
(* utilities *)
(*
method private is_qualifier_property pq =
match s_sparql.Lisql2sparql.focus_pred_args_opt with
| None -> false
| Some (pred,args) ->
let open Lisql in
match pred with
| Class _ -> false
| Prop _ -> false
| SO (ps,po) -> pq <> Rdf.rdf_type && pq <> ps && pq <> po
| EO (pe,po) -> pq <> Rdf.rdf_type && pq <> po
*)
val ajax_pool = new Sparql_endpoint.ajax_pool
method abort_all_ajax = ajax_pool#abort_all
(* place state *)
val mutable current_term_constr = Lisql.True
val mutable current_limit = config_max_results#value
(* SPARQL derived state: query and results *)
val mutable results_ok = false (* to know whether mutable vars below are defined *)
val mutable sparql_opt : string option = None
val mutable results = Sparql_endpoint.empty_results
val mutable results_shape = Unit
val mutable results_typing : Lisql_type.datatype list array = [||]
val mutable focus_type_constraints : Lisql_type.focus_type_constraints = Lisql_type.default_focus_type_constraints
val mutable focus_term_index : (Rdf.term, Rdf.term) nested_int_index = new nested_int_index (* used when some focus term *)
val mutable focus_graph_index : Rdf.term int_index = new int_index (* used when no focus_term but some focus graph *)
val mutable focus_pred_args_index : (Rdf.term list, Rdf.term) nested_int_index = new nested_int_index (* used when some focus-pred-args *)
val mutable some_focus_term_is_not_queryable : bool = false
method filter_type : Lisql.filter_type =
let has_IRI =
Lisql_type.(check_input_constraint focus_type_constraints.input_constr `IRI) in
let has_Literal =
Lisql_type.(check_input_constraint focus_type_constraints.input_constr `String) in
match has_IRI, has_Literal with
| true, true -> Mixed
| true, false -> OnlyIRIs
| false, true -> OnlyLiterals
| false, false -> OnlyIRIs
method ajax_sparql_results ?limit term_constr elts (k : unit -> unit) : unit =
(* define the new SPARQL query *)
let limit =
match limit with
| None -> config_max_results#value
| Some n -> n in
let new_sparql_opt =
match s_sparql.Lisql2sparql.query_opt with
| None -> None
| Some query ->
let ft = self#filter_type in
let sparql_genvar = s_sparql.Lisql2sparql.state#genvar in
let froms = Sparql_endpoint.config_default_graphs#froms in
let sparql = query
~hook:(fun tx form ->
let form_constr = Lisql2sparql.filter_constr_entity sparql_genvar tx term_constr ft in
if Lisql.hierarchy_of_focus focus = None (* TODO: improve this rough hack *)
then Sparql.formula_and form_constr form
else Sparql.formula_and form form_constr)
~froms ~limit () in
Some sparql in
(* define the query results, and continue when ready *)
match new_sparql_opt with
| None ->
results_ok <- true;
sparql_opt <- None;
results <- Sparql_endpoint.empty_results;
self#define_results_views;
k ()
| Some sparql ->
Sparql_endpoint.ajax_in
~main_query:true (* updating YASGUI, and hooking the query and results *)
elts ajax_pool endpoint sparql
(fun sparql res ->
results_ok <- true;
sparql_opt <- Some sparql;
results <- res;
self#define_results_views;
k ())
(fun code ->
(* no state update *)
k ())