forked from lojban/ilmentufa
-
Notifications
You must be signed in to change notification settings - Fork 0
/
camxes-beta-cbm-ckt.peg
1894 lines (1290 loc) · 63.2 KB
/
camxes-beta-cbm-ckt.peg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# camxes.js.peg
# Copyright (c) 2013, 2014 Masato Hagiwara
# https://github.com/mhagiwara/camxes.js
#
# camxes.js can be used, modified, and re-distributed under MIT license.
# See LICENSE for the details.
# This is a Parsing Expression Grammar for Lojban.
# See http://bford.info/packrat/
#
# All rules have the form:
#
# name <- peg_expression
#
# which means that the grammatical construct "name" is parsed using
# "peg_expression".
#
# 1) Names in lower case are grammatical constructs.
# 2) Names in UPPER CASE are selma'o (lexeme) names, and are terminals.
# 3) Concatenation is expressed by juxtaposition with no operator symbol.
# 4) / represents *ORDERED* alternation (choice). If the first
# option succeeds, the others will never be checked.
# 5) ? indicates that the element to the left is optional.
# 6) * represents optional repetition of the construct to the left.
# 7) + represents one_or_more repetition of the construct to the left.
# 8) () serves to indicate the grouping of the other operators.
#
# Longest match wins.
# How to compile using Node.js: (Added by Masato Hagiwara)
# // load peg.js and the file system module
# > var PEG = require("pegjs")
# > var fs = require("fs")
# // read peg and build a parser
# > var camxes_peg = fs.readFileSync("/path/to/camxes.js.peg").toString();
# > var camxes = PEG.buildParser(camxes_peg, {cache: true});
# // test it
# > camxes.parse("ko'a broda");
# [ 'text',
# [ 'text_1',
# [ 'paragraphs', [Object] ] ] ]
# // write to a file
# > fs.writeFileSync("/path/to/camxes.js", camxes.toSource());
# ___ GRAMMAR ___
# Top-level rule. In order: intro_null, any number of NAI_clause, text_part_2,
# an optional joik_jek that is not the start of a gek, an optional text_1,
# then faho_clause and an optional EOF.
text <- intro_null NAI_clause* text_part_2 (!gek joik_jek)? text_1? faho_clause EOF?
# Optional initial_spaces, any number of su_clause, then intro_si_clause.
intro_null <- initial_spaces? su_clause* intro_si_clause
# Optional indicators followed by any number of free. Can match nothing.
text_part_2 <- indicators? free*
#; intro_sa_clause = SA_clause+ / any_word_SA_handling !(ZEI_clause SA_clause) intro_sa_clause
# An optional si_clause followed by any number of SI_clause. Can match nothing.
intro_si_clause <- si_clause? SI_clause*
# Optional. When FAhO_clause is present, dot_star consumes all remaining input.
faho_clause <- (FAhO_clause dot_star)?
# Please note that the "text_1" item in the text_1 production does
# *not* match the BNF. This is due to a bug in the BNF. The change
# here was made to match grammar.300
# Three ordered alternatives:
#   1. I_clause, optional jek or joik, optional (stag? BO_clause), free*,
#      then an optional recursive text_1;
#   2. one or more NIhO_clause, free*, su_clause*, optional paragraphs;
#   3. paragraphs alone.
text_1 <- I_clause (jek / joik)? (stag? BO_clause)? free* text_1? / NIhO_clause+ free* su_clause* paragraphs? / paragraphs
# An optional paragraph, optionally followed by NIhO_clause+ free* su_clause*
# and further paragraphs (right recursion). Every part is optional, so this
# rule can succeed without consuming input.
paragraphs <- paragraph? (NIhO_clause+ free* su_clause* paragraphs)?
# A statement or fragment, then any number of continuations introduced by
# I_clause. The negative lookaheads reject an I_clause that is directly
# followed by jek, joik or joik_jek. The statement or fragment after
# I_clause free* is optional.
paragraph <- (statement / fragment) (I_clause !jek !joik !joik_jek free* (statement / fragment)?)*
# BEGIN BETA: IAU
# statement_0 followed by IAU_elidible, free* and optional trailing terms.
statement <- statement_0 IAU_elidible free* terms?
# Either statement_1, or a prenex followed by a full statement (recursive).
statement_0 <- statement_1 / prenex statement
# END BETA: IAU
# statement_2 items chained by I_clause joik_jek. The statement_2 after each
# connective is optional.
statement_1 <- statement_2 (I_clause joik_jek statement_2?)*
# statement_3 optionally followed by one link of the form
# I_clause (jek or joik)? stag? BO_clause free*, whose right-hand side is an
# optional statement_2 (right recursion).
statement_2 <- statement_3 (I_clause (jek / joik)? stag? BO_clause free* statement_2?)?
# Either a sentence, or text_1 bracketed by TUhE_clause and TUhU_elidible.
# The optional tag belongs to the second alternative only.
statement_3 <- sentence / tag? TUhE_clause free* text_1 TUhU_elidible free*
# BETA: NA sequence fragments
# Alternative to statement inside paragraph. Ordered alternatives: prenex;
# terms with VAU_elidible; ek; gihek; quantifier; one or more NA_clause each
# not followed by JA_clause; relative_clauses; links; linkargs.
# Because choice is ordered, prenex is tried before the plain terms case.
fragment <- prenex / terms VAU_elidible free* / ek free* / gihek free* / quantifier / (NA_clause !JA_clause free*)+ / relative_clauses / links / linkargs
# terms followed by ZOhU_clause and free*.
prenex <- terms ZOhU_clause free*
#; sentence = (terms CU_clause? free*)? bridi_tail / bridi_tail
# BETA: JACU, JE.I
# Optional leading terms, then bridi_tail_t1, then any number of joik_jek
# continuations (either a bare bridi_tail, or a bridi_tail inside
# stag? KE_clause ... KEhE_elidible), then any number of
# joik_jek I_clause free* subsentence continuations.
sentence <- terms? bridi_tail_t1 (joik_jek bridi_tail / joik_jek stag? KE_clause free* bridi_tail KEhE_elidible free*)* (joik_jek I_clause free* subsentence)*
# sentence = expr:(((terms bridi_tail_sa*)? CU_elidible free*)? bridi_tail_sa* bridi_tail) {return _node("sentence", expr);}
# BETA: JACU
# bridi_tail_t2 optionally followed by one
# joik_jek stag? KE_clause free* bridi_tail KEhE_elidible free* group.
bridi_tail_t1 <- bridi_tail_t2 (joik_jek stag? KE_clause free* bridi_tail KEhE_elidible free*)?
# BETA: JACU
# bridi_tail optionally followed by one joik_jek stag? BO_clause free* link
# to another bridi_tail.
bridi_tail_t2 <- bridi_tail (joik_jek stag? BO_clause free* bridi_tail)?
# SA handling at sentence level. Consumes a sentence_start, then repeatedly,
# while not positioned at another sentence_start, consumes either a sa_word
# or an SA_clause that is not followed by a sentence_start. Ends with an
# SA_clause and requires (lookahead only) that text_1 can follow.
sentence_sa <- sentence_start (!sentence_start (sa_word / SA_clause !sentence_start ) )* SA_clause &text_1
# Either I_pre or NIhO_pre.
sentence_start <- I_pre / NIhO_pre
# Either a sentence, or a prenex followed by a subsentence (recursive).
subsentence <- sentence / prenex subsentence
# BETA: JACU
# bridi_tail_1 optionally followed by one group of the form: gihek or
# joik_jek, optional stag, KE_clause free* bridi_tail KEhE_elidible free*,
# then tail_terms.
bridi_tail <- bridi_tail_1 ((gihek / joik_jek) stag? KE_clause free* bridi_tail KEhE_elidible free* tail_terms)?
# SA handling for bridi tails. Consumes a bridi_tail_start, then repeatedly
# either a term, or (while not at another bridi_tail_start) a sa_word or an
# SA_clause not followed by a bridi_tail_start. Ends with an SA_clause and
# requires (lookahead only) that bridi_tail can follow.
bridi_tail_sa <- bridi_tail_start (term / !bridi_tail_start (sa_word / SA_clause !bridi_tail_start ) )* SA_clause &bridi_tail
# Openers recognised by bridi_tail_sa. NA_clause only counts when not
# followed by KU_clause, and NAhE_clause only when not followed by BO_clause.
# tag and KE_clause prefixes recurse into bridi_tail_start. The last
# alternative is a complete bridi_tail.
bridi_tail_start <- ME_clause / NUhA_clause / NU_clause / NA_clause !KU_clause / NAhE_clause !BO_clause / selbri / tag bridi_tail_start / KE_clause bridi_tail_start / bridi_tail
# BETA: JACU
# bridi_tail_2 items chained by gihek or joik_jek, each followed by
# tail_terms. The two negative lookaheads exclude connectives followed by
# stag? BO_clause or stag? KE_clause; those forms appear in bridi_tail_2 and
# bridi_tail respectively.
bridi_tail_1 <- bridi_tail_2 ((gihek / joik_jek) !(stag? BO_clause) !(stag? KE_clause) free* bridi_tail_2 tail_terms)* # !LR2
# BETA: JACU
# CU_elidible free* bridi_tail_3, optionally followed by one
# (gihek or joik_jek) stag? BO_clause free* link to another bridi_tail_2
# (right recursion) and its tail_terms.
bridi_tail_2 <- CU_elidible free* bridi_tail_3 ((gihek / joik_jek) stag? BO_clause free* bridi_tail_2 tail_terms)?
# BETA: JACU
# Either any number of (terms CU_elidible) groups followed by selbri and
# tail_terms, or a gek_sentence.
bridi_tail_3 <- (terms CU_elidible)* selbri tail_terms / gek_sentence
# Three ordered alternatives:
#   1. gek subsentence gik subsentence tail_terms;
#   2. any number of tag, then a gek_sentence bracketed by KE_clause and
#      KEhE_elidible;
#   3. NA_clause free* followed by a gek_sentence.
gek_sentence <- gek subsentence gik subsentence tail_terms / tag* KE_clause free* gek_sentence KEhE_elidible free* / NA_clause free* gek_sentence
# Optional terms followed by VAU_elidible and free*.
tail_terms <- terms? VAU_elidible free*
# One or more terms_1.
terms <- terms_1+
#; terms_1 = terms_2 (PEhE_clause free* joik_jek terms_2)*
#; terms_2 = term (CEhE_clause free* term)*
# terms_2 items chained by PEhE_clause free* joik_jek. Any number of pehe_sa
# may precede each PEhE_clause.
terms_1 <- terms_2 (pehe_sa* PEhE_clause free* joik_jek terms_2)*
# A term followed by any number of CEhE_clause free* nonabs_term links. Any
# number of cehe_sa may precede each CEhE_clause. Note that the items after
# CEhE_clause are nonabs_term, not term.
terms_2 <- term (cehe_sa* CEhE_clause free* nonabs_term)*
# SA handling for PEhE. Consumes a PEhE_clause, then repeatedly, while not at
# another PEhE_clause, a sa_word or an SA_clause not followed by PEhE_clause,
# and ends with an SA_clause.
pehe_sa <- PEhE_clause (!PEhE_clause (sa_word / SA_clause !PEhE_clause))* SA_clause
# Same shape as pehe_sa, with CEhE_clause in place of PEhE_clause.
cehe_sa <- CEhE_clause (!CEhE_clause (sa_word / SA_clause !CEhE_clause))* SA_clause
#;term = sumti / ( !gek (tag / FA_clause free*) (sumti / KU_elidible free*) ) / termset / NA_clause KU_clause free*
# Any number of term_sa followed by term_1.
term <- term_sa* term_1
# BEGIN BETA: TERM JA TERM
# term_2 items chained by joik_ek.
term_1 <- term_2 (joik_ek term_2)*
# term_3 items chained by BO_clause, each BO_clause optionally preceded by
# joik_ek and stag.
term_2 <- term_3 (joik_ek? stag? BO_clause term_3)*
# Ordered choice between sumti, tag_term, nontag_adverbial and termset.
term_3 <- sumti / tag_term / nontag_adverbial / termset
# Must not start with a gek. Begins with either a tag or FA_clause free*.
# The tag alternative is rejected when the tag is followed by a selbri that
# does not itself begin with a tag. Then comes either a sumti or
# KU_elidible free*.
tag_term <- !gek (tag !(!tag selbri) / FA_clause free*) (sumti / KU_elidible free*)
# Any number of term_sa followed by nonabs_term_1.
nonabs_term <- term_sa* nonabs_term_1
# Like term_1, but only the first item is a nonabs_term_2. Items after
# joik_ek are plain term_2.
nonabs_term_1 <- nonabs_term_2 (joik_ek term_2)*
# Like term_2, but only the first item is a nonabs_term_3. Items after
# BO_clause are plain term_3.
nonabs_term_2 <- nonabs_term_3 (joik_ek? stag? BO_clause term_3)*
# Same alternatives as term_3, with nonabs_tag_term in place of tag_term.
nonabs_term_3 <- sumti / nonabs_tag_term / nontag_adverbial / termset
# NOTE(review): this body is identical to tag_term, so the nonabs_* rules
# currently accept the same input as their plain counterparts. Confirm
# whether a difference was intended.
nonabs_tag_term <- !gek (tag !(!tag selbri) / FA_clause free*) (sumti / KU_elidible free*)
# BETA: NOIhA, New-SOI
# Three ordered alternatives:
#   1. NA_clause free* KU_clause free*;
#   2. NOIhA_clause free* sumti_tail SEhU_elidible free*;
#   3. SOI_clause free* subsentence SEhU_elidible free*.
nontag_adverbial <- NA_clause free* KU_clause free* / NOIhA_clause free* sumti_tail SEhU_elidible free* / SOI_clause free* subsentence SEhU_elidible free*
# END BETA: TERM JA TERM
# SA handling for terms. Consumes a term_start, then repeatedly, while not at
# another term_start, a sa_word or an SA_clause not followed by a term_start.
# Ends with an SA_clause and requires (lookahead only) that term_1 can follow.
term_sa <- term_start (!term_start (sa_word / SA_clause !term_start ) )* SA_clause &term_1
# Openers recognised by term_sa. A complete term_1 is tried first. The
# quantifier and tag prefixes recurse into term_start.
term_start <- term_1 / LE_clause / LI_clause / LU_clause / LAhE_clause / quantifier term_start / gek sumti gik / FA_clause / tag term_start
# BETA: KE-termset
# Four ordered alternatives:
#   1. gek_termset;
#   2. NUhI_clause free* gek terms NUhU_elidible free* gik terms
#      NUhU_elidible free*;
#   3. NUhI_clause free* terms NUhU_elidible free*;
#   4. terms bracketed by KE_clause and KEhE_elidible. This alternative has
#      no free* after either bracket.
termset <- gek_termset / NUhI_clause free* gek terms NUhU_elidible free* gik terms NUhU_elidible free* / NUhI_clause free* terms NUhU_elidible free* / KE_clause terms KEhE_elidible
# A gek followed by terms_gik_terms.
gek_termset <- gek terms_gik_terms
# Self-nesting rule: each level consumes one nonabs_term on the left and one
# on the right, with gik at the innermost level. The number of nonabs_term
# before and after gik is therefore equal.
terms_gik_terms <- nonabs_term (gik / terms_gik_terms) nonabs_term
# BETA: Enhanced VUhO
# sumti_1 optionally followed by VUhO_clause free*. After VUhO_clause the
# relative_clauses are themselves optional, and may be followed by
# joik_ek sumti.
sumti <- sumti_1 (VUhO_clause free* (relative_clauses (joik_ek sumti)?)?)?
# sumti_2 optionally followed by one joik_ek stag? link to a sumti bracketed
# by KE_clause and KEhE_elidible.
sumti_1 <- sumti_2 (joik_ek stag? KE_clause free* sumti KEhE_elidible free*)?
# sumti_3 items chained by joik_ek.
sumti_2 <- sumti_3 (joik_ek sumti_3)* # !LR2
# sumti_4 optionally followed by one joik_ek stag? BO_clause free* link to
# another sumti_3 (right recursion).
sumti_3 <- sumti_4 (joik_ek stag? BO_clause free* sumti_3)?
# Either sumti_5, or gek sumti gik followed by another sumti_4.
sumti_4 <- sumti_5 / gek sumti gik sumti_4
# Either sumti_6 with an optional leading quantifier and optional trailing
# relative_clauses, or quantifier selbri KU_elidible free* with optional
# relative_clauses.
sumti_5 <- quantifier? sumti_6 relative_clauses? / quantifier selbri KU_elidible free* relative_clauses?
# BETA: NAhE+SUMTI, LAhE+TERM, ZOhOI, LOhOI
# Ordered alternatives:
#   1. ZO_clause; 2. ZOI_clause; 3. ZOhOI_clause; 4. LOhU_clause
#      (each followed by free*);
#   5. lerfu_string not followed by MOI_clause, then BOI_elidible;
#   6. LU_clause text LIhU_elidible;
#   7. LAhE_clause, or NAhE_clause with optional BO_clause, followed by
#      either relative_clauses? sumti or a term, then LUhU_elidible;
#   8. KOhA_clause;
#   9. LE_clause free* sumti_tail KU_elidible;
#  10. li_clause;
#  11. LOhOI_clause free* subsentence KUhAU_clause, where KUhAU_clause is
#      mandatory rather than an elidible terminator.
sumti_6 <- ZO_clause free* / ZOI_clause free* / ZOhOI_clause free* / LOhU_clause free* / lerfu_string !MOI_clause BOI_elidible free* / LU_clause text LIhU_elidible free* / (LAhE_clause free* / NAhE_clause BO_clause? free*) (relative_clauses? sumti / term) LUhU_elidible free* / KOhA_clause free* / LE_clause free* sumti_tail KU_elidible free* / li_clause / LOhOI_clause free* subsentence KUhAU_clause free*
# LI_clause free* mex LOhO_elidible free*.
li_clause <- LI_clause free* mex LOhO_elidible free*
# Either sumti_tail_1 with an optional leading (sumti_6 relative_clauses?),
# or relative_clauses followed by sumti_tail_1.
sumti_tail <- (sumti_6 relative_clauses?)? sumti_tail_1 / relative_clauses sumti_tail_1
# Either selbri with optional relative_clauses, or quantifier selbri with
# optional relative_clauses, or quantifier sumti.
sumti_tail_1 <- selbri relative_clauses? / quantifier selbri relative_clauses? / quantifier sumti
# BETA: JAPOI
# Either relative_clause items chained by ZIhE_clause or joik_jek (each
# followed by free*), or gek relative_clauses gik relative_clauses.
relative_clauses <- relative_clause ((ZIhE_clause / joik_jek) free* relative_clause)* / gek relative_clauses gik relative_clauses
#; relative_clause = GOI_clause free* term GEhU_clause? free* / NOI_clause free* subsentence KUhO_clause? free*
# Any number of relative_clause_sa followed by relative_clause_1.
relative_clause <- relative_clause_sa* relative_clause_1
# SA handling for relative clauses. Consumes a relative_clause_start, then
# repeatedly, while not at another relative_clause_start, a sa_word or an
# SA_clause not followed by a relative_clause_start. Ends with an SA_clause
# and requires (lookahead only) that relative_clause_1 can follow.
relative_clause_sa <- relative_clause_start (!relative_clause_start (sa_word / SA_clause !relative_clause_start ) )* SA_clause &relative_clause_1
# Either GOI_clause free* nonabs_term GEhU_elidible free*, or
# NOI_clause free* subsentence KUhO_elidible free*.
relative_clause_1 <- GOI_clause free* nonabs_term GEhU_elidible free* / NOI_clause free* subsentence KUhO_elidible free*
# Either GOI_clause or NOI_clause, the two openers used by relative_clause_1.
relative_clause_start <- GOI_clause / NOI_clause
# An optional tag followed by selbri_1.
selbri <- tag? selbri_1
# Either selbri_2, or NA_clause free* followed by a full selbri (recursive,
# so a further tag may follow the NA_clause).
selbri_1 <- selbri_2 / NA_clause free* selbri
# selbri_3 optionally followed by CO_clause free* and another selbri_2
# (right recursion).
selbri_2 <- selbri_3 (CO_clause free* selbri_2)?
# One or more selbri_4.
selbri_3 <- selbri_4+ # !LR
# selbri_5 followed by any number of continuations: either joik_jek selbri_5,
# or joik stag? with a selbri_3 bracketed by KE_clause and KEhE_elidible.
selbri_4 <- selbri_5 (joik_jek selbri_5 / joik stag? KE_clause free* selbri_3 KEhE_elidible free*)* # !LR2
# selbri_6 optionally followed by one (jek or joik) stag? BO_clause free*
# link to another selbri_5 (right recursion).
selbri_5 <- selbri_6 ((jek / joik) stag? BO_clause free* selbri_5)?
# Either tanru_unit optionally followed by BO_clause free* selbri_6 (right
# recursion), or an optional NAhE_clause, free*, then
# guhek selbri gik selbri_6.
selbri_6 <- tanru_unit (BO_clause free* selbri_6)? / NAhE_clause? free* guhek selbri gik selbri_6
# tanru_unit_1 items chained by CEI_clause free*.
tanru_unit <- tanru_unit_1 (CEI_clause free* tanru_unit_1)*
# tanru_unit_2 with optional linkargs.
tanru_unit_1 <- tanru_unit_2 linkargs?
# ** zei is part of BRIVLA_clause
# BETA: Bare MEX, NUhA+operator, GOhOI, MEhOI
# Ordered alternatives:
#   1. BRIVLA_clause;
#   2. GOhA_clause with optional RAhO_clause;
#   3. selbri_3 bracketed by KE_clause and KEhE_elidible;
#   4. ME_clause with a sumti or lerfu_string, MEhU_elidible, and an
#      optional MOI_clause;
#   5. mex followed by MOI_clause;
#   6. NUhA_clause free* operator (no trailing free* in this alternative);
#   7. SE_clause, 8. JAI_clause with optional tag, 9. NAhE_clause, each
#      followed by a recursive tanru_unit_2;
#  10. NU_clause with optional NAI_clause, further joik_jek-joined NU_clause
#      items, then subsentence and KEI_elidible;
#  11. GOhOI_clause;
#  12. MEhOI_clause.
tanru_unit_2 <- BRIVLA_clause free* / GOhA_clause RAhO_clause? free* / KE_clause free* selbri_3 KEhE_elidible free* / ME_clause free* (sumti / lerfu_string) MEhU_elidible free* MOI_clause? free* / mex MOI_clause free* / NUhA_clause free* operator / SE_clause free* tanru_unit_2 / JAI_clause free* tag? tanru_unit_2 / NAhE_clause free* tanru_unit_2 / NU_clause NAI_clause? free* (joik_jek NU_clause NAI_clause? free*)* subsentence KEI_elidible free* / GOhOI_clause free* / MEhOI_clause free*
#; linkargs = BE_clause free* term links? BEhO_clause? free*
# Any number of linkargs_sa followed by linkargs_1.
linkargs <- linkargs_sa* linkargs_1
# BE_clause free* nonabs_term, optional links, then BEhO_elidible free*.
linkargs_1 <- BE_clause free* nonabs_term links? BEhO_elidible free*
# SA handling for linkargs. Consumes a linkargs_start, then repeatedly, while
# not at another linkargs_start, a sa_word or an SA_clause not followed by a
# linkargs_start. Ends with an SA_clause and requires (lookahead only) that
# linkargs_1 can follow.
linkargs_sa <- linkargs_start (!linkargs_start (sa_word / SA_clause !linkargs_start ) )* SA_clause &linkargs_1
# BE_clause, the opener of linkargs_1.
linkargs_start <- BE_clause
#; links = BEI_clause free* term links?
# Any number of links_sa followed by links_1.
links <- links_sa* links_1
# BEI_clause free* nonabs_term, optionally followed by further links
# (right recursion).
links_1 <- BEI_clause free* nonabs_term links?
# Same shape as linkargs_sa, using links_start and links_1.
links_sa <- links_start (!links_start (sa_word / SA_clause !links_start ) )* SA_clause &links_1
# BEI_clause, the opener of links_1.
links_start <- BEI_clause
# BEGIN BETA: MEX simplification
# A mex, accepted only at positions where neither a selbri nor a sumti_6
# could start (two negative lookaheads).
quantifier <- !selbri !sumti_6 mex
#;mex = mex_1 (operator mex_1)* / rp_clause
# mex_1 items chained by operator.
mex <- mex_1 (operator mex_1)*
# mex_2 optionally followed by one operator stag? BO_clause free* link to
# another mex_1 (right recursion).
mex_1 <- mex_2 (operator stag? BO_clause free* mex_1)?
# Ordered alternatives:
#   1. number BOI_elidible;
#   2. lerfu_string BOI_elidible;
#   3. mex bracketed by VEI_clause and VEhO_elidible;
#   4. NIhE_clause free* selbri TEhU_elidible;
#   5. MOhE_clause sumti TEhU_elidible;
#   6. gek mex gik mex_2;
#   7. LAhE_clause, or NAhE_clause with optional BO_clause, followed by mex
#      and LUhU_elidible;
#   8. PEhO_clause free* operator, one or more mex, KUhE_elidible;
#   9. FUhA_clause rp_expression.
# NOTE(review): alternative 5 has no free* after MOhE_clause, whereas the
# corresponding alternatives in number and operand_3 do. Confirm whether
# this is intended. Also, number is tried first and itself accepts the
# NIhE and MOhE forms, so alternatives 4 and 5 are only reached when the
# number alternative fails.
mex_2 <- number BOI_elidible free* / lerfu_string BOI_elidible free* / VEI_clause free* mex VEhO_elidible free* / NIhE_clause free* selbri TEhU_elidible free* / MOhE_clause sumti TEhU_elidible free* / gek mex gik mex_2 / (LAhE_clause free* / NAhE_clause free* BO_clause? free*) mex LUhU_elidible free* / PEhO_clause free* operator mex+ KUhE_elidible free* / FUhA_clause rp_expression
# mex_1 followed by any number of (rp_expression operator) pairs. The
# recursion is not left recursion, because mex_1 is always consumed first.
rp_expression <- mex_1 (rp_expression operator)*
# END BETA: MEX simplification
#; operator = operator_1 (joik_jek operator_1 / joik stag? KE_clause free* operator KEhE_clause? free*)*
# Any number of operator_sa followed by operator_0.
operator <- operator_sa* operator_0
# operator_1 followed by any number of continuations: either
# joik_jek operator_1, or joik stag? with an operator bracketed by KE_clause
# and KEhE_elidible.
operator_0 <- operator_1 (joik_jek operator_1 / joik stag? KE_clause free* operator KEhE_elidible free*)*
# SA handling for operators. Consumes an operator_start, then repeatedly,
# while not at another operator_start, a sa_word or an SA_clause not followed
# by an operator_start. Ends with an SA_clause and requires (lookahead only)
# that operator_0 can follow.
operator_sa <- operator_start (!operator_start (sa_word / SA_clause !operator_start) )* SA_clause &operator_0
# Openers recognised by operator_sa: guhek, KE_clause, or NAhE_clause,
# MAhO_clause or VUhU_clause, each optionally preceded by SE_clause.
operator_start <- guhek / KE_clause / SE_clause? NAhE_clause / SE_clause? MAhO_clause / SE_clause? VUhU_clause
# BETA: MEX simplification
# Three ordered alternatives:
#   1. guhek operator_1 gik operator_2;
#   2. operator_2, (jek or joik), stag? BO_clause free*, operator_1;
#   3. operator_2 alone.
# Alternative 3 is the fallback when the BO_clause link in 2 is absent.
operator_1 <- guhek operator_1 gik operator_2 / operator_2 (jek / joik) stag? BO_clause free* operator_1 / operator_2
# Either mex_operator, or an operator bracketed by KE_clause and
# KEhE_elidible.
operator_2 <- mex_operator / KE_clause free* operator KEhE_elidible free*
# BETA: MEX simplification
# Ordered alternatives:
#   1. SE_clause, 2. NAhE_clause, each followed by free* and a recursive
#      mex_operator;
#   3. MAhO_clause free* mex TEhU_elidible;
#   4. NAhU_clause free* selbri TEhU_elidible;
#   5. VUhU_clause; 6. joik_jek; 7. ek (each followed by free*).
mex_operator <- SE_clause free* mex_operator / NAhE_clause free* mex_operator / MAhO_clause free* mex TEhU_elidible free* / NAhU_clause free* selbri TEhU_elidible free* / VUhU_clause free* / joik_jek free* / ek free*
#; operand = operand_1 (joik_ek stag? KE_clause free* operand KEhE_clause? free*)?
# Any number of operand_sa followed by operand_0.
# NOTE(review): in the rules visible here, operand is referenced only from
# the operand_* rules themselves; mex_2 and rp_expression do not use it.
# Confirm whether it is still reachable from elsewhere in the file.
operand <- operand_sa* operand_0
# operand_1 optionally followed by one joik_ek stag? link to an operand
# bracketed by KE_clause and KEhE_elidible.
operand_0 <- operand_1 (joik_ek stag? KE_clause free* operand KEhE_elidible free*)?
# SA handling for operands. Consumes an operand_start, then repeatedly, while
# not at another operand_start, a sa_word or an SA_clause not followed by an
# operand_start. Ends with an SA_clause and requires (lookahead only) that
# operand_0 can follow.
operand_sa <- operand_start (!operand_start (sa_word / SA_clause !operand_start) )* SA_clause &operand_0
# Openers recognised by operand_sa.
operand_start <- quantifier / lerfu_word / NIhE_clause / MOhE_clause / JOhI_clause / gek / LAhE_clause / NAhE_clause
# operand_2 items chained by joik_ek.
operand_1 <- operand_2 (joik_ek operand_2)*
# operand_3 optionally followed by one joik_ek stag? BO_clause free* link to
# another operand_2 (right recursion).
operand_2 <- operand_3 (joik_ek stag? BO_clause free* operand_2)?
# BETA: NAhE+SUMTI
# Ordered alternatives:
#   1. quantifier;
#   2. lerfu_string not followed by MOI_clause, then BOI_elidible;
#   3. NIhE_clause free* selbri TEhU_elidible;
#   4. MOhE_clause free* sumti TEhU_elidible;
#   5. JOhI_clause free*, one or more mex_2, TEhU_elidible;
#   6. gek operand gik operand_3;
#   7. LAhE_clause, or NAhE_clause with optional BO_clause, followed by
#      operand and LUhU_elidible.
operand_3 <- quantifier / lerfu_string !MOI_clause BOI_elidible free* / NIhE_clause free* selbri TEhU_elidible free* / MOhE_clause free* sumti TEhU_elidible free* / JOhI_clause free* mex_2+ TEhU_elidible free* / gek operand gik operand_3 / (LAhE_clause free* / NAhE_clause BO_clause? free*) operand LUhU_elidible free*
# BETA: MEX simplification
# One or more items, each being PA_clause, or a NIhE_clause ... selbri ...
# TEhU_elidible group, or a MOhE_clause ... sumti ... TEhU_elidible group.
# The rule is written as a first item followed by zero or more of the same
# three alternatives.
number <- (PA_clause / (NIhE_clause free* selbri TEhU_elidible free*) / (MOhE_clause free* sumti TEhU_elidible free*)) (PA_clause / (NIhE_clause free* selbri TEhU_elidible free*) / (MOhE_clause free* sumti TEhU_elidible free*))*
# A lerfu_word followed by any mix of PA_clause and lerfu_word. The first
# item must be a lerfu_word.
lerfu_string <- lerfu_word (PA_clause / lerfu_word)*
# ** BU clauses are part of BY_clause
# Either BY_clause, or LAU_clause followed by a lerfu_word (recursive), or a
# lerfu_string bracketed by TEI_clause and FOI_clause. FOI_clause is
# mandatory here.
lerfu_word <- BY_clause / LAU_clause lerfu_word / TEI_clause lerfu_string FOI_clause
# A_clause with optional leading NA_clause and SE_clause and an optional
# trailing NAI_clause.
ek <- NA_clause? SE_clause? A_clause NAI_clause?
#; gihek = NA_clause? SE_clause? GIhA_clause NAI_clause?
# Any number of gihek_sa followed by gihek_1.
gihek <- gihek_sa* gihek_1
# Same shape as ek, with GIhA_clause in place of A_clause.
gihek_1 <- NA_clause? SE_clause? GIhA_clause NAI_clause?
# SA handling for gihek. Consumes a gihek_1, then repeatedly, while not at
# another gihek_1, a sa_word or an SA_clause not followed by a gihek_1. Ends
# with an SA_clause and requires (lookahead only) that gihek can follow.
gihek_sa <- gihek_1 (!gihek_1 (sa_word / SA_clause !gihek_1 ) )* SA_clause &gihek
# BETA: NAhU included in jek
# Optional NA_clause and SE_clause, then either JA_clause or a
# NAhU_clause free* selbri TEhU_elidible free* group, then an optional
# NAI_clause.
jek <- NA_clause? SE_clause? (JA_clause / NAhU_clause free* selbri TEhU_elidible free*) NAI_clause?
# Either JOI_clause with optional SE_clause and NAI_clause, or an interval,
# or an interval bracketed by two GAhO_clause.
joik <- SE_clause? JOI_clause NAI_clause? / interval / GAhO_clause interval GAhO_clause
# BIhI_clause with optional leading SE_clause and trailing NAI_clause.
interval <- SE_clause? BIhI_clause NAI_clause?
#; joik_ek = joik free* / ek free*
# Any number of joik_ek_sa followed by joik_ek_1.
joik_ek <- joik_ek_sa* joik_ek_1
# BETA: A/JA/JOI/VUhU Merger
# Alias for joik_jek, so joik_ek accepts everything joik_jek does.
joik_ek_1 <- joik_jek
# Same shape as gihek_sa, using joik_ek_1 and ending with lookahead joik_ek.
joik_ek_sa <- joik_ek_1 (!joik_ek_1 (sa_word / SA_clause !joik_ek_1 ) )* SA_clause &joik_ek
# BETA: A/JA/JOI/VUhU Merger
# Ordered choice between joik, ek, jek and VUhU_clause, each followed by
# free*.
joik_jek <- joik free* / ek free* / jek free* / VUhU_clause free*
# BETA: gaJA
# Four ordered alternatives:
#   1. gak, optional SE_clause, joik_jek;
#   2. GA_clause with optional leading SE_clause, then free*;
#   3. joik GI_clause free*;
#   4. stag followed by gik.
gek <- gak SE_clause? joik_jek / SE_clause? GA_clause free* / joik GI_clause free* / stag gik
# BETA: gaJA
# ga_clause not followed by a gek, then free*. ga_clause is not defined in
# the part of the file visible here.
gak <- ga_clause !gek free*
# BETA: guJA
# Either guk, optional SE_clause, joik_jek; or GUhA_clause with optional
# leading SE_clause and trailing NAI_clause, then free*.
guhek <- guk SE_clause? joik_jek / SE_clause? GUhA_clause NAI_clause? free*
# BETA: guJA
# gu_clause not followed by a gek, then free*. gu_clause is not defined in
# the part of the file visible here.
guk <- gu_clause !gek free*
# GI_clause with optional NAI_clause, then free*.
gik <- GI_clause NAI_clause? free*
# tense_modal items chained by joik_jek.
tag <- tense_modal (joik_jek tense_modal)*
#stag = simple_tense_modal ((jek / joik) simple_tense_modal)*
# BETA: Tag simplification
# Same body as tag; both rules accept the same input. The two names are kept
# distinct.
stag <- tense_modal (joik_jek tense_modal)*
# BETA: Tag simplification (dependency: NAI ∈ indicator)
# FIXME: Cannot use bare MEX with ROI.
# One or more units. Each unit is an optional NAhE_clause, an optional
# SE_clause, one of the listed alternatives, then free*. Alternatives with
# extra structure:
#   - FAhA_clause may be preceded by MOhI_clause;
#   - ROI_clause is preceded by either a mex bracketed by VEI_clause and
#     VEhO_elidible or a number, and may be preceded by FEhE_clause;
#   - TAhE_clause and ZAhO_clause may each be preceded by FEhE_clause;
#   - FIhO_clause is followed by free* selbri FEhU_elidible free*.
# The outermost pair of parentheses is redundant.
tense_modal <- ((NAhE_clause? SE_clause? (BAI_clause / CAhA_clause / CUhE_clause / KI_clause / ZI_clause / PU_clause / VA_clause / MOhI_clause? FAhA_clause / ZEhA_clause / VEhA_clause / VIhA_clause / FEhE_clause? (VEI_clause free* mex VEhO_elidible free* / number) ROI_clause / FEhE_clause? TAhE_clause / FEhE_clause? ZAhO_clause / FIhO_clause free* selbri FEhU_elidible free* / FA_clause) free*)+)
# BETA: Bare MEX, LOhAI, Removal of Old-SOI
# Ordered alternatives:
#   1. SEI_clause free*, optional (terms CU_elidible free*), selbri,
#      SEhU_elidible;
#   2. vocative, selbri with optional relative_clauses on either side,
#      DOhU_elidible;
#   3. vocative, optional sumti, DOhU_elidible;
#   4. mex_2 MAI_clause free*;
#   5. text bracketed by TO_clause and TOI_elidible;
#   6. xi_clause;
#   7. LOhAI_clause.
free <- SEI_clause free* (terms CU_elidible free*)? selbri SEhU_elidible / vocative relative_clauses? selbri relative_clauses? DOhU_elidible / vocative sumti? DOhU_elidible / mex_2 MAI_clause free* / TO_clause text TOI_elidible / xi_clause / LOhAI_clause
# BETA: Bare MEX
# Either XI_clause free* mex_2, or XI_clause free* followed by a mex
# bracketed by VEI_clause and VEhO_elidible.
# NOTE(review): mex_2 already has a VEI_clause free* mex VEhO_elidible
# alternative, so the second alternative here looks shadowed by the first.
# Confirm whether it can ever be selected.
xi_clause <- XI_clause free* mex_2 / XI_clause free* VEI_clause free* mex VEhO_elidible
# Three ordered alternatives:
#   1. one or more (COI_clause NAI_clause?) followed by DOI_clause;
#   2. one or more (COI_clause NAI_clause?) without DOI_clause, written as
#      one item followed by zero or more;
#   3. DOI_clause alone.
vocative <- (COI_clause NAI_clause?)+ DOI_clause / (COI_clause NAI_clause?) (COI_clause NAI_clause?)* / DOI_clause
# An optional FUhE_clause followed by one or more indicator.
indicators <- FUhE_clause? indicator+
# BETA: NAI ∈ indicator
# One of: UI_clause or CAI_clause with optional NAI_clause; a bare
# NAI_clause; DAhO_clause; FUhO_clause. Rejected when a BU_clause follows.
indicator <- ((UI_clause / CAI_clause) NAI_clause? / NAI_clause / DAhO_clause / FUhO_clause) !BU_clause
# ****************
# Magic Words
# ****************
# pre_clause followed by zei_clause_no_pre.
zei_clause <- pre_clause zei_clause_no_pre
# pre_zei_bu, then any number of (optional zei_tail, one or more BU_clause)
# groups, then a mandatory final zei_tail and post_clause.
zei_clause_no_pre <- pre_zei_bu (zei_tail? BU_clause+)* zei_tail post_clause # !LR
# zei_clause_no_SA = pre_zei_bu_no_SA (zei_tail? bu_tail)* zei_tail
# pre_clause followed by bu_clause_no_pre.
bu_clause <- pre_clause bu_clause_no_pre
# pre_zei_bu, then any number of (BU_clause*, zei_tail) groups, then one or
# more BU_clause and post_clause. This mirrors zei_clause_no_pre, with the
# clause ending in BU_clause instead of zei_tail.
bu_clause_no_pre <- pre_zei_bu (BU_clause* zei_tail)* BU_clause+ post_clause # !LR
# bu_clause_no_SA = pre_zei_bu_no_SA (bu_tail? zei_tail)* bu_tail
# One or more (ZEI_clause any_word) pairs.
zei_tail <- (ZEI_clause any_word)+
bu_tail <- BU_clause+ # Obsolete: please use BU_clause+ instead for allowing later left-grouping faking.
# A single any_word_SA_handling item with an optional si_clause, accepted
# only when the position does not start ZOI_start, BU_clause, ZEI_clause,
# SI_clause, SA_clause, SU_clause or FAhO_clause. Also used as the body of
# sa_word and si_word.
pre_zei_bu <- !ZOI_start !BU_clause !ZEI_clause !SI_clause !SA_clause !SU_clause !FAhO_clause any_word_SA_handling si_clause?
# LOhU_pre / ZO_pre / ZOI_pre / !ZEI_clause !BU_clause !FAhO_clause !SI_clause !SA_clause !SU_clause any_word_SA_handling si_clause?
# pre_zei_bu_no_SA = LOhU_pre / ZO_pre / ZOI_pre / !ZEI_clause !BU_clause !FAhO_clause !SI_clause !SA_clause !SU_clause any_word si_clause?
# Matches any number of arbitrary characters, i.e. the rest of the input.
# Used by faho_clause.
dot_star <- .*
# __ General Morphology Issues
#
# 1. Spaces (including '.y') and UI are eaten *after* a word.
#
# 3. BAhE is eaten *before* a word.
# Handling of what can go after a cmavo
post_clause <- spaces? si_clause? !ZEI_clause !BU_clause indicators*
pre_clause <- BAhE_clause* # !LR
#any_word_SA_handling = BRIVLA_pre / known_cmavo_SA / !known_cmavo_pre CMAVO_pre / CMEVLA_pre
# any_word_SA_handling: one word, tried in order as BRIVLA_pre, then one of the
# listed known_cmavo_SA alternatives, then the generic CMAVO_pre as a fallback.
# known_cmavo_SA comes before CMAVO_pre so that selma'o-specific _pre rules win;
# e.g. LOhU_pre consumes an entire quote up to LEhU, not just one cmavo.
any_word_SA_handling <- BRIVLA_pre / known_cmavo_SA / CMAVO_pre
# known_cmavo_SA: ordered choice over the _pre rule of each listed selma'o.
# Every alternative is an X_pre rule except SI_clause, which is referenced as a
# full clause.
# NOTE(review): selma'o used elsewhere in this grammar (e.g. IAU, LOhAI) are not
# listed here; presumably they fall through to CMAVO_pre — confirm.
known_cmavo_SA <- A_pre / BAI_pre / BAhE_pre / BE_pre / BEI_pre / BEhO_pre / BIhE_pre / BIhI_pre / BO_pre / BOI_pre / BU_pre / BY_pre / CAI_pre / CAhA_pre / CEI_pre / CEhE_pre / CO_pre / COI_pre / CU_pre / CUhE_pre / DAhO_pre / DOI_pre / DOhU_pre / FA_pre / FAhA_pre / FEhE_pre / FEhU_pre / FIhO_pre / FOI_pre / FUhA_pre / FUhE_pre / FUhO_pre / GA_pre / GAhO_pre / GEhU_pre / GI_pre / GIhA_pre / GOI_pre / GOhA_pre / GUhA_pre / I_pre / JA_pre / JAI_pre / JOI_pre / JOhI_pre / KE_pre / KEI_pre / KEhE_pre / KI_pre / KOhA_pre / KU_pre / KUhE_pre / KUhO_pre / LAU_pre / LAhE_pre / LE_pre / LEhU_pre / LI_pre / LIhU_pre / LOhO_pre / LOhU_pre / LU_pre / LUhU_pre / MAI_pre / MAhO_pre / ME_pre / MEhU_pre / MOI_pre / MOhE_pre / MOhI_pre / NA_pre / NAI_pre / NAhE_pre / NAhU_pre / NIhE_pre / NIhO_pre / NOI_pre / NU_pre / NUhA_pre / NUhI_pre / NUhU_pre / PA_pre / PEhE_pre / PEhO_pre / PU_pre / RAhO_pre / ROI_pre / SA_pre / SE_pre / SEI_pre / SEhU_pre / SI_clause / SOI_pre / SU_pre / TAhE_pre / TEI_pre / TEhU_pre / TO_pre / TOI_pre / TUhE_pre / TUhU_pre / UI_pre / VA_pre / VAU_pre / VEI_pre / VEhA_pre / VEhO_pre / VIhA_pre / VUhO_pre / VUhU_pre / XI_pre / ZAhO_pre / ZEI_pre / ZEhA_pre / ZI_pre / ZIhE_pre / ZO_pre / ZOI_pre / ZOhU_pre
# Handling of spaces and things like spaces.
# ___ SPACE ___
# Do *NOT* delete the line above!
# SU clauses
# su_clause: zero or more items, each an erasable_clause or a su_word,
# followed by SU_clause.
su_clause <- (erasable_clause / su_word)* SU_clause
# Handling of SI and interactions with zo and lo'u...le'u
# si_clause: one or more repetitions of: an item (erasable_clause, si_word or
# SA_clause), an optional nested si_clause, then SI_clause. The recursion lets
# an erased item itself carry erasures before the closing SI_clause.
si_clause <- ((erasable_clause / si_word / SA_clause) si_clause? SI_clause)+
# erasable_clause: a whole bu_clause_no_pre or zei_clause_no_pre, accepted only
# when it is not immediately followed by a further ZEI_clause or BU_clause.
erasable_clause <- bu_clause_no_pre !ZEI_clause !BU_clause / zei_clause_no_pre !ZEI_clause !BU_clause
# sa_word and si_word are both plain aliases of pre_zei_bu.
sa_word <- pre_zei_bu
si_word <- pre_zei_bu
# su_word: one word via any_word_SA_handling, rejected by lookahead when the
# input starts with ZOI_start, NIhO_clause, LU_clause, TUhE_clause, TO_clause,
# SU_clause or FAhO_clause. Unlike pre_zei_bu it has no trailing si_clause?.
su_word <- !ZOI_start !NIhO_clause !LU_clause !TUhE_clause !TO_clause !SU_clause !FAhO_clause any_word_SA_handling
# ___ ELIDIBLE TERMINATORS ___
# BETA: IAU
# Each X_elidible rule is X_clause? — it matches the terminator when present
# and otherwise succeeds without consuming input, so these rules never fail.
BEhO_elidible <- BEhO_clause?
BOI_elidible <- BOI_clause?
CU_elidible <- CU_clause?
DOhU_elidible <- DOhU_clause?
FEhU_elidible <- FEhU_clause?
# FOI and FUhO are never elidible
GEhU_elidible <- GEhU_clause?
IAU_elidible <- IAU_clause?
KEI_elidible <- KEI_clause?
KEhE_elidible <- KEhE_clause?
KU_elidible <- KU_clause?
KUhE_elidible <- KUhE_clause?
KUhO_elidible <- KUhO_clause?
# LEhU is never elidible
LIhU_elidible <- LIhU_clause?
LOhO_elidible <- LOhO_clause?
LUhU_elidible <- LUhU_clause?
MEhU_elidible <- MEhU_clause?
NUhU_elidible <- NUhU_clause?
SEhU_elidible <- SEhU_clause?
TEhU_elidible <- TEhU_clause?
TOI_elidible <- TOI_clause?
TUhU_elidible <- TUhU_clause?
VAU_elidible <- VAU_clause?
VEhO_elidible <- VEhO_clause?
# ___ SELMAHO ___
# Do *NOT* delete the line above!
# Most rules in this section follow one pattern:
#   X_clause <- X_pre X_post
#   X_pre    <- pre_clause X spaces?   (leading BAhE, the word, trailing spaces)
#   X_post   <- post_clause            (si erasure, ZEI/BU lookaheads, indicators)
# The split lets other rules reference X_pre alone (see known_cmavo_SA).
# Rules that deviate from the pattern are commented individually.
#
# BRIVLA_clause: the standard pre/post pair, or alternatively a zei_clause.
BRIVLA_clause <- BRIVLA_pre BRIVLA_post / zei_clause
BRIVLA_pre <- pre_clause BRIVLA spaces?
BRIVLA_post <- post_clause
# BRIVLA_no_SA_handling = pre_clause BRIVLA post_clause / zei_clause_no_SA
# CMEVLA_no_SA_handling = pre_clause CMEVLA post_clause
# CMAVO_clause: generic cmavo clause built from the CMAVO word rule; CMAVO_pre
# is the last alternative of any_word_SA_handling.
CMAVO_clause <- CMAVO_pre CMAVO_post
CMAVO_pre <- pre_clause CMAVO spaces?
CMAVO_post <- post_clause
# CMAVO_no_SA_handling = pre_clause CMAVO post_clause
# eks; basic afterthought logical connectives
A_clause <- A_pre A_post
A_pre <- pre_clause A spaces?
A_post <- post_clause
# A_no_SA_handling = pre_clause A post_clause
# modal operators
BAI_clause <- BAI_pre BAI_post
BAI_pre <- pre_clause BAI spaces?
BAI_post <- post_clause
# BAI_no_SA_handling = pre_clause BAI post_clause
# next word intensifier
# Deviates from the usual X_pre/X_post pattern: BAhE_pre has no leading
# pre_clause (pre_clause is itself BAhE_clause*, so including it would make the
# rule left-recursive), and BAhE_post is not post_clause: it has no spaces? and
# no indicators*, only an optional si_clause and the ZEI/BU lookaheads.
BAhE_clause <- BAhE_pre BAhE_post
BAhE_pre <- BAhE spaces?
BAhE_post <- si_clause? !ZEI_clause !BU_clause
# BAhE_no_SA_handling = BAhE spaces? BAhE_post
# sumti link to attach sumti to a selbri
BE_clause <- BE_pre BE_post
BE_pre <- pre_clause BE spaces?
BE_post <- post_clause
# BE_no_SA_handling = pre_clause BE post_clause
# multiple sumti separator between BE, BEI
BEI_clause <- BEI_pre BEI_post
BEI_pre <- pre_clause BEI spaces?
BEI_post <- post_clause
# BEI_no_SA_handling = pre_clause BEI post_clause
# terminates BE/BEI specified descriptors
BEhO_clause <- BEhO_pre BEhO_post
BEhO_pre <- pre_clause BEhO spaces?
BEhO_post <- post_clause
# BEhO_no_SA_handling = pre_clause BEhO post_clause
# prefix for high-priority MEX operator
BIhE_clause <- BIhE_pre BIhE_post
BIhE_pre <- pre_clause BIhE spaces?
BIhE_post <- post_clause
# BIhE_no_SA_handling = pre_clause BIhE post_clause
# interval component of JOI
BIhI_clause <- BIhI_pre BIhI_post
BIhI_pre <- pre_clause BIhI spaces?
BIhI_post <- post_clause
# BIhI_no_SA_handling = pre_clause BIhI post_clause
# joins two units with shortest scope
BO_clause <- BO_pre BO_post
BO_pre <- pre_clause BO spaces?
BO_post <- post_clause
# BO_no_SA_handling = pre_clause BO post_clause
# number or lerfu_string terminator
BOI_clause <- BOI_pre BOI_post
BOI_pre <- pre_clause BOI spaces?
BOI_post <- post_clause
# BOI_no_SA_handling = pre_clause BOI post_clause
# turns any word into a BY lerfu word
# Deviates from the usual pattern: BU_post is only spaces?, i.e. no si_clause,
# no ZEI/BU lookaheads and no indicators are taken after BU here.
BU_clause <- BU_pre BU_post
# BU_clause_no_SA = BU_pre_no_SA BU BU_post
BU_pre <- pre_clause BU spaces?
# BU_pre_no_SA = pre_clause
BU_post <- spaces?
# BU_no_SA_handling = pre_clause BU spaces?
# individual lerfu words
# BY_clause: the standard pre/post pair for a BY word, or alternatively a
# bu_clause (tried second).
BY_clause <- BY_pre BY_post / bu_clause
BY_pre <- pre_clause BY spaces?
BY_post <- post_clause
# BY_no_SA_handling = pre_clause BY post_clause / bu_clause_no_SA
# specifies actuality/potentiality of tense
CAhA_clause <- CAhA_pre CAhA_post
CAhA_pre <- pre_clause CAhA spaces?
CAhA_post <- post_clause
# CAhA_no_SA_handling = pre_clause CAhA post_clause
# afterthought intensity marker
CAI_clause <- CAI_pre CAI_post
CAI_pre <- pre_clause CAI spaces?
CAI_post <- post_clause
# CAI_no_SA_handling = pre_clause CAI post_clause
# pro-bridi assignment operator
CEI_clause <- CEI_pre CEI_post
CEI_pre <- pre_clause CEI spaces?
CEI_post <- post_clause
# CEI_no_SA_handling = pre_clause CEI post_clause
# afterthought term list connective
CEhE_clause <- CEhE_pre CEhE_post
CEhE_pre <- pre_clause CEhE spaces?
CEhE_post <- post_clause
# CEhE_no_SA_handling = pre_clause CEhE post_clause
# names; require consonant end, then pause no
# LA or DOI selma'o embedded, pause before if
# vowel initial and preceded by a vowel
# tanru inversion
CO_clause <- CO_pre CO_post
CO_pre <- pre_clause CO spaces?
CO_post <- post_clause
# CO_no_SA_handling = pre_clause CO post_clause
COI_clause <- COI_pre COI_post
COI_pre <- pre_clause COI spaces?
COI_post <- post_clause
# COI_no_SA_handling = pre_clause COI post_clause
# vocative marker permitted inside names; must
# always be followed by pause or DOI
# separator between head sumti and selbri
CU_clause <- CU_pre CU_post
CU_pre <- pre_clause CU spaces?
CU_post <- post_clause
# CU_no_SA_handling = pre_clause CU post_clause
# tense/modal question
CUhE_clause <- CUhE_pre CUhE_post
CUhE_pre <- pre_clause CUhE spaces?
CUhE_post <- post_clause
# CUhE_no_SA_handling = pre_clause CUhE post_clause
# cancel anaphora/cataphora assignments
DAhO_clause <- DAhO_pre DAhO_post
DAhO_pre <- pre_clause DAhO spaces?
DAhO_post <- post_clause
# DAhO_no_SA_handling = pre_clause DAhO post_clause
# vocative marker
DOI_clause <- DOI_pre DOI_post
DOI_pre <- pre_clause DOI spaces?
DOI_post <- post_clause
# DOI_no_SA_handling = pre_clause DOI post_clause
# terminator for DOI-marked vocatives
DOhU_clause <- DOhU_pre DOhU_post
DOhU_pre <- pre_clause DOhU spaces?
DOhU_post <- post_clause
# DOhU_no_SA_handling = pre_clause DOhU post_clause
# modifier head generic case tag
FA_clause <- FA_pre FA_post
FA_pre <- pre_clause FA spaces?
FA_post <- post_clause
# FA_no_SA_handling = pre_clause FA post_clause
# superdirections in space
FAhA_clause <- FAhA_pre FAhA_post
FAhA_pre <- pre_clause FAhA spaces?
FAhA_post <- post_clause
# FAhA_no_SA_handling = pre_clause FAhA post_clause
# normally elided 'done pause' to indicate end
# of utterance string
# Deviates from the usual pattern: a single rule with no _pre/_post split and
# no post_clause, so nothing beyond optional spaces is consumed after FAhO.
FAhO_clause <- pre_clause FAhO spaces?
# space interval mod flag
FEhE_clause <- FEhE_pre FEhE_post
FEhE_pre <- pre_clause FEhE spaces?
FEhE_post <- post_clause
# FEhE_no_SA_handling = pre_clause FEhE post_clause
# ends bridi to modal conversion
FEhU_clause <- FEhU_pre FEhU_post
FEhU_pre <- pre_clause FEhU spaces?
FEhU_post <- post_clause
# FEhU_no_SA_handling = pre_clause FEhU post_clause
# marks bridi to modal conversion
FIhO_clause <- FIhO_pre FIhO_post
FIhO_pre <- pre_clause FIhO spaces?
FIhO_post <- post_clause
# FIhO_no_SA_handling = pre_clause FIhO post_clause
# end compound lerfu
FOI_clause <- FOI_pre FOI_post
FOI_pre <- pre_clause FOI spaces?
FOI_post <- post_clause
# FOI_no_SA_handling = pre_clause FOI post_clause
# reverse Polish flag
FUhA_clause <- FUhA_pre FUhA_post
FUhA_pre <- pre_clause FUhA spaces?
FUhA_post <- post_clause
# FUhA_no_SA_handling = pre_clause FUhA post_clause
# open long scope for indicator
# Deviates from the usual pattern: FUhE_post is not post_clause. It checks
# !BU_clause, consumes optional spaces, then checks !ZEI_clause !BU_clause; it
# takes no si_clause and no indicators*.
FUhE_clause <- FUhE_pre FUhE_post
FUhE_pre <- pre_clause FUhE spaces?
FUhE_post <- !BU_clause spaces? !ZEI_clause !BU_clause
# FUhE_no_SA_handling = pre_clause FUhE post_clause
# close long scope for indicator
FUhO_clause <- FUhO_pre FUhO_post
FUhO_pre <- pre_clause FUhO spaces?
FUhO_post <- post_clause
# FUhO_no_SA_handling = pre_clause FUhO post_clause
# geks; forethought logical connectives
GA_clause <- GA_pre GA_post
GA_pre <- pre_clause GA spaces?
GA_post <- post_clause
# GA_no_SA_handling = pre_clause GA post_clause
# open/closed interval markers for BIhI
GAhO_clause <- GAhO_pre GAhO_post
GAhO_pre <- pre_clause GAhO spaces?
GAhO_post <- post_clause
# GAhO_no_SA_handling = pre_clause GAhO post_clause
# marker ending GOI relative clauses
GEhU_clause <- GEhU_pre GEhU_post
GEhU_pre <- pre_clause GEhU spaces?
GEhU_post <- post_clause
# GEhU_no_SA_handling = pre_clause GEhU post_clause
# forethought medial marker
GI_clause <- GI_pre GI_post
GI_pre <- pre_clause GI spaces?
GI_post <- post_clause
# GI_no_SA_handling = pre_clause GI post_clause
# logical connectives for bridi_tails
GIhA_clause <- GIhA_pre GIhA_post
GIhA_pre <- pre_clause GIhA spaces?
GIhA_post <- post_clause
# GIhA_no_SA_handling = pre_clause GIhA post_clause
# attaches a sumti modifier to a sumti
GOI_clause <- GOI_pre GOI_post
GOI_pre <- pre_clause GOI spaces?
GOI_post <- post_clause
# GOI_no_SA_handling = pre_clause GOI post_clause
# pro-bridi
GOhA_clause <- GOhA_pre GOhA_post
GOhA_pre <- pre_clause GOhA spaces?
GOhA_post <- post_clause
# GOhA_no_SA_handling = pre_clause GOhA post_clause
# GEK for tanru units, corresponds to JEKs
GUhA_clause <- GUhA_pre GUhA_post
GUhA_pre <- pre_clause GUhA spaces?
GUhA_post <- post_clause
# GUhA_no_SA_handling = pre_clause GUhA post_clause
# sentence link
# Deviates from the usual pattern: I_clause first consumes zero or more
# sentence_sa before I_pre I_post.
I_clause <- sentence_sa* I_pre I_post
I_pre <- pre_clause I spaces?
I_post <- post_clause
# I_no_SA_handling = pre_clause I post_clause
# jeks; logical connectives within tanru
JA_clause <- JA_pre JA_post
JA_pre <- pre_clause JA spaces?
JA_post <- post_clause
# JA_no_SA_handling = pre_clause JA post_clause
# modal conversion flag
JAI_clause <- JAI_pre JAI_post
JAI_pre <- pre_clause JAI spaces?
JAI_post <- post_clause
# JAI_no_SA_handling = pre_clause JAI post_clause
# flags an array operand
JOhI_clause <- JOhI_pre JOhI_post
JOhI_pre <- pre_clause JOhI spaces?
JOhI_post <- post_clause
# JOhI_no_SA_handling = pre_clause JOhI post_clause
# non-logical connectives
JOI_clause <- JOI_pre JOI_post
JOI_pre <- pre_clause JOI spaces?
JOI_post <- post_clause
# JOI_no_SA_handling = pre_clause JOI post_clause
# left long scope marker
KE_clause <- KE_pre KE_post
KE_pre <- pre_clause KE spaces?
KE_post <- post_clause
# KE_no_SA_handling = pre_clause KE post_clause
# right terminator for KE groups
KEhE_clause <- KEhE_pre KEhE_post
KEhE_pre <- pre_clause KEhE spaces?
KEhE_post <- post_clause
# KEhE_no_SA_handling = pre_clause KEhE post_clause
# right terminator, NU abstractions
KEI_clause <- KEI_pre KEI_post
KEI_pre <- pre_clause KEI spaces?
KEI_post <- post_clause
# NOTE(review): among the *_no_SA_handling rules in this part of the grammar,
# this is the only one that is a live rule; every sibling is commented out.
# Presumably it was meant to be commented out too — confirm that nothing
# references KEI_no_SA_handling before changing it.
KEI_no_SA_handling <- pre_clause KEI post_clause
# multiple utterance scope for tenses
KI_clause <- KI_pre KI_post
KI_pre <- pre_clause KI spaces?
KI_post <- post_clause
# KI_no_SA_handling = pre_clause KI post_clause
# sumti anaphora
KOhA_clause <- KOhA_pre KOhA_post
KOhA_pre <- pre_clause KOhA spaces?
KOhA_post <- post_clause
# KOhA_no_SA_handling = pre_clause KOhA spaces?
# right terminator for descriptions, etc.
KU_clause <- KU_pre KU_post
KU_pre <- pre_clause KU spaces?
KU_post <- post_clause
# KU_no_SA_handling = pre_clause KU post_clause
# MEX forethought delimiter
KUhE_clause <- KUhE_pre KUhE_post
KUhE_pre <- pre_clause KUhE spaces?
KUhE_post <- post_clause
# KUhE_no_SA_handling = pre_clause KUhE post_clause
# right terminator, NOI relative clauses
KUhO_clause <- KUhO_pre KUhO_post
KUhO_pre <- pre_clause KUhO spaces?
KUhO_post <- post_clause
# KUhO_no_SA_handling = pre_clause KUhO post_clause
# name descriptors
# LA_no_SA_handling = pre_clause LA post_clause
# lerfu prefixes
LAU_clause <- LAU_pre LAU_post
LAU_pre <- pre_clause LAU spaces?
LAU_post <- post_clause
# LAU_no_SA_handling = pre_clause LAU post_clause
# sumti qualifiers
LAhE_clause <- LAhE_pre LAhE_post
LAhE_pre <- pre_clause LAhE spaces?
LAhE_post <- post_clause
# LAhE_no_SA_handling = pre_clause LAhE post_clause
# sumti descriptors
LE_clause <- LE_pre LE_post
LE_pre <- pre_clause LE spaces?
LE_post <- post_clause
# LE_no_SA_handling = pre_clause LE post_clause
# possibly ungrammatical text right quote
# Deviates from the usual pattern: LEhU_post is only spaces?, so no si_clause,
# ZEI/BU lookaheads or indicators are handled here. LEhU_clause is consumed
# inside LOhU_pre, and LOhU_post applies post_clause after the whole quote.
LEhU_clause <- LEhU_pre LEhU_post
LEhU_pre <- pre_clause LEhU spaces?
LEhU_post <- spaces?
# LEhU_clause_no_SA = LEhU_pre_no_SA LEhU_post
# LEhU_pre_no_SA = pre_clause LEhU spaces?
# LEhU_no_SA_handling = pre_clause LEhU post_clause
# convert number to sumti
LI_clause <- LI_pre LI_post
LI_pre <- pre_clause LI spaces?
LI_post <- post_clause
# LI_no_SA_handling = pre_clause LI post_clause
# grammatical text right quote
LIhU_clause <- LIhU_pre LIhU_post
LIhU_pre <- pre_clause LIhU spaces?
LIhU_post <- post_clause
# LIhU_no_SA_handling = pre_clause LIhU post_clause
# elidable terminator for LI
LOhO_clause <- LOhO_pre LOhO_post
LOhO_pre <- pre_clause LOhO spaces?
LOhO_post <- post_clause
# LOhO_no_SA_handling = pre_clause LOhO post_clause
# possibly ungrammatical text left quote
# Deviates from the usual pattern: LOhU_pre consumes the entire quotation.
# After LOhU and optional spaces it repeats any_word for as long as the next
# word is not LEhU (the lookahead uses the bare LEhU word rule, not
# LEhU_clause), then requires LEhU_clause and optional spaces. LOhU_post then
# applies post_clause to the quotation as a whole.
LOhU_clause <- LOhU_pre LOhU_post
LOhU_pre <- pre_clause LOhU spaces? (!LEhU any_word)* LEhU_clause spaces?
LOhU_post <- post_clause
# LOhU_no_SA_handling = pre_clause LOhU spaces? (!LEhU any_word)* LEhU_clause spaces?
# grammatical text left quote
LU_clause <- LU_pre LU_post
LU_pre <- pre_clause LU spaces?
# LU_post is post_clause without the trailing indicators*.
LU_post <- spaces? si_clause? !ZEI_clause !BU_clause
# LU_post is deliberately not post_clause, so that indicators do not attach to
# LU in the parse tree.
# LU_no_SA_handling = pre_clause LU post_clause
# LAhE close delimiter
LUhU_clause <- LUhU_pre LUhU_post
LUhU_pre <- pre_clause LUhU spaces?
LUhU_post <- post_clause
# LUhU_no_SA_handling = pre_clause LUhU post_clause
# change MEX expressions to MEX operators
MAhO_clause <- MAhO_pre MAhO_post
MAhO_pre <- pre_clause MAhO spaces?
MAhO_post <- post_clause
# MAhO_no_SA_handling = pre_clause MAhO post_clause
# change numbers to utterance ordinals
MAI_clause <- MAI_pre MAI_post
MAI_pre <- pre_clause MAI spaces?
MAI_post <- post_clause
# MAI_no_SA_handling = pre_clause MAI post_clause
# converts a sumti into a tanru_unit
ME_clause <- ME_pre ME_post
ME_pre <- pre_clause ME spaces?
ME_post <- post_clause
# ME_no_SA_handling = pre_clause ME post_clause
# terminator for ME
MEhU_clause <- MEhU_pre MEhU_post
MEhU_pre <- pre_clause MEhU spaces?
MEhU_post <- post_clause
# MEhU_no_SA_handling = pre_clause MEhU post_clause
# change sumti to operand, inverse of LI
MOhE_clause <- MOhE_pre MOhE_post
MOhE_pre <- pre_clause MOhE spaces?
MOhE_post <- post_clause
# MOhE_no_SA_handling = pre_clause MOhE post_clause
# motion tense marker
MOhI_clause <- MOhI_pre MOhI_post
MOhI_pre <- pre_clause MOhI spaces?
MOhI_post <- post_clause
# MOhI_no_SA_handling = pre_clause MOhI post_clause
# change number to selbri
MOI_clause <- MOI_pre MOI_post
MOI_pre <- pre_clause MOI spaces?
MOI_post <- post_clause
# MOI_no_SA_handling = pre_clause MOI post_clause
# bridi negation
NA_clause <- NA_pre NA_post
NA_pre <- pre_clause NA spaces?
NA_post <- post_clause
# NA_no_SA_handling = pre_clause NA post_clause
# attached to words to negate them
NAI_clause <- NAI_pre NAI_post
NAI_pre <- pre_clause NAI spaces?
NAI_post <- post_clause
# NAI_no_SA_handling = pre_clause NAI post_clause
# scalar negation
NAhE_clause <- NAhE_pre NAhE_post
NAhE_pre <- pre_clause NAhE spaces?
NAhE_post <- post_clause
# NAhE_no_SA_handling = pre_clause NAhE post_clause
# change a selbri into an operator
NAhU_clause <- NAhU_pre NAhU_post
NAhU_pre <- pre_clause NAhU spaces?
NAhU_post <- post_clause
# NAhU_no_SA_handling = pre_clause NAhU post_clause
# change selbri to operand; inverse of MOI
NIhE_clause <- NIhE_pre NIhE_post
NIhE_pre <- pre_clause NIhE spaces?
NIhE_post <- post_clause
# NIhE_no_SA_handling = pre_clause NIhE post_clause
# new paragraph; change of subject
# Deviates from the usual pattern in two places: NIhO_clause first consumes
# zero or more sentence_sa, and NIhO_post consumes zero or more su_clause
# before post_clause.
NIhO_clause <- sentence_sa* NIhO_pre NIhO_post
NIhO_pre <- pre_clause NIhO spaces?
NIhO_post <- su_clause* post_clause
# NIhO_no_SA_handling = pre_clause NIhO su_clause* post_clause
# attaches a subordinate clause to a sumti
NOI_clause <- NOI_pre NOI_post
NOI_pre <- pre_clause NOI spaces?
NOI_post <- post_clause
# NOI_no_SA_handling = pre_clause NOI post_clause
# abstraction
NU_clause <- NU_pre NU_post
NU_pre <- pre_clause NU spaces?
NU_post <- post_clause
# NU_no_SA_handling = pre_clause NU post_clause
# change operator to selbri; inverse of MOhE
NUhA_clause <- NUhA_pre NUhA_post
NUhA_pre <- pre_clause NUhA spaces?
NUhA_post <- post_clause
# NUhA_no_SA_handling = pre_clause NUhA post_clause
# marks the start of a termset
NUhI_clause <- NUhI_pre NUhI_post
NUhI_pre <- pre_clause NUhI spaces?
NUhI_post <- post_clause
# NUhI_no_SA_handling = pre_clause NUhI post_clause
# marks the middle and end of a termset
NUhU_clause <- NUhU_pre NUhU_post
NUhU_pre <- pre_clause NUhU spaces?
NUhU_post <- post_clause
# NUhU_no_SA_handling = pre_clause NUhU post_clause