-
Notifications
You must be signed in to change notification settings - Fork 0
/
coder.cp
executable file
·2328 lines (2075 loc) · 80.8 KB
/
coder.cp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// TABARI Project
//___________________________________________________________________________________
// coder.cp
// This file contains the routines for coding events from the parsed text
// Notes:
// 1. eventArray is filled sequentially until expandCompounds() is called, at which
// point it should be traversed as a list using the nextEvt field.
//
// 2. Order that various contingencies are handled:
// a. Expand paired events makeEvent()=>putEvent()
// b. Duplicate events for compound phrases makeEvent()
// c. Resolve subordinate and dominant events evaluateEvents()
// d. Expand coded compound actors expandCompoundCodes()
// e. Eliminate duplicates checkEventDups()
// f. Eliminate events where source = target checkSameActors()
//
// 3. Syntactic agents are currently not implemented (coded agents are)
//
// ISSUES
// 1.The information about the issues to be coded is stored in a linked-list
// of issueHeadStructs that begins at issueHead. See the header file for the contents.
//
// 2.The codes themselves are stored in a linked lists of issueListStruct that come off
// the plist pointer in the header. This grows as needed (it will only have a single
// element except when TYPE = "ALL" is used). Rather than continually re-allocating
// this list, it is just zeroed out at the end of coding each record and re-used;
// so when the program terminates, it is the size of the largest number of codes
// found. This is a slight waste of memory but it is exceedingly unlikely that
// these lists will become sufficiently large as to threaten the program.
//
// 3.The "issues" code string is generated once, then concatenated to all of the
// records generated from the text.
//__________________________________________________________________________________
//
// Copyright (c) 2002-2012 Philip A. Schrodt. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted under the terms of the GNU General Public License:
// http://www.opensource.org/licenses/gpl-license.html
//
// Report bugs to: [email protected]
// The most recent version of this code is available from the KEDS Web site:
// http://eventdata.psu.edu
// For plausible indenting of this source code, set the tab size in your editor to "2"
//___________________________________________________________________________________
// Headers
#include "TABARI.h"
CoderClass Coder;
//___________________________________________________________________________________
// Global Variables
extern TabariFlagsClass TabariFlags;
extern TokenStoreClass TokenStore;
extern LiteralsClass Literals;
extern RootsClass Roots;
extern PhrasesClass Phrases;
extern ReadFilesClass ReadFiles;
extern CodeStoreClass CodeStore;
extern ParserClass Parser;
extern ProcessorClass Processor;
//___________________________________________________________________________________
// actorStruct utilities
bool CoderClass:: actorEqual(actorStruct a, actorStruct b)
// Returns true when a and b have the same actor code, agent code and index.
// The where, rootidx and agtidx fields do not take part in the comparison.
{
  return (a.actor == b.actor)
      && (a.agent == b.agent)
      && (a.index == b.index);
} // actorEqual
bool CoderClass:: actorEmpty (actorStruct a)
// Returns true when the actor, agent and index fields of a are all zero.
{
  return !(a.actor || a.agent || a.index);
} // actorEmpty
void CoderClass:: zeroActor (actorStruct &a)
// Resets every field of a to zero: the codes (actor, agent, index), the text
// location (where) and the root indices (rootidx, agtidx).
{
  // root indices
  a.rootidx = 0;
  a.agtidx = 0;
  // location in the text
  a.where = 0;
  // codes
  a.index = 0;
  a.agent = 0;
  a.actor = 0;
} // zeroActor
void CoderClass:: setActor (actorStruct &a, actorStruct b)
// Copies b into a, one field at a time (actor, agent, index, where, rootidx, agtidx).
{
  // root indices
  a.rootidx = b.rootidx;
  a.agtidx = b.agtidx;
  // location in the text
  a.where = b.where;
  // codes
  a.index = b.index;
  a.agent = b.agent;
  a.actor = b.actor;
} // setActor
//___________________________________________________________________________________
void CoderClass:: setcodeMode(char *s)
// Sets codeMode from the option text s (called from .options):
//   1  s contains "SEN"               -- code one event per sentence
//   2  s contains "ALL" but not "SEN" -- code all verbs
//   0  anything else                  -- code one event per clause
{
  if (strstr(s,"SEN")) {
    codeMode = 1;
    return;
  }
  codeMode = strstr(s,"ALL") ? 2 : 0;
} // setcodeMode
void CoderClass:: coderError(const char *s, int ierror)
// Write the error to <errorfile>
{
Processor.writeError("Coder error: ", (char *)s, ierror);
} // coderError
bool CoderClass:: setClauseBounds(void)
// Sets the clause boundaries istartClause and iendClause.
//   - If the sentence has no clauses (or conjunctions are being ignored) the single
//     "clause" is the whole sentence, 0..Parser.iLex; it is returned once, i.e. only
//     when iendClause is still negative on entry.
//   - Otherwise, if iendClause is negative the search starts at the beginning of the
//     sentence; if it is >= 0 the search starts after the previous clause and looks
//     for the next word carrying a Clause head tag.
// Returns false if there are no more clauses, true when the bounds have been set.
//
// The scan for the Clause tail tag is bounded by Parser.iLex. If no tail tag is found
// (a tagging error) CLAUSE_END_ERROR is reported and the clause is ended at
// Parser.iLex, so that callers never receive a bound beyond the end of the sentence.
{
  if (!Parser.fhasClauses || TabariFlags.fIgnoreConj) { // set bounds at 0 and iLex
    if (iendClause >= 0) return false; // we've already checked this one
    istartClause = 0;
    iendClause = Parser.iLex;
    return true;
  }

  // find the start of the next clause
  if (iendClause >= 0) {
    istartClause = iendClause + 1;
    while ((istartClause <= Parser.iLex) &&
           (!Parser.hasHeadTag(istartClause,Clause))) ++istartClause;
    if (istartClause > Parser.iLex) return false; // no more clauses
    // here iendClause < istartClause <= Parser.iLex, so no separate check on the
    // previous iendClause is needed
  }
  else istartClause = 0;

  // find the end of this clause, never scanning past the end of the sentence
  iendClause = istartClause;
  while ((iendClause <= Parser.iLex) &&
         (!Parser.hasTailTag(iendClause,Clause))) ++iendClause;
  if (iendClause > Parser.iLex) {
    coderError("setClauseBounds2: iendClause > Parser.iLex",CLAUSE_END_ERROR);
    iendClause = Parser.iLex; // recover: let the clause run to the end of the sentence
  }
  return true;
} // setClauseBounds
void CoderClass:: getActorStructCodes(actorStruct &actFound, int loc )
// Fills actFound with the codes of the root found at syntArray[loc].
// On success sets rootidx, actor and agent (through CodeStore.nextActorCode), index
// and where; agtidx is not touched. If the word has no root, a parse error
// (ACTOR_CODE_ERROR) is reported and actFound is zeroed.
{
  // a dereferenced pronoun points at its referent; keep following the references
  // until a word without a Pronoun head tag is reached
  while (Parser.hasHeadTag(loc,Pronoun)) loc = Parser.getReference(loc);

  if (Parser.syntArray[loc].iroot) {
    tokptr pCode = NULL;
    toktype codeStart;

    actFound.rootidx = Parser.syntArray[loc].iroot;
    codeStart = Roots.rootArray[Parser.syntArray[loc].iroot].ppat->icode;
    CodeStore.nextActorCode (actFound.actor, actFound.agent, &pCode, codeStart);
    // index holds the location of a complex code; zero for a simple code
    if (!pCode) actFound.index = 0;
    else actFound.index = codeStart;
    actFound.where = loc;
    return;
  }

  // no root attached to this word
  Parser.parseError((char *)"empty actor root code",ACTOR_CODE_ERROR);
  zeroActor(actFound);
} // getActorStructCodes
void CoderClass:: findActor (int loc, int &locFound, actorStruct &actFound)
// Scans forward from loc to iendClause for the first word that is of type Actor
// and carries an Actor head tag (i.e. the *beginning* of an actor phrase).
// If one is found, locFound is its index and actFound receives its codes and any
// adjacent agent; otherwise locFound is -1 and actFound is left unchanged.
{
  for (int idx = loc; idx <= iendClause; ++idx) {
    if ((Actor == Parser.syntArray[idx].wtype) &&
        Parser.hasHeadTag(idx,Actor)) {
      getActorStructCodes(actFound,idx);
      findAgent(idx, actFound);
      locFound = idx;
      return;
    }
  }
  locFound = -1; // no actor in the remainder of the clause
} // findActor
void CoderClass:: findAgent (int loc, actorStruct &actFound)
// Looks for an agent immediately adjacent to the actor whose head is at loc,
// checking after the actor first and then before it.
//   - An agent directly after the actor (optionally after the adjective FORMER)
//     must start there (Agent head tag).
//   - An agent directly before the actor (optionally separated by one preposition)
//     must end there (Agent tail tag); its head is then located by scanning back.
// If an agent is found, actFound.agtidx is set to the agent's root and
// actFound.agent to its code, or to CodeStore.codeElite when a FORMER adjective
// was seen next to the actor or in front of the agent. If no agent is found, both
// fields are set to zero. Nothing is returned.
//
// All look-behind accesses (index - 1) are guarded so that an actor at the start of
// the sentence never causes syntArray[-1] to be read.
{
  bool found = false;
  bool hasFormer = false;
  int tloc = getActorTail(loc);

  // check for "FORMER" constructions
  if ((Adjctv == Parser.syntArray[tloc+1].wtype) && // 'former' after actor
      (Roots.rootFORMER == Parser.syntArray[tloc+1].iroot)) {
    hasFormer = true;
    ++tloc; // the agent, if any, follows the adjective
  }
  if ((loc>0) && (!hasFormer) &&
      (Adjctv == Parser.syntArray[loc-1].wtype) && // 'former' prior to actor
      (Roots.rootFORMER == Parser.syntArray[loc-1].iroot)) {
    hasFormer = true;
  }

  if ((Agent == Parser.syntArray[tloc+1].wtype) &&
      Parser.hasHeadTag(tloc+1,Agent)) { // agent following actor
    found = true;
    actFound.agtidx = Parser.syntArray[tloc+1].iroot;
    if (hasFormer) actFound.agent = CodeStore.codeElite;
    else actFound.agent = Roots.rootArray[Parser.syntArray[tloc+1].iroot].ppat->icode;
  }
  else { // agent prior to actor
    if ((loc > 0) && (Prep == Parser.syntArray[loc-1].wtype)) loc--; // optional skip over preposition
    if ((loc > 0) &&
        (Agent == Parser.syntArray[loc-1].wtype) &&
        Parser.hasTailTag(loc-1,Agent)) {
      // locate the head of the agent phrase (modified version of getActorHead)
      int istart = loc - 1;
      while ((istart>=0) && (!Parser.hasHeadTag(istart,Agent))) --istart;
      if (istart >= 0) {
        found = true;
        actFound.agtidx = Parser.syntArray[istart].iroot;
        if ((istart>0) && (!hasFormer) &&
            (Adjctv == Parser.syntArray[istart-1].wtype) && // 'former' prior to agent
            (Roots.rootFORMER == Parser.syntArray[istart-1].iroot)) {
          hasFormer = true;
        }
        if (hasFormer) actFound.agent = CodeStore.codeElite;
        else actFound.agent = Roots.rootArray[Parser.syntArray[istart].iroot].ppat->icode;
      }
    }
  }

  if (!found) { // make sure no stale agent information is left behind
    actFound.agent = 0;
    actFound.agtidx = 0;
  }
} // findAgent
void CoderClass:: backActor (int loc, int &locFound, actorStruct &actFound)
// Scans backward from loc to istartClause for a word of type Actor.
// When one is met, getActorHead() moves to the head of that actor phrase:
//   - if the head is found, locFound is the head location and actFound receives
//     the actor's codes and any adjacent agent;
//   - if the head cannot be found (the head tag may have been deactivated in
//     nullSubord) the search is abandoned and locFound is -1.
// locFound is also -1 when no actor word is met at all.
{
  for (int idx = loc; idx >= istartClause; --idx) {
    if (Actor != Parser.syntArray[idx].wtype) continue;

    bool headOk = false;
    idx = getActorHead(idx,headOk); // adjust in case we backed into the phrase
    if (!headOk) {
      locFound = -1;
      return;
    }
    getActorStructCodes(actFound,idx);
    findAgent(idx, actFound);
    locFound = idx;
    return;
  }
  locFound = -1;
} // backActor
int CoderClass:: getActorHead(int loc, bool &fmatch)
// Finds the head of the actor phrase containing loc and appends the syntArray
// indices of the whole phrase (head through tail) to tempArray.
//   - If loc is a dereferenced pronoun, loc is first moved to its referent.
//   - The head is the nearest word at or before loc with an Actor head tag.
// Returns the head location and sets fmatch = true. If no head tag is found,
// sets fmatch = false and returns loc (after any pronoun dereference).
//
// Nothing is ever stored beyond the end of tempArray: when iTemp has reached
// MAX_TEMP, MAX_PHRASE_ERROR is reported and the rest of the phrase is dropped.
// Running past Parser.iLex without meeting an Actor tail tag indicates a tagging
// error; an alert is shown and the copy stops.
{
  int istart, ka;

  if (Parser.hasHeadTag(loc,Pronoun)) { // this was a dereferenced pronoun, so shift loc
    loc = Parser.getReference(loc);
  }

  istart = loc;
  while ((istart>=0) && (!Parser.hasHeadTag(istart,Actor))) --istart;
  if (istart<0) {
    fmatch = false;
    return loc;
  }

  ka = istart-1; // transfer actor indices to tempArray
  do {
    ++ka;
    if (iTemp < MAX_TEMP) tempArray[iTemp++] = ka; // never write past the last slot
    if ((ka > Parser.iLex) || (iTemp >= MAX_TEMP)) {
      if (iTemp >= MAX_TEMP) {
        coderError("Excessive words between nonconsecutive elements of a phrase",MAX_PHRASE_ERROR);
        break;
      }
      WriteAlert("Coder: gAH bounds error"); // this indicates a tagging error: shouldn't hit it
      Pause();
      break;
    }
  } while (!Parser.hasTailTag(ka,Actor));

  fmatch = true;
  return istart;
} // getActorHead
int CoderClass:: getActorTail(int loc)
// Returns the location of the tail of the actor phrase starting at loc.
// For a dereferenced pronoun the tail is the head itself. Otherwise the scan moves
// forward until a word with an Actor tail tag is met or iendClause is reached;
// there is no error reporting if the tail tag is missing.
{
  if (Parser.hasHeadTag(loc,Pronoun)) return loc;

  int tail = loc;
  for ( ; tail < iendClause; ++tail) {
    if (Parser.hasTailTag(tail,Actor)) break;
  }
  return tail;
} // getActorTail
int CoderClass:: getCompound(int loc, bool &fmatch)
// Version called while checking patterns: appends the syntArray indices of the
// whole compound phrase to tempArray, sets fmatch = true and returns the location
// of the phrase head.
// loc may be the head of the compound (the matching tail tag is then verified) or
// a location carrying the tail tag (the head is then found with the one-argument
// getCompound). Returns -1, leaving fmatch untouched, when the corresponding tag
// does not exist.
{
  int head;

  if (!Parser.hasHeadTag(loc, Compound)) { // work back from the tail
    head = getCompound(loc); // one-argument overload, not a recursive call
    if (head < 0) return -1; // no corresponding head tag
  }
  else { // work forward from the head
    if (!Parser.checkTailTag(loc,Compound)) return -1; // no corresponding tail tag
    head = loc;
  }

  // copy head..tail into tempArray; if tempArray fills up the rest is ignored
  for (int ka = head; iTemp < MAX_TEMP; ++ka) {
    tempArray[iTemp++] = ka;
    if (Parser.hasTailTag(ka,Compound)) break;
  }

  fmatch = true;
  return head;
} // getCompound
int CoderClass:: getCompound(int loc)
// Version that only locates the head of a compound phrase.
// loc must carry a Compound tail tag; the scan moves backward from loc to the word
// whose Compound head tag matches that tail tag.
// Returns the head location, or -1 if loc has no tail tag or no matching head is found.
{
  toktype tagidx = Parser.getTailTag(loc, Compound);
  if (!tagidx) return -1;

  int head = loc;
  while (head >= 0) {
    if (Parser.checkHeadTag(head,Compound,tagidx)) break;
    --head;
  }
  return head; // -1 when the scan ran off the front of the array
} // getCompound
void CoderClass:: saveState(void)
// Snapshots the matching state before a synset or conditional is evaluated, and
// turns conditional evaluation on (fcond = true). The snapshot covers the text
// location, the tempArray fill level, the matched-alternative length and the skip mode.
{
  condfskip = fskipMode;
  condlen = lenalt;
  condtemp = iTemp;
  condloc = locSynt;
  fcond = true;
} // saveState
void CoderClass:: restoreState(void)
// Restores the matching state recorded by saveState(): text location, tempArray
// fill level, matched-alternative length and skip mode. fcond is left as it is.
{
  fskipMode = condfskip;
  iTemp = condtemp;
  lenalt = condlen;
  locSynt = condloc;
} // restoreState
void CoderClass:: initCondit(void)
// Starts the evaluation of a conditional: saves the current matching state, then
// steps the pattern pointer over the bracket to the first literal inside it
// (forward when matching the upper phrase, backward for the lower phrase).
{
  saveState();
  ptPat += (isHigh ? 2 : -2); // skip connector; go to next literal
} // initCondit
void CoderClass:: condCheck(int &state)
// Called when the current option of a conditional has failed: looks for another
// alternative. The pattern pointer is moved (two slots at a time) in the current
// matching direction until either an OR separator or the bracket that closes the
// conditional in that direction is met.
//   - Bracket met: this was the last option; state is left unchanged (match failed).
//   - OR met: the saved state is restored, the pattern pointer moves to the first
//     literal of the next alternative, the match mode is set from that literal's
//     connector, and state is set to 2 (keep trying).
// There is no bounds check on the scan; it relies on the bracket being present.
{
  if (isHigh) {
    for (;;) {
      if (CLBRACK_LIT == *ptPat) return; // last option, so match failed
      if (OR_LIT == *ptPat) break;
      ptPat += 2;
    }
  }
  else {
    for (;;) {
      if (OPBRACK_LIT == *ptPat) return; // last option, so match failed
      if (OR_LIT == *ptPat) break;
      ptPat -= 2;
    }
  }

  restoreState();
  ptPat += (isHigh ? 2 : -2); // skip connector; go to next literal

  // partial-match connector on the new literal turns full matching off
  if (*(ptPat+1) & Phrases.connPartNext) fullMode = false;
  else fullMode = true;

  state = 2; // and keep trying!
} // condCheck
int CoderClass:: compSynset(void)
// Compares the synset patterns at rootArray[*ptPat] with syntArray location locSynt;
// returns same values as compTarget [below].
{
int match = 0;
patStruct *pPat = Roots.rootArray[*ptPat].ppat;
pPat = pPat->pnext; // go to start of list
while (pPat) {
#if DEBUG_PAT
{instring s; // *** debug
toktype * ptok = pPat->phrase;
Processor.fprob << "compSynt: " << Phrases.get(s, pPat->phrase) << endl; // *** debug: this is the phrase we are trying to match
Processor.fprob << "Phrase list :" << endl;
while (*ptok) {
Processor.fprob << " " << *ptok << ": " << Literals.litArray[*ptok].pchar << " -- " << *(++ptok) << endl; // get the text and the value of the connector
++ptok;
}
}
#endif
if ((match = checksubPattern(pPat))) { // deliberation use of assignment
// if (iTemp < MAX_TEMP) tempArray[iTemp++] = locSynt; // ### doesn't want this, for some reason...phrase extraction is working fine <12.01.20>
// if (fcond) lenalt += strlen(Literals.litArray[*ptPat].pchar); // update length of matched conditional
// Processor.fprob << "Match succeeded" << endl;
return match;
}
pPat = pPat->pnext;
}
return 0;
} // compSynset
int CoderClass:: checksubPattern(patStruct *pPat)
// Checks one pattern of a synset against the text at locSynt.
// The caller's pattern pointers (ptPat, ptThisPat) and text location (locSynt) are
// saved on entry and restored before returning, so only the result is visible to
// the caller. Matching runs in the current direction (isHigh):
//   - upper phrase: a nested synset is compared once at the current location;
//   - lower phrase: a nested synset is retried at successive text locations until it
//     matches or the text runs out, in which case the saved state is restored.
// Returns 2 if the pattern matched, 0 if it failed.
{
  // caller's position, restored before returning
  tokptr callerPat = ptPat;
  tokptr callerThisPat = ptThisPat;
  int callerLoc = locSynt;

  const bool upper = isHigh;
  int result = 0;
  int state = 2;

  ptThisPat = ptPat = pPat->phrase;
#if DEBUG_PAT
  instring s; // *** debug
  Processor.fprob << "subPat >> Phrase: " << Phrases.get(s, pPat->phrase) << endl;
#endif

  do {
    if (!(Phrases.flagSynm & *(ptPat+1))) result = compTarget();
    else if (upper) result = compSynset();
    else {
      // lower phrase: deal with possibility of multiple words in synset patterns
      saveState();
      for (;;) {
        result = compSynset();
        if (result) break;
        if (!continueCheck()) break;
      }
      if (!result) {
        fcond = false;
        restoreState();
      }
    }
    state = stateCheck(result);
  } while (2 == state);

  ptPat = callerPat;
  ptThisPat = callerThisPat;
  locSynt = callerLoc;

  return state ? 2 : 0;
} // checksubPattern
int CoderClass:: compTarget(void)
// Compares the current pattern target *ptPat with the text at syntArray[locSynt].
// Returns
//   0 if the match failed
//   1 if partial match
//   2 if exact match (also returned for actor and compound tokens)
//
// Literals (*ptPat >= SYMBOL_LIMIT) are compared with Parser.hasLitr(); on a match
// the text location is recorded in tempArray (silently dropped if tempArray is
// full) and, inside a conditional, lenalt grows by the length of the literal.
//
// The source, target and attribution tokens match a word of type Actor: the whole
// actor root is recorded and indexSource / indexTarget / indexAttrib is set to its
// head. Only the first literal of the root is skipped, so a phrase may contain words
// that are part of a root. The skip token passes over the entire actor root without
// recording it. The compound token matches a word with a Compound head tag and sets
// indexCompound.
{
  bool fmatch = false;
#if DEBUG_PAT
  Processor.fprob << "compTarget: " << Parser.lexArray[locSynt].ptext << endl;
#endif

  // --- literal ---
  if (*ptPat >= SYMBOL_LIMIT) {
    const int litMatch = Parser.hasLitr(locSynt, *ptPat);
    if (!litMatch) return 0;
    if (iTemp < MAX_TEMP) tempArray[iTemp++] = locSynt; // if bounds overflow, just ignore
    if (fcond) lenalt += strlen(Literals.litArray[*ptPat].pchar); // update length of matched conditional
#if DEBUG_PAT
    Processor.fprob << " cT match: " << litMatch << endl;
#endif
    return litMatch;
  }

  // --- symbolic tokens ---
  if ((COMPOUND_LIT == *ptPat) && (Parser.hasHeadTag(locSynt, Compound))) {
    indexCompound = getCompound(locSynt, fmatch);
  }
  else if (Actor == Parser.syntArray[locSynt].wtype) {
    if (SOURCE_LIT == *ptPat) {
      indexSource = getActorHead(locSynt,fmatch);
    }
    else if (TARGET_LIT == *ptPat) {
      indexTarget = getActorHead(locSynt,fmatch);
    }
    else if (ATTRIB_LIT == *ptPat) {
      indexAttrib = getActorHead(locSynt,fmatch);
    }
    else if (SKIP_LIT == *ptPat) {
      const int keepTemp = iTemp;
      if (isHigh) {
        getActorHead(locSynt,fmatch);
        if (fmatch) locSynt = getActorTail(locSynt);
      }
      else locSynt = getActorHead(locSynt,fmatch); // move past the actor
      iTemp = keepTemp; // don't store skipped text
    }
  }

  return fmatch ? 2 : 0;
} // compTarget
int CoderClass:: continueCheck(void)
// Moves one word in the text without changing the pattern target or the match mode:
// forward when matching the upper phrase, backward for the lower phrase.
// Returns 0 when the text is exhausted (past iendClause going forward, before
// index 0 going backward), 2 otherwise.
{
  if (isHigh) {
    if (++locSynt > iendClause) return 0;
  }
  else {
    if (--locSynt < 0) return 0;
  }
#if DEBUG_PAT
  Processor.fprob << "cC: locSynt " << locSynt << endl;
#endif
  return 2;
} // continueCheck
int CoderClass:: nextCheck(void)
// Advances both the text and the pattern by one element in the current matching
// direction and sets the target modes for the new element.
// returns
//   0 end of text
//   1 end of pattern
//   2 otherwise
//
// Pattern elements are (literal, connector) pairs, so the pattern pointer always
// moves two slots at a time and *(ptPat+1) is the connector of the element at ptPat.
//
// Inside a conditional (fcond), once the matched alternative is complete the
// remaining alternatives are skipped up to the bracket that closes the conditional
// in the current direction and fcond is cleared.
//
// Skip mode (fskipMode) is taken from the connector of the element being left when
// going forward, and from the connector of the new element when going backward.
// Match mode (fullMode) is always taken from the connector of the new element.
{
  if (isHigh) {
    if (fcond) {
      if ((OR_LIT == *(ptPat+2)) || (CLBRACK_LIT == *(ptPat+2))) { // at end of internal pattern
        fcond = false;
        // move to close of conditional, testing literal slots only so that a
        // connector value can never be mistaken for the closing bracket
        while (CLBRACK_LIT != *ptPat) ptPat += 2;
      }
    }
    if (*(ptPat+1) & Phrases.connFullSkip) fskipMode = true; // set skip mode using current pattern
    else fskipMode = false;
    ptPat += 2;
    if (!*ptPat) return 1; // end of pattern
    ++locSynt;
    if (locSynt > iendClause) return 0; // end of text
  }
  else {
    ptPat -= 2;
    if (ptPat < ptThisPat) return 1; // beginning of pattern
    if (fcond) {
      if ((OR_LIT == *ptPat) || (OPBRACK_LIT == *ptPat)) { // skip remaining pattern
        fcond = false;
        while (OPBRACK_LIT != *ptPat) ptPat -= 2; // move to close of conditional
        ptPat -= 2; // move to element prior to conditional
        // the conditional was the first element of the pattern: nothing is left
        // to match, and ptPat must not be dereferenced before the pattern start
        if (ptPat < ptThisPat) return 1;
      }
    }
    if (*(ptPat+1) & Phrases.connFullSkip) fskipMode = true; // set skip mode using new pattern
    else fskipMode = false;
    --locSynt;
    if (locSynt < 0) return 0; // beginning of text
  }

  if (*(ptPat+1) & Phrases.connPartNext) fullMode = false; // set match mode using new pattern
  else fullMode = true;
#if DEBUG_PAT
  Processor.fprob << "nC: locSynt " << locSynt << " phrase element " << (ptPat - ptThisPat) << endl;
#endif
  return 2;
} // nextCheck
int CoderClass:: stateCheck(int result)
// Decides what to do with the result of a target comparison, given the current
// modes, and returns the state of the pattern check:
//   0 pattern failed
//   1 pattern succeeded
//   2 continue checking
// A result counts as a match when it is 2 (exact) in full mode, or non-zero in
// partial mode. On a match the pattern and text advance (nextCheck). On a miss the
// text alone advances in skip mode (continueCheck); otherwise the pattern fails.
{
#if DEBUG_PAT
  Processor.fprob << "stChk: fullMode " << fullMode << " skipMode " << fskipMode << " result " << result << endl;
#endif
  const bool matched = fullMode ? (2 == result) : (0 != result);

  if (matched) return nextCheck();
  if (fskipMode) return continueCheck();
  return 0;
} // stateCheck
bool CoderClass:: checkPattern(patStruct *pPat, int loc, wordtype wtype )
// This is the core routine for checking verb patterns. It does the following
// 1. Finds the verb marker in the pattern phrase
// 2. Attempts to match the literals following the verb ("upper phrase")
// 3. If successful, attempts to match the literals prior to the verb ("lower phrase")
// (note that "upper" and "lower" are in reference to the array indices, and
// unfortunately this is the opposite of the orientation of the parsed display...)
//
// The matching of the upper phrase begins immediately after the *end* of the verb root.
// The matching of the lower phrase begins immediately prior to the start of the verb root
//
// The syntArray indices of the literals in the matched string, including the roots, are
// put in the tempArray, with the upper string first, followed by -1, followed by the
// lower string, followed by -2. The variables indexSource, indexTarget, indexAttrib,
// and indexCompound can also be set by this.
//
// Parameters:
//   pPat   pattern to check; pPat->phrase holds its (literal, connector) pairs
//   loc    syntArray index where the root begins (stored as tempArray[0])
//   wtype  word type of the root; its tail tag marks where the upper match starts
// Returns true if both the upper and the lower phrase matched, false otherwise
// (including the case where the pattern contains no verb marker).
//
// <09.01.02> ### this probably should not match across clause boundaries, right?
{
tokptr pverb; // save location of verb
int result = 0; // result of the last target comparison (0, 1 or 2)
int state = 2; // 0 = failed, 1 = succeeded, 2 = continue checking (see stateCheck)
#if DEBUG_PAT
instring s; // *** debug
Processor.fprob << "\nchkPat >> Phrase: " << Phrases.get(s, pPat->phrase) << endl;
#endif
tempArray[0] = loc; // record verb as first element of tempArray
iTemp = 1;
lenalt = 0; // length of text matched in alternatives
fcond = false; // doing conditional evaluation?
ptThisPat = ptPat = pPat->phrase;
locSynt = loc;
while (*ptPat && (*ptPat != VERB_LIT)) ptPat +=2; // find the verb in the pattern
if (!*ptPat) return false; // no verb in pattern :this should have been caught at input
pverb = ptPat; // save value
// move locSynt to the word carrying the tail tag of the root, stopping at the Halt word
while ((Halt != Parser.syntArray[locSynt].wtype) &&
(!Parser.hasTailTag(locSynt,wtype))) ++locSynt; // set highloc to the end of the wtype root;
// assert(Halt != Parser.syntArray[locSynt].wtype); // if fails, tail wasn't marked so something is wrong...
// do the upper (forward) match
// Processor.fprob << "Coder:cP -- upper match\n"; // *** debug
isHigh = true;
state = nextCheck(); // step to the first pattern element and word after the verb
while (2 == state) {
if (Phrases.flagSynm & *(ptPat+1)) { // connector flags this element as a synset
// Processor.fprob << "Coder:chkPat -- Synset detected\n"; // *** debug
result = compSynset();
}
else {
if (OPBRACK_LIT == *ptPat) initCondit(); // entering a conditional: save state, move to its first literal
result = compTarget();
}
state = stateCheck(result);
if (!state && fcond) condCheck(state); // option failed inside a conditional: try the next alternative
}
if (!state) return false; // upper phrase failed
if (iTemp > MAX_TEMP - 2) {
// Processor.fprob << "cP:Mk1\n";
iTemp = MAX_TEMP - 2; // could occur with very long phrases, so overwrite part of this
}
tempArray[iTemp++] = -1; // mark end of upper phrase
if (pverb == pPat->phrase) { // verb marker is the first element of the pattern
tempArray[iTemp++] = -2; // mark end of lower phrase
return true; // no lower phrase, so we're done
}
// Now do the lower phrase...
// Processor.fprob << "Coder:cP -- lower match\n"; // *** debug
isHigh = false;
ptPat = pverb; // restart from the verb marker, this time moving backward
locSynt = loc;
state = nextCheck(); // step to the pattern element and word just before the verb
while (2 == state) {
if (Phrases.flagSynm & *(ptPat+1)) { // connector flags this element as a synset
int docheck = 1;
saveState(); // so the position can be restored if the synset never matches
while (docheck){ // deal with possibility of multiple words in synset patterns
result = compSynset();
if (!result) docheck = continueCheck(); // no match here: try one word further back
else docheck = 0;
}
if (!result) { // ran out of text without a match
fcond = false;
restoreState();
}
// else ptPat -= 2; // skip the synflag and go to the slot containing the connector
}
else {
if (CLBRACK_LIT == *ptPat) initCondit(); // going backward, a conditional is entered at its closing bracket
result = compTarget();
}
state = stateCheck(result);
if (!state && fcond) condCheck(state); // option failed inside a conditional: try the next alternative
}
if (!state) return false; // lower phrase failed
if (iTemp > MAX_TEMP - 1) iTemp = MAX_TEMP - 1; // could occur with very long phrases, so overwrite part of this
tempArray[iTemp++] = -2; // mark end of lower phrase
return true;
} // checkPattern
void CoderClass:: setDefaultSource(void)
// Makes theSource the default source: the DEFAULT_ACTOR root with actor code
// idxcodeDefSrc, no agent and no complex-code index. Its text location is the
// placeholder MAX_SYNT - 1. Also flags that a source has been set.
{
  theSource.where = MAX_SYNT - 1; // this is the placeholder
  theSource.index = 0;
  theSource.agent = 0;
  theSource.actor = idxcodeDefSrc;
  theSource.rootidx = Roots.rootDefActor; // DEFAULT_ACTOR root
  fRegularSource = true; // well, sort of regular...signals that we've set a source
} // setDefaultSource
void CoderClass:: setDefaultTarget(void)
// Makes theTarget the default target: the DEFAULT_ACTOR root with actor code
// idxcodeDefTar, no agent and no complex-code index. Its text location is the
// placeholder MAX_SYNT - 1. Also flags that a target has been set.
{
  theTarget.where = MAX_SYNT - 1; // this is the placeholder
  theTarget.index = 0;
  theTarget.agent = 0;
  theTarget.actor = idxcodeDefTar;
  theTarget.rootidx = Roots.rootDefActor; // DEFAULT_ACTOR root
  fRegularTarget = true;
} // setDefaultTarget
void CoderClass:: getRegularSource(int index)
// Regular source finder; this is used if the source is not found by a pattern
// index is the location of the verb, which is used if the default source PRIOR option is true
{
int loc = istartClause; // start search from beginning of clause
int locfound = 0; // forces while loop to execute at least once
bool found = false;
actorStruct iAct;
// Processor.fprob << "fhasdefsrc: " << fhasDefltSrc << "\n";
while (!found && (locfound >= 0))
{
findActor(loc, locfound, iAct);
if (locfound >= 0) {
if (fDefPRIOR && (locfound > index)) { // SOURCE option: only look for actor prior to verb
setDefaultSource();
return;
}
if (!actorEqual(iAct, theTarget)) {
setActor(theSource, iAct);
found = true;
fRegularSource = true;
// Processor.fprob << "found source: " << theSource.actor << " at " << locfound << "\n";
}
else loc = locfound + 1; // already have that one, so try another
}
}
if ((!found) && fhasDefltSrc) setDefaultSource();
return;
} // getRegularSource
void CoderClass:: getRegularTarget (int index)
{
int loc;
int locfound = 0; // forces while loop to execute once
bool found = false;
actorStruct iAct;
loc = index + 1; // check after the verb }
while (!found && (locfound >= 0)) {
findActor(loc, locfound, iAct);
if (locfound >= 0) {
if (!actorEqual(iAct, theSource)) {
setActor(theTarget,iAct);
found = true;
fRegularTarget = true;
}
else loc = locfound + 1;
}
}
// Processor.fprob << " gDT:Mk1 " << theSource.agtidx << " " << theTarget.agtidx << endl; // debug
if (found) return;
if (fDefAFTER) { // TARGET option: only look for actor prior to verb
setDefaultTarget();
return;
}
loc = index - 1; // try looking before the verb
locfound = 0;
while (!found && (locfound >= 0)) { // ### check this condition
backActor(loc, locfound, iAct);
if (locfound >= 0) {
if (!actorEqual(iAct, theSource)) {
setActor(theTarget,iAct);
found = true;
}
else loc = locfound - 1;
}
}
// Processor.fprob << " gDT:Mk2 " << theSource.agtidx << " " << theTarget.agtidx << endl; // debug
if (found) return;
else // if there is a compound actor make that the target
if ((theSource.index) ||
(Parser.syntArray[theSource.where].flags & Parser.setCompd))
setActor(theTarget, theSource);
else if (fhasDefltSrc) setDefaultTarget();
} // getRegularTarget
bool CoderClass:: checkAttribCode (patStruct * pPat)
// Examines the code of an attribution pattern for the null, discard and complex codes.
//   null code     -> returns false
//   discard code  -> sets Parser.fHasDiscard, returns true
//   complex code  -> sets Parser.fIsComplex and Parser.fCplxStrg, stores an
//                    explanation in Parser.sComplex, returns true
//   any other code -> returns false
{
  const int code = TokenStore.getTokenValue(pPat->icode);

  if (code == CodeStore.indexNull) return false;

  if (code == CodeStore.indexDiscard) {
    Parser.fHasDiscard = true;
    return true;
  }

  if (code != CodeStore.indexComplex) return false; // ordinary code

  Parser.fIsComplex = true;
  Parser.fCplxStrg = true;
  strcpy(Parser.sComplex,"Complex code in attribution phrase");
  return true;
} // checkAttribCode
void CoderClass:: getAttribActor (int loc)
// Fill in theAttrib for the attribution phrase at loc:
//   0. leave theAttrib alone if it is already non-empty (set by a pattern);
//   1. walk backward from loc, stopping at a comma or at istartClause, and
//      use the first actor whose head getActorHead() reports as found;
//   2. failing that, extend iendClause to Parser.iLex and let findActor()
//      search from position 0.
{
	if (!actorEmpty(theAttrib)) return;		// attribution already set by a pattern

	bool gotHead = false;
	while (loc >= istartClause) {
		if (Comma == Parser.syntArray[loc].wtype) break;	// do not search past a comma

		if (Actor == Parser.syntArray[loc].wtype) {
			loc = getActorHead(loc, gotHead);	// adjust in case we backed into this
			if (gotHead) {
				getActorStructCodes(theAttrib, loc);
				return;							// got it before the phrase, so we're done
			}
		}
		--loc;
	}

	// otherwise look from the beginning of the sentence
	iendClause = Parser.iLex;		// consider the entire sentence (iendClause is not restored here)
	int firstHit;					// receives findActor's location result; not used afterwards
	findActor(0, firstHit, theAttrib);
} // getAttribActor
void CoderClass:: doAttrib(void)
// Check for attribution
// Stops at the first non-null-coded phrase encountered
{
int index = Parser.iLex; // check from the end of the sentence
patStruct * pPat;
// instring s; // *** debug
zeroActor(theAttrib);
while (index >= 0) {
if (Parser.hasTailTag(index,Attrib)) {
while ((index>=0) && (!Parser.hasHeadTag(index,Attrib))) --index; // find head of phrase
if (index < 0) {
coderError("No head tag on attribution phrase",HEAD_ATTRIB_ERROR); // should not hit this...
return;
}
pPat = Roots.rootArray[Parser.syntArray[index].iroot].ppat;
// cout << "\nDA:cT1: " << Phrases.get(s, Roots.rootArray[Parser.syntArray[index].iroot].phrase) << endl;
while (pPat->pnext) { // go through phrases
pPat = pPat->pnext;
// cout << "DA:cT2: " << Phrases.get(s, pPat->phrase) << endl;
if (checkPattern(pPat, index, Attrib)) { // pattern matches
++pPat->used;
if (checkAttribCode(pPat)) return; // complex or discard code
if (indexAttrib>=0) getActorStructCodes(theAttrib, indexAttrib); // attrib was set in a pattern
else getAttribActor(index); // get the actor using default rules
// cout << "Attribution located at " << theAttrib.where << endl;
return;
} // if
} // while
// if no match, try the main phrase
if (Roots.rootArray[Parser.syntArray[index].iroot].ppat->icode) {
// CodeStore.getEventString(s,Roots.rootArray[Parser.syntArray[index].iroot].ppat->icode);
// cout << "DA:cT3: " << s << endl;
if (checkAttribCode(Roots.rootArray[Parser.syntArray[index].iroot].ppat)) return; // complex or discard code
getAttribActor(index); // get the actor
// cout << "Attribution located at " << theAttrib.where << endl;
return;
}
} // if tailtag
--index;
} // while index
} // doAttrib
void CoderClass:: checkTime(void)
// Check for time-shift phrases.
// Currently stops at the first phrase encountered
{
int index = 0;
patStruct * pPat;
litstring s; // ### debug
while (Parser.syntArray[index].wtype != Halt) {
if (Parser.hasHeadTag(index,Time)) {
pPat = Roots.rootArray[Parser.syntArray[index].iroot].ppat;
// Processor.fprob << "\nCC:cT1: " << Phrases.get(s, Roots.rootArray[Parser.syntArray[index].iroot].phrase) << endl; // ### debug
while (pPat->pnext) { // go through phrases
pPat = pPat->pnext;
// Processor.fprob << "CC:cT2: " << Phrases.get(s, pPat->phrase) << endl;// ### debug
if (checkPattern(pPat, index, Time)) { // pattern matches
++pPat->used;
if (shiftDate(pPat)) return; // continue search if a null code
}
}
// if no match, try the main phrase
if (Roots.rootArray[Parser.syntArray[index].iroot].ppat->icode) {
CodeStore.getEventString(s,Roots.rootArray[Parser.syntArray[index].iroot].ppat->icode); // ### debug
// Processor.fprob << "CC:cT3: " << s << endl;// ### debug