% cweave.w --- source of the CWEAVE processor
% This file is part of CWEB.
% This program by Silvio Levy and Donald E. Knuth
% is based on a program by Knuth.
% It is distributed WITHOUT ANY WARRANTY, express or implied.
% Version 3.64 --- February 2002
% (essentially the same as version 3.6, which added
% recently introduced features of standard C++ to version 3.4)
% (In November 2016 I made minor adjustments but changed no code -- DEK)
% Copyright (C) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth
% Permission is granted to make and distribute verbatim copies of this
% document provided that the copyright notice and this permission notice
% are preserved on all copies.
% Permission is granted to copy and distribute modified versions of this
% document under the conditions for verbatim copying, provided that the
% entire resulting derived work is given a different name and distributed
% under the terms of a permission notice identical to this one.
% Here is TeX material that gets inserted after \input cwebmac
\def\hang{\hangindent 3em\indent\ignorespaces}
\def\pb{$\.|\ldots\.|$} % C brackets (|...|)
\def\v{\char'174} % vertical (|) in typewriter font
\def\dleft{[\![} \def\dright{]\!]} % double brackets
\mathchardef\RA="3221 % right arrow
\mathchardef\BA="3224 % double arrow
\def\({} % ) kludge for alphabetizing certain section names
\def\TeXxstring{\\{\TEX/\_string}}
\def\skipxTeX{\\{skip\_\TEX/}}
\def\copyxTeX{\\{copy\_\TEX/}}
\def\title{CWEAVE (Version 3.64)}
\def\topofcontents{\null\vfill
\centerline{\titlefont The {\ttitlefont CWEAVE} processor}
\vskip 15pt
\centerline{(Version 3.64)}
\vfill}
\def\botofcontents{\vfill
\noindent
Copyright \copyright\ 1987, 1990, 1993, 2000 Silvio Levy and Donald E. Knuth
\bigskip\noindent
Permission is granted to make and distribute verbatim copies of this
document provided that the copyright notice and this permission notice
are preserved on all copies.
\smallskip\noindent
Permission is granted to copy and distribute modified versions of this
document under the conditions for verbatim copying, provided that the
entire resulting derived work is given a different name and distributed
under the terms of a permission notice identical to this one.
}
\pageno=\contentspagenumber \advance\pageno by 1
\let\maybe=\iftrue
@s not_eq normal @q unreserve a C++ keyword @>
@** Introduction.
This is the \.{CWEAVE} program by Silvio Levy and Donald E. Knuth,
based on \.{WEAVE} by Knuth.
We are thankful to Steve Avery,
Nelson Beebe, Hans-Hermann Bode (to whom the original \CPLUSPLUS/ adaptation
is due), Klaus Guntermann, Norman Ramsey, Tomas Rokicki, Joachim Schnitter,
Joachim Schrod, Lee Wittenberg, Saroj Mahapatra, Cesar Augusto Rorato
Crusius, and others who have contributed improvements.
The ``banner line'' defined here should be changed whenever \.{CWEAVE}
is modified.
@d banner "This is CWEAVE (Version 3.64)\n"
@c @<Include files@>@/
@h
@<Common code for \.{CWEAVE} and \.{CTANGLE}@>@/
@<Typedef declarations@>@/
@<Global variables@>@/
@<Predeclaration of procedures@>
@ We predeclare several standard system functions here instead of including
their system header files, because the names of the header files are not as
standard as the names of the functions. (For example, some \CEE/ environments
have \.{<string.h>} where others have \.{<strings.h>}.)
@<Predecl...@>=
/* Old-style declarations without prototypes, so argument types are not checked.
NOTE(review): ISO \CEE/ declares |strlen| with result type |size_t|, not |int|;
confirm that no system header with a conflicting prototype is included as well. */
extern int strlen(); /* length of string */
extern int strcmp(); /* compare strings lexicographically */
extern char* strcpy(); /* copy one string to another */
extern int strncmp(); /* compare up to $n$ string characters */
extern char* strncpy(); /* copy up to $n$ string characters */
@ \.{CWEAVE} has a fairly straightforward outline. It operates in
three phases: First it inputs the source file and stores cross-reference
data, then it inputs the source once again and produces the \TEX/ output
file, finally it sorts and outputs the index.
Please read the documentation for \.{common}, the set of routines common
to \.{CTANGLE} and \.{CWEAVE}, before proceeding further.
@c
int main (ac, av)
int ac; /* argument count */
char **av; /* argument values */
{
  /* Entry point: record the command line, initialize, then run the
     three phases in order; the result of |wrap_up| is the exit status. */
  argc=ac; argv=av;
  program=cweave;
  make_xrefs=force_lines=make_pb=1; /* controlled by command-line options */
  common_init();
  @<Set initial values@>;
  if (show_banner) printf("%s",banner);
    /* print a ``banner line''; |banner| is passed as data, never as the format */
  @<Store all the reserved words@>;
  phase_one(); /* read all the user's text and store the cross-references */
  phase_two(); /* read all the text again and translate it to \TEX/ form */
  phase_three(); /* output the cross-reference index */
  return wrap_up(); /* and exit gracefully */
}
@ The following parameters were sufficient in the original \.{WEAVE} to
handle \TEX/, so they should be sufficient for most applications of \.{CWEAVE}.
If you change |max_bytes|, |max_names|, |hash_size|, or |buf_size|
you have to change them also in the file |"common.w"|.
@d max_bytes 90000 /* the number of bytes in identifiers,
index entries, and section names */
@d max_names 4000 /* number of identifiers, strings, section names;
must be less than 10240; used in |"common.w"| */
@d max_sections 2000 /* greater than the total number of sections;
must be smaller than |cite_flag| */
@d hash_size 353 /* should be prime */
@d buf_size 100 /* maximum length of input line, plus one */
@d longest_name 10000 /* section names and strings shouldn't be longer than this */
@d long_buf_size (buf_size+longest_name) /* one input line plus the longest name */
@d line_length 80 /* lines of \TEX/ output have at most this many characters;
should be less than 256 */
@d max_refs 20000 /* number of cross-references; must be less than 65536 */
@d max_toks 20000 /* number of symbols in \CEE/ texts being parsed;
must be less than 65536 */
@d max_texts 4000 /* number of phrases in \CEE/ texts being parsed;
must be less than 10240 */
@d max_scraps 2000 /* number of tokens in \CEE/ texts being parsed */
@d stack_size 400 /* number of simultaneous output levels */
@ The next few sections contain stuff from the file |"common.w"| that must
be included in both |"ctangle.w"| and |"cweave.w"|. It appears in
file |"common.h"|, which needs to be updated when |"common.w"| changes.
@i common.h
@* Data structures exclusive to {\tt CWEAVE}.
As explained in \.{common.w}, the field of a |name_info| structure
that contains the |rlink| of a section name is used for a completely
different purpose in the case of identifiers. It is then called the
|ilk| of the identifier, and it is used to
distinguish between various types of identifiers, as follows:
\yskip\hang |normal| and |func_template| identifiers are part of the
\CEE/ program that will appear in italic type (or in typewriter type
if all uppercase).
\yskip\hang |custom| identifiers are part of the \CEE/ program that
will be typeset in special ways.
\yskip\hang |roman| identifiers are index entries that appear after
\.{@@\^} in the \.{CWEB} file.
\yskip\hang |wildcard| identifiers are index entries that appear after
\.{@@:} in the \.{CWEB} file.
\yskip\hang |typewriter| identifiers are index entries that appear after
\.{@@.} in the \.{CWEB} file.
\yskip\hang |alfop|, \dots, |template_like|
identifiers are \CEE/ or \CPLUSPLUS/ reserved words whose |ilk|
explains how they are to be treated when \CEE/ code is being
formatted.
@d ilk dummy.Ilk /* so that |p->ilk| stands for |p->dummy.Ilk| */
@d normal 0 /* ordinary identifiers have |normal| ilk */
@d roman 1 /* normal index entries have |roman| ilk */
@d wildcard 2 /* user-formatted index entries have |wildcard| ilk */
@d typewriter 3 /* `typewriter type' entries have |typewriter| ilk */
@d abnormal(a) ((a)->ilk>typewriter) /* tells if a name is special;
the argument is parenthesized so that any pointer expression may be passed */
@d func_template 4 /* identifiers that can be followed by optional template */
@d custom 5 /* identifiers with user-given control sequence */
@d alfop 22 /* alphabetic operators like \&{and} or \&{not\_eq} */
@d else_like 26 /* \&{else} */
@d public_like 40 /* \&{public}, \&{private}, \&{protected} */
@d operator_like 41 /* \&{operator} */
@d new_like 42 /* \&{new} */
@d catch_like 43 /* \&{catch} */
@d for_like 45 /* \&{for}, \&{switch}, \&{while} */
@d do_like 46 /* \&{do} */
@d if_like 47 /* \&{if}, \&{ifdef}, \&{endif}, \&{pragma}, \dots */
@d delete_like 48 /* \&{delete} */
@d raw_ubin 49 /* `\.\&' or `\.*' when looking for \&{const} following */
@d const_like 50 /* \&{const}, \&{volatile} */
@d raw_int 51 /* \&{int}, \&{char}, \dots; also structure and class names */
@d int_like 52 /* same, when not followed by left parenthesis or \DC\ */
@d case_like 53 /* \&{case}, \&{return}, \&{goto}, \&{break}, \&{continue} */
@d sizeof_like 54 /* \&{sizeof} */
@d struct_like 55 /* \&{struct}, \&{union}, \&{enum}, \&{class} */
@d typedef_like 56 /* \&{typedef} */
@d define_like 57 /* \&{define} */
@d template_like 58 /* \&{template} */
@ We keep track of the current section number in |section_count|, which
is the total number of sections that have started. Sections which have
been altered by a change file entry have their |changed_section| flag
turned on during the first phase.
@<Global...@>=
boolean change_exists; /* has any section changed? */
@ The other large memory area in \.{CWEAVE} keeps the cross-reference data.
All uses of the name |p| are recorded in a linked list beginning at
|p->xref|, which points into the |xmem| array. The elements of |xmem|
are structures consisting of an integer, |num|, and a pointer |xlink|
to another element of |xmem|. If |x=p->xref| is a pointer into |xmem|,
the value of |x->num| is either a section number where |p| is used,
or |cite_flag| plus a section number where |p| is mentioned,
or |def_flag| plus a section number where |p| is defined;
and |x->xlink| points to the next such cross-reference for |p|,
if any. This list of cross-references is in decreasing order by
section number. The next unused slot in |xmem| is |xref_ptr|.
The linked list ends at |&xmem[0]|.
The global variable |xref_switch| is set either to |def_flag| or to zero,
depending on whether the next cross-reference to an identifier is to be
underlined or not in the index. This switch is set to |def_flag| when
\.{@@!} or \.{@@d} is scanned, and it is cleared to zero when
the next identifier or index entry cross-reference has been made.
Similarly, the global variable |section_xref_switch| is either
|def_flag| or |cite_flag| or zero, depending
on whether a section name is being defined, cited or used in \CEE/ text.
@<Type...@>=
typedef struct xref_info {
sixteen_bits num; /* section number plus zero, |cite_flag|, or |def_flag|;
or |file_flag| alone for a section used for multi-file output */
struct xref_info *xlink; /* pointer to the previous cross-reference */
} xref_info;
typedef xref_info *xref_pointer; /* pointer to an element of |xmem| */
@ @<Global...@>=
xref_info xmem[max_refs]; /* contains cross-reference information */
xref_pointer xmem_end = xmem+max_refs-1; /* the last element of |xmem| */
xref_pointer xref_ptr; /* the largest occupied position in |xmem| */
sixteen_bits xref_switch,section_xref_switch; /* |xref_switch| is zero or
|def_flag|; |section_xref_switch| is zero, |cite_flag|, or |def_flag| */
@ A section that is used for multi-file output (with the \.{@@(} feature)
has a special first cross-reference whose |num| field is |file_flag|.
@d file_flag (3*cite_flag) /* |num| of the special first entry of a multi-file section */
@d def_flag (2*cite_flag) /* added to the number of a section where a name is defined */
@d cite_flag 10240 /* must be strictly larger than |max_sections| */
@d xref equiv_or_xref /* so that |p->xref| stands for |p->equiv_or_xref| */
@<Set init...@>=
xref_ptr=xmem; name_dir->xref=(char*)xmem; xref_switch=0; section_xref_switch=0;
/* nothing beyond |xmem[0]| is occupied yet, and both switches start cleared */
xmem->num=0; /* sentinel value */
@ A new cross-reference for an identifier is formed by calling |new_xref|,
which discards duplicate entries and ignores non-underlined references
to one-letter identifiers or \CEE/'s reserved words.
If the user has set the |no_xref| flag (the \.{-x} option of the command line),
it is unnecessary to keep track of cross-references for identifiers.
If one were careful, one could probably make more changes around section
100 to avoid a lot of identifier looking up.
@d append_xref(c) if (xref_ptr==xmem_end) overflow("cross-reference");
else (++xref_ptr)->num=(c) /* claim the next slot of |xmem| and store |c| there;
the caller supplies the closing semicolon */;
@d no_xref (flags['x']==0)
@d make_xrefs flags['x'] /* should cross references be output? */
@d is_tiny(p) (((p)+1)->byte_start==(p)->byte_start+1)
/* is the name exactly one character long? */
@d unindexed(a) ((a)<res_wd_end && (a)->ilk>=custom)
/* tells if uses of a name are not to be indexed unless underlined */
@c
void
new_xref(p)
name_pointer p; /* the identifier being referenced in the current section */
{
  xref_pointer old_ref; /* most recent cross-reference already stored for |p| */
  sixteen_bits new_num, old_num; /* value to record and value found in |old_ref| */
  if (no_xref) return;
  if (xref_switch==0 && (unindexed(p) || is_tiny(p))) return;
    /* plain uses of reserved words and one-letter names are ignored */
  new_num=section_count+xref_switch;
  xref_switch=0; /* the switch applies to one reference only */
  old_ref=(xref_pointer)p->xref;
  if (old_ref!=xmem) {
    old_num=old_ref->num;
    if (old_num==new_num || old_num==new_num+def_flag)
      return; /* this section is already on the list */
    if (new_num==old_num+def_flag) {
      old_ref->num=new_num; /* upgrade the existing entry to a definition */
      return;
    }
  }
  append_xref(new_num);
  xref_ptr->xlink=old_ref;
  p->xref=(char*)xref_ptr; /* the new entry becomes the head of the list */
}
@ The cross-reference lists for section names are slightly different.
Suppose that a section name is defined in sections $m_1$, \dots,
$m_k$, cited in sections $n_1$, \dots, $n_l$, and used in sections
$p_1$, \dots, $p_j$. Then its list will contain $m_1+|def_flag|$,
\dots, $m_k+|def_flag|$, $n_1+|cite_flag|$, \dots,
$n_l+|cite_flag|$, $p_1$, \dots, $p_j$, in this order.
Although this method of storage takes quadratic time with respect to
the length of the list, under foreseeable uses of \.{CWEAVE} this inefficiency
is insignificant.
@c
void
new_section_xref(p)
name_pointer p;
{
xref_pointer q,r; /* pointers to previous cross-references */
q=(xref_pointer)p->xref; r=xmem;
if (q>xmem)
while (q->num>section_xref_switch) {r=q; q=q->xlink;}
if (r->num==section_count+section_xref_switch)
return; /* don't duplicate entries */
append_xref(section_count+section_xref_switch);
xref_ptr->xlink=q; section_xref_switch=0;
if (r==xmem) p->xref=(char*)xref_ptr;
else r->xlink=xref_ptr;
}
@ The cross-reference list for a section name may also begin with
|file_flag|. Here's how that flag gets put~in.
@c
void
set_file_flag(p)
name_pointer p;
{
xref_pointer q;
q=(xref_pointer)p->xref;
if (q->num==file_flag) return;
append_xref(file_flag);
xref_ptr->xlink = q;
p->xref = (char *)xref_ptr;
}
@ A third large area of memory is used for sixteen-bit `tokens', which appear
in short lists similar to the strings of characters in |byte_mem|. Token lists
are used to contain the result of \CEE/ code translated into \TEX/ form;
further details about them will be explained later. A |text_pointer| variable
is an index into |tok_start|.
@<Typed...@>=
typedef sixteen_bits token; /* one sixteen-bit token */
typedef token *token_pointer; /* pointer to an element of |tok_mem| */
typedef token_pointer *text_pointer; /* pointer to an element of |tok_start| */
@ The first position of |tok_mem|
that is unoccupied by replacement text is called |tok_ptr|, and the first
unused location of |tok_start| is called |text_ptr|.
Thus, we usually have |*text_ptr==tok_ptr|.
@<Global...@>=
token tok_mem[max_toks]; /* tokens */
token_pointer tok_mem_end = tok_mem+max_toks-1; /* end of |tok_mem| */
token_pointer tok_start[max_texts]; /* directory into |tok_mem| */
token_pointer tok_ptr; /* first unused position in |tok_mem| */
text_pointer text_ptr; /* first unused position in |tok_start| */
text_pointer tok_start_end = tok_start+max_texts-1; /* end of |tok_start| */
token_pointer max_tok_ptr; /* largest value of |tok_ptr| */
text_pointer max_text_ptr; /* largest value of |text_ptr| */
@ @<Set init...@>=
/* both areas begin to be filled at index 1; |tok_start[0]| and |tok_start[1]|
point to the same place */
tok_ptr=tok_mem+1; text_ptr=tok_start+1; tok_start[0]=tok_mem+1;
tok_start[1]=tok_mem+1;
max_tok_ptr=tok_mem+1; max_text_ptr=tok_start+1; /* high-water marks so far */
@ Here are the three procedures needed to complete |id_lookup|:
@c
int names_match(p,first,l,t)
name_pointer p; /* points to the proposed match */
char *first; /* position of first character of string */
int l; /* length of identifier */
eight_bits t; /* desired ilk */
{
  /* The ilk is acceptable if it agrees exactly, or if a |normal| name
     is wanted and |p| is abnormal. */
  if (length(p)==l && (p->ilk==t || (t==normal && abnormal(p))))
    return strncmp(first,p->byte_start,l)==0; /* compare the characters */
  return 0;
}
void
init_p(p,t)
name_pointer p; /* the name that has just been entered */
eight_bits t; /* the ilk it is to receive */
{
  p->xref=(char*)xmem; /* |xmem| itself marks an empty cross-reference list */
  p->ilk=t;
}
void
init_node(p)
name_pointer p; /* the node being initialized */
{
p->xref=(char*)xmem; /* |xmem| itself marks an empty cross-reference list */
}
@ We have to get \CEE/'s
reserved words into the hash table, and the simplest way to do this is
to insert them every time \.{CWEAVE} is run. Fortunately there are relatively
few reserved words. (Some of these are not strictly ``reserved,'' but
are defined in header files of the ISO Standard \CEE/ Library.)
@^reserved words@>
@<Store all the reserved words@>=
/* each call enters one word with the ilk that governs its formatting */
id_lookup("and",NULL,alfop);
id_lookup("and_eq",NULL,alfop);
id_lookup("asm",NULL,sizeof_like);
id_lookup("auto",NULL,int_like);
id_lookup("bitand",NULL,alfop);
id_lookup("bitor",NULL,alfop);
id_lookup("bool",NULL,raw_int);
id_lookup("break",NULL,case_like);
id_lookup("case",NULL,case_like);
id_lookup("catch",NULL,catch_like);
id_lookup("char",NULL,raw_int);
id_lookup("class",NULL,struct_like);
id_lookup("clock_t",NULL,raw_int);
id_lookup("compl",NULL,alfop);
id_lookup("const",NULL,const_like);
id_lookup("const_cast",NULL,raw_int);
id_lookup("continue",NULL,case_like);
id_lookup("default",NULL,case_like);
id_lookup("define",NULL,define_like);
id_lookup("defined",NULL,sizeof_like);
id_lookup("delete",NULL,delete_like);
id_lookup("div_t",NULL,raw_int);
id_lookup("do",NULL,do_like);
id_lookup("double",NULL,raw_int);
id_lookup("dynamic_cast",NULL,raw_int);
id_lookup("elif",NULL,if_like);
id_lookup("else",NULL,else_like);
id_lookup("endif",NULL,if_like);
id_lookup("enum",NULL,struct_like);
id_lookup("error",NULL,if_like);
id_lookup("explicit",NULL,int_like);
id_lookup("export",NULL,int_like);
id_lookup("extern",NULL,int_like);
id_lookup("FILE",NULL,raw_int);
id_lookup("float",NULL,raw_int);
id_lookup("for",NULL,for_like);
id_lookup("fpos_t",NULL,raw_int);
id_lookup("friend",NULL,int_like);
id_lookup("goto",NULL,case_like);
id_lookup("if",NULL,if_like);
id_lookup("ifdef",NULL,if_like);
id_lookup("ifndef",NULL,if_like);
id_lookup("include",NULL,if_like);
id_lookup("inline",NULL,int_like);
id_lookup("int",NULL,raw_int);
id_lookup("jmp_buf",NULL,raw_int);
id_lookup("ldiv_t",NULL,raw_int);
id_lookup("line",NULL,if_like);
id_lookup("long",NULL,raw_int);
id_lookup("mutable",NULL,int_like);
id_lookup("namespace",NULL,struct_like);
id_lookup("new",NULL,new_like);
id_lookup("not",NULL,alfop);
id_lookup("not_eq",NULL,alfop);
id_lookup("NULL",NULL,custom);
id_lookup("offsetof",NULL,raw_int);
id_lookup("operator",NULL,operator_like);
id_lookup("or",NULL,alfop);
id_lookup("or_eq",NULL,alfop);
id_lookup("pragma",NULL,if_like);
id_lookup("private",NULL,public_like);
id_lookup("protected",NULL,public_like);
id_lookup("ptrdiff_t",NULL,raw_int);
id_lookup("public",NULL,public_like);
id_lookup("register",NULL,int_like);
id_lookup("reinterpret_cast",NULL,raw_int);
id_lookup("return",NULL,case_like);
id_lookup("short",NULL,raw_int);
id_lookup("sig_atomic_t",NULL,raw_int);
id_lookup("signed",NULL,raw_int);
id_lookup("size_t",NULL,raw_int);
id_lookup("sizeof",NULL,sizeof_like);
id_lookup("static",NULL,int_like);
id_lookup("static_cast",NULL,raw_int);
id_lookup("struct",NULL,struct_like);
id_lookup("switch",NULL,for_like);
id_lookup("template",NULL,template_like);
id_lookup("this",NULL,custom);
id_lookup("throw",NULL,case_like);
id_lookup("time_t",NULL,raw_int);
id_lookup("try",NULL,else_like);
id_lookup("typedef",NULL,typedef_like);
id_lookup("typeid",NULL,raw_int);
id_lookup("typename",NULL,struct_like);
id_lookup("undef",NULL,if_like);
id_lookup("union",NULL,struct_like);
id_lookup("unsigned",NULL,raw_int);
id_lookup("using",NULL,int_like);
id_lookup("va_dcl",NULL,decl); /* Berkeley's variable-arg-list convention */
id_lookup("va_list",NULL,raw_int); /* ditto */
id_lookup("virtual",NULL,int_like);
id_lookup("void",NULL,raw_int);
id_lookup("volatile",NULL,const_like);
id_lookup("wchar_t",NULL,raw_int);
id_lookup("while",NULL,for_like);
id_lookup("xor",NULL,alfop);
id_lookup("xor_eq",NULL,alfop);
res_wd_end=name_ptr; /* |unindexed| compares names against this boundary */
id_lookup("TeX",NULL,custom); /* entered after |res_wd_end| has been set */
id_lookup("make_pair",NULL,func_template); /* likewise */
@* Lexical scanning.
Let us now consider the subroutines that read the \.{CWEB} source file
and break it into meaningful units. There are four such procedures:
One simply skips to the next `\.{@@\ }' or `\.{@@*}' that begins a
section; another passes over the \TEX/ text at the beginning of a
section; the third passes over the \TEX/ text in a \CEE/ comment;
and the last, which is the most interesting, gets the next token of
a \CEE/ text. They all use the pointers |limit| and |loc| into
the line of input currently being studied.
@ Control codes in \.{CWEB}, which begin with `\.{@@}', are converted
into a numeric code designed to simplify \.{CWEAVE}'s logic; for example,
larger numbers are given to the control codes that denote more significant
milestones, and the code of |new_section| should be the largest of
all. Some of these numeric control codes take the place of |char|
control codes that will not otherwise appear in the output of the
scanning routines.
@^ASCII code dependencies@>
@d ignore 00 /* control code of no interest to \.{CWEAVE} */
@d verbatim 02 /* takes the place of extended ASCII \.{\char2} */
@d begin_short_comment 03 /* \CPLUSPLUS/ short comment */
@d begin_comment '\t' /* tab marks will not appear */
@d underline '\n' /* this code will be intercepted without confusion */
@d noop 0177 /* takes the place of ASCII delete */
@d xref_roman 0203 /* control code for `\.{@@\^}' */
@d xref_wildcard 0204 /* control code for `\.{@@:}' */
@d xref_typewriter 0205 /* control code for `\.{@@.}' */
@d TeX_string 0206 /* control code for `\.{@@t}' */
@f TeX_string TeX
@d ord 0207 /* control code for `\.{@@'}' */
@d join 0210 /* control code for `\.{@@\&}' */
@d thin_space 0211 /* control code for `\.{@@,}' */
@d math_break 0212 /* control code for `\.{@@\v}' */
@d line_break 0213 /* control code for `\.{@@/}' */
@d big_line_break 0214 /* control code for `\.{@@\#}' */
@d no_line_break 0215 /* control code for `\.{@@+}' */
@d pseudo_semi 0216 /* control code for `\.{@@;}' */
@d macro_arg_open 0220 /* control code for `\.{@@[}' */
@d macro_arg_close 0221 /* control code for `\.{@@]}' */
@d trace 0222 /* control code for `\.{@@0}', `\.{@@1}' and `\.{@@2}' */
@d translit_code 0223 /* control code for `\.{@@l}' */
@d output_defs_code 0224 /* control code for `\.{@@h}' */
@d format_code 0225 /* control code for `\.{@@f}' and `\.{@@s}' */
@d definition 0226 /* control code for `\.{@@d}' */
@d begin_C 0227 /* control code for `\.{@@c}' */
@d section_name 0230 /* control code for `\.{@@<}' */
@d new_section 0231 /* control code for `\.{@@\ }' and `\.{@@*}';
should remain the largest of all these codes */
@ Control codes are converted to \.{CWEAVE}'s internal
representation by means of the table |ccode|.
@<Global...@>=
eight_bits ccode[256]; /* meaning of a char following \.{@@} */
@ @<Set ini...@>=
{ int k; /* runs through every character code */
  for (k=0; k<256; k++) ccode[k]=ignore; /* nothing is special by default */
}
ccode[' ']=new_section; ccode['\t']=new_section; ccode['\n']=new_section;
ccode['\v']=new_section; ccode['\r']=new_section; ccode['\f']=new_section;
ccode['*']=new_section; /* white space or an asterisk begins a section */
ccode['@@']='@@'; /* `quoted' at sign */
ccode['=']=verbatim;
ccode['d']=definition; ccode['D']=definition;
ccode['f']=format_code; ccode['F']=format_code;
ccode['s']=format_code; ccode['S']=format_code;
ccode['c']=begin_C; ccode['C']=begin_C;
ccode['p']=begin_C; ccode['P']=begin_C;
ccode['t']=TeX_string; ccode['T']=TeX_string;
ccode['l']=translit_code; ccode['L']=translit_code;
ccode['q']=noop; ccode['Q']=noop;
ccode['h']=output_defs_code; ccode['H']=output_defs_code;
ccode['&']=join;
ccode['<']=section_name; ccode['(']=section_name;
ccode['!']=underline;
ccode['^']=xref_roman; ccode[':']=xref_wildcard; ccode['.']=xref_typewriter;
ccode[',']=thin_space; ccode['|']=math_break;
ccode['/']=line_break; ccode['#']=big_line_break; ccode['+']=no_line_break;
ccode[';']=pseudo_semi;
ccode['[']=macro_arg_open; ccode[']']=macro_arg_close;
ccode['\'']=ord;
@<Special control codes for debugging@>@;
@ Users can write
\.{@@2}, \.{@@1}, and \.{@@0} to turn tracing fully on, partly on,
and off, respectively.
@<Special control codes...@>=
ccode['0']=ccode['1']=ccode['2']=trace;
@ The |skip_limbo| routine is used on the first pass to skip through
portions of the input that are not in any sections, i.e., that precede
the first section. After this procedure has been called, the value of
|input_has_ended| will tell whether or not a section has actually been found.
There's a complication that we will postpone until later: If the \.{@@s}
operation appears in limbo, we want to use it to adjust the default
interpretation of identifiers.
@<Predec...@>=
void skip_limbo();
@ @c
void
skip_limbo() {
  int c; /* meaning of the character that follows an at sign */
  for (;;) {
    if (loc>limit && get_line()==0) return; /* the input has ended */
    *(limit+1)='@@'; /* sentinel that stops the scan below */
    while (*loc!='@@') loc++; /* look for '@@', then skip two chars */
    if (loc++ >limit) continue; /* only the sentinel was found on this line */
    c=ccode[(eight_bits)*loc++];
    if (c==new_section) return; /* the first section begins here */
    if (c==noop) skip_restricted();
    else if (c==format_code) @<Process simple format in limbo@>;
  }
}
@ The |skip_TeX| routine is used on the first pass to skip through
the \TEX/ code at the beginning of a section. It returns the next
control code or `\.{\v}' found in the input. A |new_section| is
assumed to exist at the very end of the file.
@f skip_TeX TeX
@c
unsigned
skip_TeX() /* skip past pure \TEX/ code */
{
  for (;;) {
    if (loc>limit && !get_line()) return(new_section); /* end of input */
    *(limit+1)='@@'; /* sentinel that stops the scan below */
    for (; *loc!='@@' && *loc!='|'; loc++) ; /* find a bar or an at sign */
    if (*loc=='|') {
      loc++; return('|');
    }
    loc++; /* step over the at sign */
    if (loc<=limit) return(ccode[(eight_bits)*(loc++)]);
      /* otherwise it was the sentinel, so go on to the next line */
  }
}
@*1 Inputting the next token.
As stated above, \.{CWEAVE}'s most interesting lexical scanning routine is the
|get_next| function that inputs the next token of \CEE/ input. However,
|get_next| is not especially complicated.
The result of |get_next| is either a |char| code for some special character,
or it is a special code representing a pair of characters (e.g., `\.{!=}'),
or it is the numeric value computed by the |ccode|
table, or it is one of the following special codes:
\yskip\hang |identifier|: In this case the global variables |id_first| and
|id_loc| will have been set to the beginning and ending-plus-one locations
in the buffer, as required by the |id_lookup| routine.
\yskip\hang |string|: The string will have been copied into the array
|section_text|; |id_first| and |id_loc| are set as above (now they are
pointers into |section_text|).
\yskip\hang |constant|: The constant is copied into |section_text|, with
slight modifications; |id_first| and |id_loc| are set.
\yskip\noindent Furthermore, some of the control codes cause
|get_next| to take additional actions:
\yskip\hang |xref_roman|, |xref_wildcard|, |xref_typewriter|, |TeX_string|,
|verbatim|: The values of |id_first| and |id_loc| will have been set to
the beginning and ending-plus-one locations in the buffer.
\yskip\hang |section_name|: In this case the global variable |cur_section| will
point to the |byte_start| entry for the section name that has just been scanned.
The value of |cur_section_char| will be |'('| if the section name was
preceded by \.{@@(} instead of \.{@@<}.
\yskip\noindent If |get_next| sees `\.{@@!}'
it sets |xref_switch| to |def_flag| and goes on to the next token.
@d constant 0200 /* \CEE/ constant */
@d string 0201 /* \CEE/ string */
@d identifier 0202 /* \CEE/ identifier or reserved word */
@<Global...@>=
name_pointer cur_section; /* name of section just scanned */
char cur_section_char; /* the character just before that name */
@ @<Include...@>=
#include <ctype.h> /* definition of |isalpha|, |isdigit| and so on */
#include <stdlib.h> /* definition of |exit| */
@ As one might expect, |get_next| consists mostly of a big switch
that branches to the various special cases that can arise.
\CEE/ allows underscores to appear in identifiers, and some \CEE/
compilers even allow the dollar sign.
@d isxalpha(c) ((c)=='_' || (c)=='$')
/* non-alpha characters allowed in identifier */
@d ishigh(c) ((eight_bits)(c)>0177)
@^high-bit character handling@>
@<Predecl...@>=
eight_bits get_next();
@ @c
/* Scans the next token of \CEE/ text, starting at |loc|. The result is a
character code, a code for a pair of characters, a value from the |ccode|
table, or one of |identifier|, |string|, |constant|; it is |new_section|
when the input is exhausted. */
eight_bits
get_next() /* produces the next input token */
{@+eight_bits c; /* the current character */
while (1) {
@<Check if we're at the end of a preprocessor command@>;
if (loc>limit && get_line()==0) return(new_section); /* no more input */
c=*(loc++);
if (xisdigit(c) || c=='.') @<Get a constant@>@;
else if (c=='\'' || c=='"' || (c=='L'&&(*loc=='\'' || *loc=='"'))@|
|| (c=='<' && sharp_include_line==1))
@<Get a string@>@;
else if (xisalpha(c) || isxalpha(c) || ishigh(c))
@<Get an identifier@>@;
else if (c=='@@') @<Get control code and possible section name@>@;
else if (xisspace(c)) continue; /* ignore spaces and tabs */
if (c=='#' && loc==buffer+1) @<Raise preprocessor flag@>;
/* the test above holds only when |c| was the first character of the line */
/* NOTE(review): |mistake| is presumably reached by |goto| from one of the
sections above, which are not visible here; confirm */
mistake: @<Compress two-symbol operator@>@;
return(c);
}
}
@ Because preprocessor commands do not fit in with the rest of the syntax
of \CEE/,
we have to deal with them separately. One solution is to enclose such
commands between special markers. Thus, when a \.\# is seen as the
first character of a line, |get_next| returns a special code
|left_preproc| and raises a flag |preprocessing|.
We can use the same internal code number for |left_preproc| as we do
for |ord|, since |get_next| changes |ord| into a string.
@d left_preproc ord /* begins a preprocessor command */
@d right_preproc 0217 /* ends a preprocessor command */
@<Glob...@>=
boolean preprocessing=0; /* are we scanning a preprocessor command? */
@ @<Raise prep...@>= {
preprocessing=1; /* lowered again when the end of the command is seen */
@<Check if next token is |include|@>; /* may set |sharp_include_line| */
return (left_preproc);
}
@ An additional complication is the freakish use of \.< and \.> to delimit
a file name in lines that start with \.{\#include}. We must treat this file
name as a string.
@<Glob...@>=
boolean sharp_include_line=0; /* nonzero while a |#include| line is scanned */
@ @<Check if next token is |include|@>=
for (; loc<=buffer_end-7 && xisspace(*loc); loc++) ; /* skip white space */
if (loc<=buffer_end-6 && strncmp(loc,"include",7)==0) {
  sharp_include_line=1; /* the file name will be scanned as a string */
}
@ When we get to the end of a preprocessor line,
we lower the flag and send a code |right_preproc|, unless
the last character was a \.\\.
@<Check if we're at...@>=
while (preprocessing && loc==limit-1 && *loc=='\\') {
  /* a final backslash continues the command on the following line */
  if (get_line()==0) return(new_section); /* still in preprocessor mode */
}
if (preprocessing && loc>=limit) { /* the command ends with this line */
  sharp_include_line=0;
  preprocessing=0;
  return(right_preproc);
}
@ The following code assigns values to the combinations \.{++},
\.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, \.{\v\v}, and
\.{\&\&}, and to the \CPLUSPLUS/
combinations \.{...}, \.{::}, \.{.*} and \.{->*}.
The compound assignment operators (e.g., \.{+=}) are
treated as separate tokens.
@d compress(c) if (loc++<=limit) return(c)
  /* advance |loc| and, if it was not beyond |limit|, return |c| */
@<Compress tw...@>=
  /* |compress| is an |if| statement, hence the braces before each |else| */
switch(c) {
case '/': if (*loc=='*') {compress(begin_comment);}
else if (*loc=='/') compress(begin_short_comment); break;
case '+': if (*loc=='+') compress(plus_plus); break;
case '-': if (*loc=='-') {compress(minus_minus);}
else if (*loc=='>') if (*(loc+1)=='*') {loc++; compress(minus_gt_ast);}
else compress(minus_gt); break;
case '.': if (*loc=='*') {compress(period_ast);}
else if (*loc=='.' && *(loc+1)=='.') {
loc++; compress(dot_dot_dot); /* skip both remaining periods */
}
break;
case ':': if (*loc==':') compress(colon_colon); break;
case '=': if (*loc=='=') compress(eq_eq); break;
case '>': if (*loc=='=') {compress(gt_eq);}
else if (*loc=='>') compress(gt_gt); break;
case '<': if (*loc=='=') {compress(lt_eq);}
else if (*loc=='<') compress(lt_lt); break;
case '&': if (*loc=='&') compress(and_and); break;
case '|': if (*loc=='|') compress(or_or); break;
case '!': if (*loc=='=') compress(not_eq); break;
}
@ @<Get an identifier@>= {
  id_first=--loc; /* back up to the first character of the identifier */
  while (isalpha((eight_bits)*++loc) || isdigit((eight_bits)*loc)
      || isxalpha(*loc) || ishigh(*loc)) ;
    /* the casts keep negative |char| values away from the \CEE/ library;
       characters above octal 177 are still accepted by |ishigh| */
  id_loc=loc; /* one past the last character of the identifier */
  return(identifier);
}
@ Different conventions are followed by \TEX/ and \CEE/ to express octal
and hexadecimal numbers; it is reasonable to stick to each convention
within its realm. Thus the \CEE/ part of a \.{CWEB} file has octals
introduced by \.0 and hexadecimals by \.{0x}, but \.{CWEAVE} will print
with \TEX/ macros that the user can redefine to fit the context.
In order to simplify such macros, we replace some of the characters.
Notice that in this section and the next, |id_first| and |id_loc|
are pointers into the array |section_text|, not into |buffer|.
@<Get a constant@>= {
id_first=id_loc=section_text+1; /* the text goes into |section_text| */
if (*(loc-1)=='0') { /* the constant begins with a zero */
if (*loc=='x' || *loc=='X') {*id_loc++='^'; loc++;
  /* a circumflex is stored in place of the hex prefix */
while (xisxdigit(*loc)) *id_loc++=*loc++;} /* hex constant */
else if (xisdigit(*loc)) {*id_loc++='~';
  /* a tilde is stored in place of the leading zero */
while (xisdigit(*loc)) *id_loc++=*loc++;} /* octal constant */
else goto dec; /* decimal constant */
}
else { /* decimal constant */
if (*(loc-1)=='.' && !xisdigit(*loc)) goto mistake; /* not a constant */
dec: *id_loc++=*(loc-1); /* the first character, already scanned */
while (xisdigit(*loc) || *loc=='.') *id_loc++=*loc++;
if (*loc=='e' || *loc=='E') { /* float constant */
*id_loc++='_'; loc++; /* an underscore replaces the exponent letter */
if (*loc=='+' || *loc=='-') *id_loc++=*loc++;
while (xisdigit(*loc)) *id_loc++=*loc++;
}
}
while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L'
|| *loc=='f' || *loc=='F') {
*id_loc++='$'; *id_loc++=toupper(*loc); loc++;
  /* each suffix letter is stored in upper case after a dollar sign */
}
return(constant);
}
@ \CEE/ strings and character constants, delimited by double and single
quotes, respectively, can contain newlines or instances of their own
delimiters if they are protected by a backslash. We follow this
convention, but do not allow the string to be longer than |longest_name|.
@<Get a string@>= {
  char delim = c; /* what started the string */
  id_first = section_text+1;
  id_loc = section_text; /* |id_loc| is advanced before each store */
  if (delim=='\'' && loc>=buffer+2 && *(loc-2)=='@@') {
    /* the bounds test keeps us from reading in front of |buffer|
       when the quote is the first character of a line */
    *++id_loc='@@'; *++id_loc='@@';
  }
  *++id_loc=delim;
  if (delim=='L') { /* wide character constant */
    delim=*loc++; *++id_loc=delim;
  }
  if (delim=='<') delim='>'; /* for file names in |#include| lines */
  while (1) {
    if (loc>=limit) { /* the line is used up */
      if(*(limit-1)!='\\') {
        err_print("! String didn't end"); loc=limit; break;
@.String didn't end@>
      }
      if(get_line()==0) {
        err_print("! Input ended in middle of string"); loc=buffer; break;
@.Input ended in middle of string@>
      }
    }
    if ((c=*loc++)==delim) { /* the closing delimiter */
      if (++id_loc<=section_text_end) *id_loc=c;
      break;
    }
    if (c=='\\') if (loc>=limit) continue; /* the line ends here */
      else if (++id_loc<=section_text_end) {
        *id_loc = '\\'; c=*loc++; /* keep the escaped character too */
      }
    if (++id_loc<=section_text_end) *id_loc=c;
  }
  if (id_loc>=section_text_end) {
    printf("\n! String too long: ");
@.String too long@>
    term_write(section_text+1,25);
    printf("..."); mark_error;
  }
  id_loc++; /* now one past the last character stored */
  return(string);
}
@ After an \.{@@} sign has been scanned, the next character tells us
whether there is more work to do.
@<Get control code and possible section name@>= {
c=*loc++; /* the character that follows the at-sign */
switch(ccode[(eight_bits)c]) {
case translit_code: err_print("! Use @@l in limbo only"); continue;
@.Use @@l in limbo...@>
case underline: xref_switch=def_flag; continue; /* go on to next token */
case trace: tracing=c-'0'; continue; /* presumably |c| is a digit here */
case xref_roman: case xref_wildcard: case xref_typewriter:
case noop: case TeX_string: c=ccode[c]; skip_restricted(); return(c);
  /* the control text is skipped and only its code is returned */
case section_name:
@<Scan the section name and make |cur_section| point to it@>;
  /* that section ends with a |return|, so control does not fall through */
case verbatim: @<Scan a verbatim string@>;
case ord: @<Get a string@>; /* this section always returns as well */
default: return(ccode[(eight_bits)c]);
}
}
@ The occurrence of a section name sets |xref_switch| to zero,
because the section name might (for example) follow \&{int}.
@<Scan the section name...@>= {
  char *k; /* pointer into |section_text| */
  int name_is_prefix; /* does the name end with three periods? */
  cur_section_char=*(loc-1);
  @<Put section name into |section_text|@>;
  name_is_prefix=(k-section_text>3 && strncmp(k-2,"...",3)==0);
  cur_section=section_lookup(section_text+1,
    name_is_prefix? k-3: k,name_is_prefix); /* 1 indicates a prefix */
  xref_switch=0;
  return(section_name);
}
@ Section names are placed into the |section_text| array with consecutive spaces,
tabs, and carriage-returns replaced by single spaces. There will be no
spaces at the beginning or the end. (We set |section_text[0]=' '| to facilitate
this, since the |section_lookup| routine uses |section_text[1]| as the first
character of the name.)
@<Set init...@>=section_text[0]=' '; /* a blank ahead of every name */
@ @<Put section name...@>=
k=section_text; /* |*k| is always the last character stored so far */
while (1) {
if (loc>limit && get_line()==0) {
err_print("! Input ended in section name");
@.Input ended in section name@>
loc=buffer+1; break;
}
c=*loc;
@<If end of name or erroneous control code, |break|@>;
loc++; if (k<section_text_end) k++;
  /* |k| stops advancing once it has reached |section_text_end| */
if (xisspace(c)) {
c=' '; if (*(k-1)==' ') k--; /* merge a run of white space into one blank */
}
*k=c;
}
if (k>=section_text_end) {
printf("\n! Section name too long: ");
@.Section name too long@>
term_write(section_text+1,25);
printf("..."); mark_harmless;
}
if (*k==' ' && k>section_text) k--; /* drop a trailing blank */
@ @<If end of name...@>=
if (c=='@@') {
  c=*(loc+1); /* look at the character after the at-sign */
  if (c=='>') { /* the name ends here */
    loc+=2; break;
  }
  if (ccode[(eight_bits)c]==new_section) {
    err_print("! Section name didn't end"); break;
@.Section name didn't end@>
  }
  if (c!='@@') {
    err_print("! Control codes are forbidden in section name"); break;
@.Control codes are forbidden...@>
  }
  if (k<section_text_end) k++;
    /* clamp |k| as the main loop does, so that a doubled at-sign
       is never stored beyond |section_text_end| */
  *k='@@'; loc++; /* now |c==*loc| again */
}
@ This function skips over a restricted context at relatively high speed.
@<Predecl...@>=
void skip_restricted();
@ @c
void
skip_restricted()
{
  id_first=loc;
  limit[1]='@@'; /* a sentinel, so that the scan below must stop */
  for (;;) {
    while (*loc!='@@') loc++;
    id_loc=loc; /* the control text ends here, at least tentatively */
    if (loc++>limit) { /* only the sentinel was found */
      err_print("! Control text didn't end");
@.Control text didn't end@>
      loc=limit;
      return;
    }
    if (!(*loc=='@@' && loc<=limit)) break;
    loc++; /* a doubled at-sign belongs to the text; keep scanning */
  }
  if (*loc++!='>')
    err_print("! Control codes are forbidden in control text");
@.Control codes are forbidden...@>
}
@ At the present point in the program we
have |ccode[(eight_bits)*(loc-1)]==verbatim|;
we set |id_first| to the beginning
of the string itself, and |id_loc| to its ending-plus-one location in the
buffer. We also set |loc| to the position just after the ending delimiter.
@<Scan a verbatim string@>= {
id_first=loc++; *(limit+1)='@@'; *(limit+2)='>';