-
Notifications
You must be signed in to change notification settings - Fork 1
/
rules2frames
executable file
·38 lines (32 loc) · 1.24 KB
/
rules2frames
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/local/bin/perl -w
# Author: Jason Eisner, University of Pennsylvania
# Usage: rules2frames [-w] [-c] [files ...]
#
# Expects input that has been produced by "flatten -w ... | listrules ..."
# This has lines of the format
# NP~|NP|NN|length DT|the @ PP|IN|of
# which we want to turn into
# length NP~|NP|NN DT @ PP|IN
# i.e., three tab separated fields: word, LHS, RHS.
#
# -w says to leave the frames lexicalized.
use strict;

require("stamp.inc"); &stamp;    # modify $0 and @INC, and print timestamp

# Main script: convert each rule line
#     LHS|word <TAB> RHS elements ...
# read from the files named on the command line (or stdin) into a frame line
#     word <TAB> LHS <TAB> RHS elements ...
# on stdout.  Comment lines (starting with '#') are passed through unchanged.
# An optional "file:line:<TAB>" prefix on any input line is preserved on the
# corresponding output line.  A count of converted rules goes to stderr.

# -w: leave the lexical item on each RHS element.  @ARGV is tested first so
# that running with no arguments (input on stdin) does not compare undef and
# trigger an "uninitialized value" warning under the shebang's -w.
my $lexicalized = 0;
if (@ARGV && $ARGV[0] eq "-w") {
    $lexicalized = 1;
    shift(@ARGV);
}
die "$0: bad command line flags" if @ARGV && $ARGV[0] =~ /^-./;

# Start at 0 so the summary line is well-formed even when no rules are seen.
my $rules = 0;

while (<>) {
    # chomp, not chop: a final line with no trailing newline must not lose
    # its last real character.
    chomp;

    # Peel off an optional "file:line:<TAB>" location prefix; it is re-emitted
    # in front of the converted line and used in error messages.
    my $location = "";
    $location = $1 if s/^(\S+:[0-9]+:\t)//;

    unless (/^\#/) {    # unless a comment
        $rules++;

        # Split at the last tab: everything before it is the LHS, everything
        # after it is the whitespace-separated RHS.
        die "$0:$location incorrect format" unless /^(.*)\t(.*)$/;
        my $lhs = $1;
        my @rhs = split(" ", $2);

        unless ($lexicalized) {
            # Strip the lexical item (the last |-separated field) from each
            # RHS element.  Elements such as @, [, [FOO and ] contain no '|',
            # so the substitution simply leaves them alone.
            s/\|[^|]*$// foreach @rhs;
        }

        # The LHS's last |-separated field is its lexical item; it becomes
        # the first output field and the remainder becomes the second.
        $lhs =~ /(.*)\|(.*)/
            || die "$0:$location LHS $lhs missing lex item\n";
        $_ = "$2\t$1\t@rhs";
    }

    print "$location$_\n";
}

print STDERR "$0: converted $rules rules to frames\n";