-
Notifications
You must be signed in to change notification settings - Fork 0
/
dupplot_old.pl
executable file
·98 lines (79 loc) · 2.04 KB
/
dupplot_old.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/perl
# Usage ./dupplot.pl file1 file2 >/tmp/data ;
# gnuplot, and type: set title "My Plot"; plot '/tmp/data'
# Author Raimon Grau <[email protected]>. Artistic License v2.0
use strict;
use warnings;
use Data::Dumper;
use Digest::MD5;
use File::Basename;
# Print all arguments followed by a newline (local stand-in for the
# built-in say feature). Returns whatever print returns.
sub say {
    my @items = @_;
    return print @items, "\n";
}
# Per-extension line sanitizers, keyed by file suffix (leading dot included).
# Each entry takes one line of text and returns the normalized text that is
# subsequently hashed; a result that is empty or whitespace-only is skipped by
# the file readers in this script.
#
# The sanitizers work on a lexical copy of their argument so that calling one
# never overwrites the caller's global $_.
my %sanitizer = (
    # Perl: lines are returned unchanged.
    ".pl" => sub {
        my $line = shift;
        return $line;
    },
    # Ruby: a line consisting solely of `end` (plus whitespace) is blanked.
    ".rb" => sub {
        my $line = shift;
        $line =~ s/^\s*end\s*$//;
        return $line;
    },
    # Lisp: strip everything from the first ';' onward, then collapse a
    # trailing run of ')' into a single ')'.
    # NOTE(review): the ';' rule also fires inside string literals.
    ".lisp" => sub {
        my $line = shift;
        $line =~ s/;.*//;
        $line =~ s/\)+$/\)/;
        return $line;
    },
);
# Return the suffix of a path: the final dot and everything after it
# (e.g. ".pl"), or the empty string when the file name contains no dot.
sub extension_for {
    my ($path) = @_;
    # fileparse returns (name, directory, suffix); only the suffix is wanted.
    my $suffix = (fileparse($path, qr/\.[^.]*/))[2];
    return $suffix;
}
# Index the lines of a file by the MD5 digest of their sanitized content.
#
# Parameters:
#   $f - path of the file to read.
# Returns:
#   A hash (as a flat list) mapping the MD5 hex digest of each non-blank,
#   sanitized line to an array reference holding every line number at which
#   that content occurs.
# Dies if the file cannot be opened.
sub process_file_1 {
    my $f = shift;
    my %h;
    open(my $fh, "<", $f)
        or die "Can't open < $f: $!";
    my $ext = extension_for($f);
    # Look the sanitizer up once; files with an unknown suffix are used as-is.
    my $sanitize = exists $sanitizer{$ext} ? $sanitizer{$ext} : undef;
    # Read into a lexical so the caller's global $_ is left alone.
    while (my $line = <$fh>) {
        chomp $line;
        $line = $sanitize->($line) if $sanitize;
        next if $line =~ /^\s*$/;
        # push autovivifies the array on first sight of a digest.
        push @{ $h{ Digest::MD5::md5_hex($line) } }, $.;
    }
    # Close the handle itself (not the file name string).
    close($fh);
    return %h;
}
# Match the lines of a second file against the digest index of a first file.
#
# Parameters:
#   $f  - path of the file to read.
#   $h1 - hash reference mapping MD5 hex digests to array references of line
#         numbers (the structure built by process_file_1). It is only read;
#         no entries are added to it.
# Returns:
#   An array reference of [line_number_in_this_file, line_number_in_index]
#   pairs, one for every occurrence of a matching line.
# Dies if the file cannot be opened.
sub process_file_2 {
    my $f  = shift;
    my $h1 = shift;
    my @tuples;
    open(my $fh, "<", $f)
        or die "Can't open < $f: $!";
    my $ext = extension_for($f);
    # Look the sanitizer up once; files with an unknown suffix are used as-is.
    my $sanitize = exists $sanitizer{$ext} ? $sanitizer{$ext} : undef;
    # Read into a lexical so the caller's global $_ is left alone.
    while (my $line = <$fh>) {
        chomp $line;
        $line = $sanitize->($line) if $sanitize;
        next if $line =~ /^\s*$/;
        # Plain lookup first: dereferencing a missing entry inside a foreach
        # list would autovivify an empty array in the caller's hash.
        my $matches = $h1->{ Digest::MD5::md5_hex($line) }
            or next;
        push @tuples, map { [ $., $_ ] } @$matches;
    }
    # Close the handle itself (not the file name string).
    close($fh);
    return \@tuples;
}
# Entry point: compare two files and print one "X Y" pair per matching line,
# where X is the line number in the second file and Y is the line number in
# the first file. The output is suitable as a gnuplot data file.
#
# Parameters:
#   $file1 - path of the first file (indexed by line content).
#   $file2 - path of the second file (matched against the index).
# Dies with a usage message if either path is missing.
sub main {
    my ($file1, $file2) = @_;
    # Fail early with a clear message instead of trying to open undef.
    die "Usage: $0 file1 file2\n"
        unless defined $file1 && defined $file2;
    # Get hashes and line numbers for first file
    my %h1 = process_file_1($file1);
    # Match hashes with lines from the second file
    my $t = process_file_2($file2, \%h1);
    # Print lines in a suitable form for gnuplot
    for my $tuple (@$t) {
        say $tuple->[0], " ", $tuple->[1];
    }
}
# At file scope, shift takes the next command-line argument from @ARGV.
main(shift, shift);
# Dumper($t);
# sed -e 's/;.*//' -e 's/ \+/ /' -e'/^$/d' | perl -MDigest::MD5=md5_hex -ne 'print md5_hex($_),"\n"' | cat -n >/tmp/b.txt