#!/opt/bin/perl

# Joins the per-column dumps under data/row/ with the NUL-terminated phone
# numbers in data/dump/nummer into the tab-separated file data/row.txt, and
# writes every number prefix ("typ") that was seen to data/col/typ.txt.
#
# Progress (the last row index of each chunk) is printed to STDOUT; problems
# with individual numbers are reported on STDERR.

use strict;
use warnings;

use dinfo;

# Column files under data/row/, in the order they are indexed below.
my @columns = qw(branche hausnr name ort plz strasse vorname vorwahl zusatz1 zusatz2 zusatz3);

# Bytes fetched from every column file per pass; each column file is decoded
# as a sequence of unsigned 32-bit little-endian values (unpack "V*").
my $chunk_bytes = 4 * 1024 * 256;

my @fh;
for my $column (@columns) {
    open my $fh, "<", "data/row/$column" or die "$column: $!";
    binmode $fh;    # raw integers, no newline translation wanted
    push @fh, $fh;
}

open my $nummer_fh, "<", "data/dump/nummer" or die "nummer: $!";
open my $row_fh,    ">", "data/row.txt"     or die "row.txt: $!";

# Phone numbers are read as NUL-terminated records.
local $/ = "\0";

# Maps a number prefix to (typ + 1). Storing typ + 1 keeps every value true,
# which lets `||=` distinguish known prefixes from new ones. The empty prefix
# is typ 0.
my %typ   = ("" => 1);
my $typnr = 0;

CHUNK: for (;;) {
    my @data;
    for my $c (0 .. $#fh) {
        my $buf;
        my $got = sysread $fh[$c], $buf, $chunk_bytes;
        defined $got or die "$columns[$c]: $!";
        $data[$c] = [unpack "V*", $buf];
    }

    # Column 0 drives the row count. Once it is exhausted we are done;
    # without this exit the loop would spin forever at end of file and
    # neither row.txt would be closed nor typ.txt written.
    last CHUNK unless @{ $data[0] };

    # All columns are expected to be row-aligned; report it if they are not.
    for my $c (1 .. $#data) {
        next if @{ $data[$c] } == @{ $data[0] };
        warn "WARN: column $columns[$c] yielded " . @{ $data[$c] }
           . " values, expected " . @{ $data[0] } . "\n";
    }

    print "$#{$data[0]}\n";

    ROW: for my $i (0 .. $#{ $data[0] }) {
        # One phone number record is consumed per row, even if the row is
        # skipped below. A nummer file that runs out early yields empty
        # records, which are reported as unparseable.
        my $nummer = <$nummer_fh>;
        $nummer = "" unless defined $nummer;

        # Optional alphabetic prefix, optional "+", then digits and blanks.
        unless ($nummer =~ /^((?:[a-zA-Z][-.\/a-zA-Z0-9]*)?) (?:\s*\+)? \s* ([0-9\ ]+)\0$/x) {
            warn "ERR: unparseable telnr. '${nummer}'";
            next ROW;
        }
        my ($prefix, $digits) = ($1, $2);

        # Look up the typ of this prefix, allocating the next id if it is new.
        my $typ = ($typ{$prefix} ||= 1 + ++$typnr) - 1;

        # Values of this row; a column that came up short contributes "".
        my @field = map { defined $_->[$i] ? $_->[$i] : "" } @data;

        # Drop a leading 01xx code, then a leading 06033.
        $digits =~ s/^01\d\d//;
        $digits =~ s/^06033//;

        # NOTE(review): presumably dinfo::nummer2str keeps at most 11
        # characters -- confirm against the dinfo module.
        11 < length $digits and warn "WARN: truncated <$field[6]:$digits>\n";

        my $str = dinfo::nummer2str($digits);

        # Escape backslash, newline, tab and NUL so that the value is safe
        # inside a tab-separated, newline-terminated row.
        $str =~ s/([\\\x0a\x09])/\\$1/g;
        $str =~ s/\x00/\\0/g;

        # Columns 0-6, then the number and its typ, then columns 7-10.
        my $line = join("\t", @field[0 .. 6], $str, $typ, @field[7 .. 10]) . "\n";
        print {$row_fh} $line or die "row.txt: $!";
    }
}

# Buffered write errors only surface at close time.
close $row_fh or die "row.txt: $!";
close $nummer_fh;

# Write the prefix table, ordered by id so the output is reproducible.
# NOTE(review): the ids written here are the stored values, i.e. one greater
# than the typ written to row.txt -- confirm the consumer expects this offset.
open my $typ_fh, ">", "data/col/typ.txt" or die "typ.txt: $!";
for my $prefix (sort { $typ{$a} <=> $typ{$b} } keys %typ) {
    print {$typ_fh} "$typ{$prefix}\t$prefix\n" or die "typ.txt: $!";
}
close $typ_fh or die "typ.txt: $!";