#!/opt/bin/perl

# Convert the binary per-column row files in data/row/ and the raw phone
# number dump in data/dump/nummer into tab-separated text:
#
#   data/col/row.txt  one line per row: the first seven column values, the
#                     encoded phone number, the numeric type id, then the
#                     remaining four column values
#   data/col/typ.txt  one line per number type: "<id>\t<prefix>"
#
# Rows whose phone number cannot be parsed are reported on STDERR and skipped.

use strict;
use warnings;

use dinfo;

# Column files below data/row/, in the order their values are read.
my @COLUMNS = qw(branche hausnr name ort plz strasse vorname vorwahl zusatz1 zusatz2 zusatz3);

# Bytes read from each column file per pass; values are 4 bytes wide
# (unpack "V"), so this is 262144 rows per chunk.
my $CHUNK_BYTES = 4 * 1024 * 256;

my @fh;
for my $column (@COLUMNS) {
    open my $fh, "<", "data/row/$column" or die "$column: $!";
    binmode $fh;    # packed integers, never text
    push @fh, $fh;
}

open my $nummer_fh, "<", "data/dump/nummer" or die "nummer: $!";
open my $row_fh,    ">", "data/col/row.txt" or die "row.txt: $!";

# Records in the nummer dump are NUL-terminated.
local $/ = "\0";

# Type prefix => numeric id. The empty prefix is always id 0; every other
# prefix gets the next free id the first time it is seen.
my %typ_id      = ("" => 0);
my $last_typ_id = 0;

my $count = 0;

CHUNK:
for (;;) {
    # Read the next chunk of every column as a list of 32-bit little-endian
    # unsigned integers.
    my @data;
    for my $c (0 .. $#fh) {
        my $got = read $fh[$c], my $buf, $CHUNK_BYTES;
        defined $got or die "$COLUMNS[$c]: $!";
        $data[$c] = [unpack "V*", $buf];
    }

    my $rows = @{ $data[0] };
    last CHUNK unless $rows;

    # All columns are indexed with the same row number below, so a column
    # that delivered a different number of values would corrupt the output.
    for my $c (1 .. $#data) {
        @{ $data[$c] } == $rows
            or die "$COLUMNS[$c]: expected $rows values, got " . @{ $data[$c] } . "\n";
    }

    $count += $rows;
    print "$count\n";    # progress indicator

    ROW:
    for my $i (0 .. $rows - 1) {
        # Exactly one nummer record is consumed per row, including rows that
        # are skipped below, so the dump stays aligned with the columns.
        my $record = <$nummer_fh>;
        defined $record
            or die "nummer: unexpected end of file, no number for row "
                 . ($count - $rows + $i + 1) . "\n";

        # Optional alphabetic type prefix, optional "+", then digits/spaces.
        unless ($record =~ /^((?:[a-zA-Z][-.\/a-zA-Z0-9]*)?) (?:\s*\+)? \s* ([0-9\ ]+)\0$/x) {
            warn "ERR: unparseable telnr. '$record'";
            next ROW;
        }
        my ($prefix, $number) = ($1, $2);

        $typ_id{$prefix} = ++$last_typ_id unless exists $typ_id{$prefix};
        my $typ = $typ_id{$prefix};

        # Strip the known leading prefixes, in this order.
        if ($number =~ /^0/) {
            $number =~ s/^01\d\d//;
            $number =~ s/^06033//;
            $number =~ s/^0940//;
        }

        # Presumably nummer2str cannot hold more than 11 characters -- confirm
        # against dinfo. NOTE(review): column 6 is "vorname"; if the area code
        # was meant here it would be column 7 ("vorwahl") -- confirm.
        warn "WARN: truncated <$data[6][$i]:$number>\n" if length($number) > 11;

        my $encoded = dinfo::nummer2str($number);

        # Escape backslash, newline, tab and NUL so the encoded value cannot
        # break the tab-separated line format.
        $encoded =~ s/([\\\x0a\x09])/\\$1/g;
        $encoded =~ s/\x00/\\0/g;

        print {$row_fh} join("\t",
            (map { $_->[$i] } @data[0 .. 6]),
            $encoded,
            $typ,
            (map { $_->[$i] } @data[7 .. 10]),
        ), "\n";
    }
}

# Buffered write errors only surface here, so the result must be checked.
close $row_fh or die "row.txt: $!";
close $nummer_fh;
close $_ for @fh;

# Write the type table ordered by id so the file is reproducible.
open my $typ_fh, ">", "data/col/typ.txt" or die "typ.txt: $!";
for my $prefix (sort { $typ_id{$a} <=> $typ_id{$b} } keys %typ_id) {
    print {$typ_fh} "$typ_id{$prefix}\t$prefix\n";
}
close $typ_fh or die "typ.txt: $!";