#!/opt/bin/perl

# Merge the per-column dumps under data/row/ with the phone numbers in
# data/dump/nummer into one tab-separated file, data/col/row.txt, and write
# the table of number prefixes ("types") seen along the way to
# data/col/typ.txt.
#
# Each data/row/* file is read as a flat sequence of unsigned 32-bit
# little-endian integers (unpack "V*"); data/dump/nummer is read as
# NUL-terminated records, one per row.
# NOTE(review): the files are assumed to stay in lockstep (same number of
# entries in every column file and in the number dump) -- confirm.

use strict;
use warnings;

use dinfo;

# Column files, in the order their values are indexed below
# (0 = branche ... 6 = vorname ... 10 = zusatz3).
my @columns = qw(branche hausnr name ort plz strasse vorname vorwahl zusatz1 zusatz2 zusatz3);

# Bytes requested from every column file per pass (1 MiB = 262144 values).
my $chunk_bytes = 4 * 1024 * 256;

my @fh;
for my $column (@columns) {
    open my $fh, "<", "data/row/$column" or die "$column: $!";
    push @fh, $fh;
}

open my $nummer_fh, "<", "data/dump/nummer" or die "nummer: $!";
open my $row_fh, ">", "data/col/row.txt" or die "row.txt: $!";

# Records in the number dump are terminated by NUL, not by newline.
local $/ = "\0";

# %typ maps a number prefix to (id + 1); the offset keeps id 0 truthy so
# that "||=" below can be used for first-seen assignment.  The empty prefix
# is pre-registered as id 0.
my %typ = ("" => 1);
my $typnr = 0;

my @data;
my $count = 0;

CHUNK:
for (;;) {
    # Pull the next slice of every column.
    for my $col (0 .. $#fh) {
        my $got = sysread $fh[$col], my $buf, $chunk_bytes;
        defined $got or die "$columns[$col]: $!";
        $data[$col] = [unpack "V*", $buf];
    }

    # The first column decides when the input is exhausted.
    last CHUNK unless @{ $data[0] };

    # Progress indicator: running total of rows read so far.
    $count += @{ $data[0] };
    print "$count\n";

    ROW:
    for my $i (0 .. $#{ $data[0] }) {
        # One NUL-terminated phone number record per row.  At end of file
        # fall back to the empty string so the record is reported as
        # unparseable below instead of triggering uninitialized warnings.
        my $nummer = <$nummer_fh>;
        $nummer = "" unless defined $nummer;

        # Optional alphanumeric prefix (the "type"), optional "+", then
        # digits and spaces up to the terminating NUL.
        $nummer =~ /^((?:[a-zA-Z][-.\/a-zA-Z0-9]*)?) (?:\s*\+)? \s* ([0-9\ ]+)\0$/x
            or do {
                warn "ERR: unparseable telnr. '$nummer'";
                next ROW;
            };
        my ($prefix, $digits) = ($1, $2);

        # Assign ids to prefixes in order of first appearance.
        my $typ = ($typ{$prefix} ||= 1 + ++$typnr) - 1;

        # Strip certain leading codes from numbers starting with 0.
        # NOTE(review): presumably carrier-selection / local area codes --
        # confirm the intent of these three patterns.
        if ($digits =~ /^0/) {
            $digits =~ s/^01\d\d//;
            $digits =~ s/^06033//;
            $digits =~ s/^0940//;
        }

        warn "WARN: truncated <$data[6][$i]:$digits>\n" if length($digits) > 11;

        # Encode via the project helper, then escape backslash, newline,
        # tab and NUL so the value is safe inside a tab-separated line.
        my $encoded = dinfo::nummer2str($digits);
        $encoded =~ s/([\\\x0a\x09])/\\$1/g;
        $encoded =~ s/\x00/\\0/g;

        # Columns 0-6, then number and type id, then columns 7-10.
        print {$row_fh} join("\t",
            (map { $data[$_][$i] } 0 .. 6),
            $encoded,
            $typ,
            (map { $data[$_][$i] } 7 .. 10),
        ), "\n";
    }
}

# Buffered write errors only surface at close.
close $row_fh or die "row.txt: $!";

# Write the id -> prefix table, ordered by id for reproducible output.
open my $typ_fh, ">", "data/col/typ.txt" or die "typ.txt: $!";
for my $prefix (sort { $typ{$a} <=> $typ{$b} } keys %typ) {
    my $id = $typ{$prefix} - 1;
    print {$typ_fh} "$id\t$prefix\n";
}
close $typ_fh or die "typ.txt: $!";