ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/dinfo/dinfo-import1
Revision: 1.9
Committed: Fri Aug 29 04:15:01 2003 UTC (20 years, 8 months ago) by root
Branch: MAIN
CVS Tags: HEAD
Changes since 1.8: +35 -31 lines
Log Message:
*** empty log message ***

File Contents

# Content
#!/usr/bin/perl

# dinfo-import1 - renumber the values of each dumped column by frequency.
#
# For every column named in @cols the script makes two passes over
# data/dump/<col>:
#
#   ANALYZE/SORT/RENUMBER  count how often each distinct line occurs, give
#                          the lines ids 0..N-1 in order of descending
#                          count, and write "id<TAB>line" records to
#                          data/col/<col>.
#   WRITE                  re-read the dump and write the id of every input
#                          line, one per line, to data/row/<col>.
#
# Progress is reported on STDOUT.  Any failure to open a file, or to flush
# one of the written files, is fatal.

use strict;
use warnings;

use Carp;

use PApp::SQL;
use BerkeleyDB;

# Only referenced by the inactive code below __END__; declared with "our" so
# it stays the same package variable it was before strictures were enabled.
our $DEST = "data/tsv";

# Columns to process; each one names a file under data/dump/.
my @cols = qw(hausnr name vorname zusatz1 zusatz2 zusatz3 vorwahl strasse plz ort branche typ);

# Presumably the default handle used by PApp::SQL's sql_* functions (verify
# against PApp::SQL).  The active code below issues no SQL: the only
# sql_exec call is the commented-out truncate that follows.
local $PApp::SQL::DBH = PApp::SQL::connect_cached("dinfo::", "DBI:mysql:dinfo");

#sql_exec "truncate $_"
#   for qw(branche haus name ort plz row strasse typ vorname vorwahl zusatz1 zusatz2 zusatz3);

for my $col (@cols) {
    my $dump_path = "data/dump/$col";
    my $col_path  = "data/col/$col";
    my $row_path  = "data/row/$col";

    print "$col ANALYZE\n";

    # Keyed by the raw input line (never chomped, so the trailing newline is
    # part of the key).  Holds the occurrence count during the first pass
    # and the assigned id after RENUMBER.
    my %col;

    open my $dump_fh, "<:raw", $dump_path
        or die "$dump_path: $!";
    while (my $line = <$dump_fh>) {
        $col{$line}++;
    }
    close $dump_fh;

    print "$col SORT\n";

    # Most frequent value first.  The string comparison breaks ties so that
    # equal counts do not fall back to hash order, which is not stable
    # between runs and would make the assigned ids irreproducible.
    my @sort = sort { $col{$b} <=> $col{$a} || $a cmp $b } keys %col;

    print "$col RENUMBER\n";

    # NOTE(review): the values keep their trailing newline, so each record
    # written here is normally followed by an empty line.  Left unchanged
    # because the consumers of data/col/* are not visible from this file.
    open my $col_fh, ">:raw", $col_path
        or die "$col_path: $!";
    for my $id (0 .. $#sort) {
        print {$col_fh} "$id\t$sort[$id]\n";
        $col{ $sort[$id] } = $id;    # count is no longer needed; store the id
    }
    # Buffered write errors only surface at close, so it must be checked.
    close $col_fh
        or die "$col_path: $!";

    print "$col CLEAR\n";

    # Release the sorted key list before the second pass over the dump.
    @sort = ();

    print "$col WRITE\n";

    open $dump_fh, "<:raw", $dump_path
        or die "$dump_path: $!";
    open my $row_fh, ">:raw", $row_path
        or die "$row_path: $!";

    while (my $line = <$dump_fh>) {
        print {$row_fh} "$col{$line}\n";
    }

    close $row_fh
        or die "$row_path: $!";
    close $dump_fh;
}

# Everything below __END__ is not compiled by perl.  It is an inactive
# converter that reads tab-separated records from STDIN and prints
# tab-joined records; it uses @cvtid, %cvtid, %cache, %cache2, %fh, @seq and
# $count, none of which are declared in this file.  It is reproduced
# unchanged apart from indentation.

__END__
while (<STDIN>) {
    chomp;
    my @data = split /\t/;

    $data[3] =~ s/^, //;
    $data[7] =~ s/\.$// && $data[6] =~ s/ Geb$/Geb./;

    $data[6] =~ /^((?:[a-zA-Z][-.\/a-zA-Z0-9]*)?) (?:\s*\+)? \s* ([0-9\ ]+)$/x or do {
        warn "ERR: unparseable telnr. '$data[6]'";
        next;
    };

    $data[6] = $2;
    $data[13] = $1;

    ($data[5] eq substr $data[6], 0, length $data[5])
        and substr $data[6], 0, length $data[5], "";

    if (length $data[6] > 12) {
        warn "ERR: number too long '$data[6]'";
        next;
    }

    for (@cvtid) {
        $data[$_] = ($cache{"$_,$data[$_]"} ||= do {
            my $data = $data[$_];
            my $fh = $fh{$_} ||= do {
                open my $fh, ">", "$DEST/$cvtid{$_}.txt"
                    or die "$DEST/$cvtid{$_}.txt: $!";
                $fh;
            };
            my $id = $data eq "" ? 1 : 1 + ++$seq[$_];
            $data =~ s/([\\\x0a\x09])/\\$1/g; $data =~ s/\x00/\\0/g;

            print $fh "$id\t$data\n";

            $id;
        });
    }

    $data[13] < 16 or die "ERR: too many typ's";

    $data[6] =~ s/ //g;
    $data[6] = pack "H*", (substr "$data[6]000000000000", 0, 12) . sprintf "%x%x", 12 - length $data[6], $data[13];
    #warn unpack "H*", $data[6];
    $data[6] =~ s/([\\\x0a\x09])/\\$1/g; $data[6] =~ s/\x00/\\0/g;

    #name vorname zusatz1 zusatz2 zusatz3 vorwahl nummer strasse haus plz ort branche

    print +(join "\t",
        $data[0], $data[1], $data[2], $data[3], $data[4],
        $data[5], $data[6],
        $data[7], $data[8], $data[9], $data[10], $data[12],
    ), "\n";

    $count++ & 4095 or warn time . " $count";

    if ($count == 20_000_000) {
        while (%cache) {
            warn "copying cache to db...\n";
            while (my ($k, $v) = each %cache) {
                $cache2{$k} = delete $cache{$k};
            }
        }
        warn "nuking mem cache...\n";
        undef %cache;
        warn "assigning db cache...\n";
        *cache = \%cache2;
    }
}