ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Geo-LatLon2Place/bin/geo-latlon2place-makedb
(Generate patch)

Comparing Geo-LatLon2Place/bin/geo-latlon2place-makedb (file contents):
Revision 1.2 by root, Mon Mar 14 03:26:20 2022 UTC vs.
Revision 1.3 by root, Tue Mar 15 07:33:40 2022 UTC

1#!/opt/bin/perl 1#!/opt/bin/perl
2
3# usage: geonames2cdb geonames.txt database.cdb
4# you can use allCountries.zp, or cities500.zip and so on
5# from https://download.geonames.org/export/dump/
6 2
7=head1 NAME 3=head1 NAME
8 4
9geo-latlon2place-makedb - generate database for use with Geo::LatLon2Place 5geo-latlon2place-makedb - generate database for use with Geo::LatLon2Place
10 6
24By default, the input file is considered to be in geonames gazetteer 20By default, the input file is considered to be in geonames gazetteer
25format, but this can be customized using B<--extract>. 21format, but this can be customized using B<--extract>.
26 22
27=over 23=over
28 24
29=item B<--cellsize> km (default C<20>) 25=item B<--cellsize> km (default C<20>, or C<10> for geonames-postalcodes)
30 26
31The (minimum) size of a single grid cell in km - the data is binned into 27The (minimum) size of a single grid cell in km - the data is binned into
32cells of at least this size. It should not be larger than the search 28cells of at least this size. It should not be larger than the search
33radius. 29radius.
34 30
162; 158;
163 159
164@ARGV == 2 160@ARGV == 2
165 or die "need exactly two paths: inputfile.txt outputdatabase.cdb\n"; 161 or die "need exactly two paths: inputfile.txt outputdatabase.cdb\n";
166 162
167$km ||= 20;
168$extract ||= "geonames"; 163$extract ||= "geonames";
169 164
170if ($extract eq "geonames") { 165if ($extract eq "geonames") {
171 $extract = sub { 166 $extract = sub {
172 my ($id, $name, undef, undef, $lat, $lon, $t1, $t2, $cc, undef, $a1, $s2, $a3, $a4, $pop, undef) = split /\t/; 167 my ($id, $name, undef, undef, $lat, $lon, $t1, $t2, $cc, undef, $a1, $s2, $a3, $a4, $pop, undef) = split /\t/;
194 } 189 }
195 190
196 ($lat, $lon, $w, "$name, $cc") 191 ($lat, $lon, $w, "$name, $cc")
197 }; 192 };
198} elsif ($extract eq "geonames-postalcodes") { 193} elsif ($extract eq "geonames-postalcodes") {
194 $km ||= 10;
199 $extract = sub { 195 $extract = sub {
200 my ($cc, $zip, $name, undef, undef, undef, undef, undef, undef, $lat, $lon, undef) = split /\t/; 196 my ($cc, $zip, $name, undef, undef, undef, undef, undef, undef, $lat, $lon, undef) = split /\t/;
201 197
202 ($lat, $lon, 1, "$zip $name, $cc") 198 ($lat, $lon, 1, "$zip $name, $cc")
203 }; 199 };
204} else { 200} else {
205 $extract = eval "#line 1 \"extract fragment\"\nsub { $extract; }"; 201 $extract = eval "#line 1 \"extract fragment\"\nsub { $extract; }";
206 die "$@" if $@; 202 die "$@" if $@;
207} 203}
208 204
205$km ||= 20;
206
209my $torad = (atan2 1,0) / 180; 207my $torad = (atan2 1,0) / 90;
210 208
211my $boxes = int 6378 * 2 * 2 * (atan2 1,0) / $km; # equator radius / cell size 209my $boxes = int 6378 * 2 * 2 * (atan2 1,0) / $km; # equator radius / cell size
212 210
213open my $fh, "<:perlio", $ARGV[0]; 211open my $fh, "<:perlio", $ARGV[0];
214 212
216 214
217while (<$fh>) { 215while (<$fh>) {
218 my ($lat, $lon, $w, $payload) = $extract->() 216 my ($lat, $lon, $w, $payload) = $extract->()
219 or next; 217 or next;
220 218
221 unless (255 - (2+2+1) >= length $payload) { 219 unless (255 >= length $payload) {
222 $payload =~ s/([^(\x20-\x7e])/sprintf "\\x%02x", ord $1/ge; 220 $payload =~ s/([^(\x20-\x7e])/sprintf "\\x%02x", ord $1/ge;
223 warn "payload too long, skipping: $payload\n"; 221 warn "payload too long, skipping: $payload\n";
224 next; 222 next;
225 } 223 }
226 224
227 # higher latitudes get a (much) lower resolution, so we always get at least $km cells 225 my $blat = int $boxes * cos $lat * $torad; # can be 0, but does not matter
228 my $blat = int $boxes * cos abs $lat * $torad;
229 my $x = int (($lon + 180) * $blat / 360); 226 my $x = int (($lon + 180) * $blat / 360);
230 my $y = int (($lat + 90) * $boxes / 180); 227 my $y = int (($lat + 90) * $boxes / 180);
231 228
232 # we use 16 bit for lat/lon, 8 bi8t for the weight, BER-id, counted name and CC 229 # we use 16 bit for lat/lon, 8 bi8t for the weight, BER-id, counted name and CC
233 push @{ $grid[$y][$x] }, pack "C/a*", pack "s< s< C a*", $lat * 32767 / 90, $lon * 32767 / 180, $w, $payload; 230 push @{ $grid[$y][$x] }, pack "s< s< C C/a*", $lat * 32767 / 90, $lon * 32767 / 180, $w, $payload;
234} 231}
235 232
236my ($max, $sum, $cnt); 233my ($max, $sum, $cnt);
237 234
238open my $cdb, ">", $ARGV[1] 235open my $cdb, ">", $ARGV[1]
239 or die; 236 or die;
240Geo::LatLon2Place::cdb_make_start fileno $cdb 237Geo::LatLon2Place::cdb_make_start fileno $cdb
241 and die "cdb_make_start failure"; 238 and die "cdb_make_start failure";
242 239
243Geo::LatLon2Place::cdb_make_add "", pack "a4VVV", "SRGL", 1, $km, $boxes; 240Geo::LatLon2Place::cdb_make_add "", pack "a4VVV", "SRGL", 2, $km, $boxes;
244 241
245for my $y (0 .. $#grid) { 242for my $y (0 .. $#grid) {
246 my $r = $grid[$y]; 243 my $r = $grid[$y];
247 for my $x (0 .. $#$r) { 244 for my $x (0 .. $#$r) {
248 my $c = $r->[$x] 245 my $c = $r->[$x]
263Geo::LatLon2Place::cdb_make_finish 260Geo::LatLon2Place::cdb_make_finish
264 and die "cdb_make_finish failed"; 261 and die "cdb_make_finish failed";
265close $cdb; 262close $cdb;
266 263
267$max->[2] = $max->[2] * 180 / $boxes - 90; 264$max->[2] = $max->[2] * 180 / $boxes - 90;
268my $blat = int $boxes * cos abs $max->[2] * $torad; 265my $blat = int $boxes * cos $max->[2] * $torad;
269$max->[1] = $max->[1] * 360 / $blat - 180; 266$max->[1] = $max->[1] * 360 / $blat - 180;
270print "cell size: $km km, grid size: $boxes, non-empty cell count: $cnt\naverage cell size: ", int $sum/$cnt, ", maximum cell size: $max->[0] (at $max->[2] $max->[1])\n"; 267print "cell size: $km km, grid size: $boxes, non-empty cell count: $cnt\naverage cell size: ", int $sum/$cnt, ", maximum cell size: $max->[0] (at $max->[2] $max->[1])\n";
271 268

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines