… | |
… | |
8 | |
8 | |
9 | my $db = Geo::LatLon2Place->new ("/var/lib/mydb.cdb"); |
9 | my $db = Geo::LatLon2Place->new ("/var/lib/mydb.cdb"); |
10 | |
10 | |
11 | =head1 DESCRIPTION |
11 | =head1 DESCRIPTION |
12 | |
12 | |
13 | This is a simple-purpose module that tries to do one job: find the nearest |
13 | This is a single-purpose module that tries to do one job: find the nearest |
14 | placename for a point on earth. It doesn't claim to do a perfect job, but |
14 | placename for a point on earth. It doesn't claim to do a perfect job, but |
15 | it tries to be simple to set up, simple to use and be fast. |
15 | it tries to be simple to set up, simple to use and be fast. It doesn't |
|
|
16 | attempt to provide many features or nifty algorithms, and is meant to be |
|
|
17 | used in situations where you simply need a name for a coordinate without |
|
|
18 | becoming a GIS expert first. |
16 | |
19 | |
17 | =head2 BUILDING AND SETTING UP |
20 | =head2 BUILDING, SETTING UP AND USAGE |
18 | |
21 | |
19 | To build this module, you need tinycdb, a cdb implementation by Michael |
22 | To build this module, you need tinycdb, a cdb implementation by Michael |
20 | Tokarev, or a compatible library. On GNU/Debian-based systems you can get |
23 | Tokarev, or a compatible library. On GNU/Debian-based systems you can get |
21 | this by executing F<apt-get install libcdb-dev>. |
24 | this by executing F<apt-get install libcdb-dev>. |
22 | |
25 | |
… | |
… | |
27 | (L<https://www.geonames.org/export/>, note the license), for example, |
30 | (L<https://www.geonames.org/export/>, note the license), for example, |
28 | F<cities500.zip>, which lists all places with population 500 or more: |
31 | F<cities500.zip>, which lists all places with population 500 or more: |
29 | |
32 | |
30 | wget https://download.geonames.org/export/dump/cities500.zip |
33 | wget https://download.geonames.org/export/dump/cities500.zip |
31 | unzip cities500.zip |
34 | unzip cities500.zip |
32 | geo-latlon2place-makedb --geonames-gazetteer cities500.txt ll2place.cdb |
35 | geo-latlon2place-makedb cities500.txt cities500.ll2p |
33 | |
36 | |
34 | This will create a file F<ll2place.cdb> that you can use for lookups |
37 | This will create a file F<ll2p.cdb> that you can use for lookups |
35 | with this module. At the time of this writing, the F<cities500> database |
38 | with this module. At the time of this writing, the F<cities500> database |
36 | results in about a 10MB file while the F<allCountries> database results in |
39 | results in about a 10MB file while the F<allCountries> database results in |
37 | about 120MB. |
40 | about 120MB. |
38 | |
41 | |
|
|
42 | Lookups will return a string of the form C<placename, countrycode>. |
|
|
43 | |
|
|
44 | If you want to use the geonames postal code database (from |
|
|
45 | L<https://www.geonames.org/zip/>), use these commands: |
|
|
46 | |
|
|
47 | wget https://download.geonames.org/export/zip/allCountries.zip |
|
|
48 | unzip allCountries.zip |
|
|
49 | geo-latlon2place-makedb --extract geonames-postalcodes allCountries.txt allCountries.ll2p |
|
|
50 | |
|
|
51 | You can then use the resulting database like this: |
|
|
52 | |
|
|
53 | my $lookup = Geo::LatLon2Place->new ("allCountries.ll2p"); |
|
|
54 | |
|
|
55 | # and then do as many queries as you wish: |
|
|
56 | my $res = $lookup->(49, 8.4); |
|
|
57 | if (defined $res) { |
|
|
58 | utf8::decode $res; # convert $res from utf-8 to unicode |
|
|
59 | print "49, 8.4 found $res\n"; # should be Karlsruhe, DE for geonames |
|
|
60 | } else { |
|
|
61 | print "nothing found at 49, 8.4\n"; |
|
|
62 | } |
|
|
63 | |
|
|
64 | =head1 THE Geo::LatLon2Place CLASS |
|
|
65 | |
39 | =over 4 |
66 | =over |
40 | |
67 | |
41 | =cut |
68 | =cut |
42 | |
69 | |
43 | package Geo::LatLon2Place; |
70 | package Geo::LatLon2Place; |
44 | |
71 | |
45 | use common::sense; |
72 | use common::sense; |
46 | |
73 | |
47 | use Carp (); |
74 | use Carp (); |
48 | |
75 | |
49 | BEGIN { |
76 | BEGIN { |
50 | our $VERSION = 0.01; |
77 | our $VERSION = '1.0'; |
51 | |
78 | |
52 | require XSLoader; |
79 | require XSLoader; |
53 | XSLoader::load __PACKAGE__, $VERSION; |
80 | XSLoader::load (__PACKAGE__, $VERSION); |
54 | |
81 | |
55 | eval 'sub TORAD() { ' . ((atan2 1,0) / 180) . ' }'; |
82 | eval 'sub TORAD() { ' . ((atan2 1,0) / 90) . ' }'; |
56 | } |
83 | } |
|
|
84 | |
|
|
85 | =item $lookup = Geo::LatLon2Place->new ($path) |
|
|
86 | |
|
|
87 | Opens a database created by F<geo-latlon2place-makedb> and return an |
|
|
88 | object that allows you to run queries against it. |
|
|
89 | |
|
|
90 | The database will be mmaped, so it will not be loaded into memory, but |
|
|
91 | your operating system will cache it appropriately. |
|
|
92 | |
|
|
93 | =cut |
57 | |
94 | |
58 | sub new { |
95 | sub new { |
59 | my ($class, $path) = @_; |
96 | my ($class, $path) = @_; |
60 | |
97 | |
61 | open my $fh, "<", $path |
98 | open my $fh, "<", $path |
… | |
… | |
69 | (my ($magic, $version), $self->[2], $self->[3]) = unpack "a4VVV", cdb_get $self->[1], ""; |
106 | (my ($magic, $version), $self->[2], $self->[3]) = unpack "a4VVV", cdb_get $self->[1], ""; |
70 | |
107 | |
71 | $magic eq "SRGL" |
108 | $magic eq "SRGL" |
72 | or Carp::croak "$path: not a Geo::LatLon2Place file"; |
109 | or Carp::croak "$path: not a Geo::LatLon2Place file"; |
73 | |
110 | |
74 | $version == 1 |
111 | $version == 2 |
75 | or Carp::croak "$path: version mismatch (got $version, expected 1)"; |
112 | or Carp::croak "$path: version mismatch (got $version, expected 2)"; |
76 | |
113 | |
77 | $self |
114 | $self |
78 | } |
115 | } |
79 | |
116 | |
80 | sub DESTROY { |
117 | sub DESTROY { |
81 | my ($self) = @_; |
118 | my ($self) = @_; |
82 | |
119 | |
83 | cdb_free $self->[1]; |
120 | cdb_free $self->[1]; |
84 | } |
121 | } |
85 | |
122 | |
|
|
123 | =item $res = $lookup->lookup ($lat, $lon[, $radius]) |
|
|
124 | |
|
|
125 | Looks up the point in the database that is "nearest" to C<$lat, $lon>, |
|
|
126 | search at leats up to C<$radius> kilometres. The default for C<$radius> is |
|
|
127 | the cell size the database is built with, and this usually works best, so |
|
|
128 | you usually do not specify this parameter. |
|
|
129 | |
|
|
130 | If something is found, the associated data blob (always a binary string) |
|
|
131 | is returned, otherwise you receive C<undef>. |
|
|
132 | |
|
|
133 | Unless you specify a custom format/extractor when building your database, |
|
|
134 | the data blob is actually a UTF-8 string, so you might want to call |
|
|
135 | C<utf8::decode> on it to get a unicode string: |
|
|
136 | |
|
|
137 | my $res = $db->lookup (47, 37); # near mariupol, UA |
|
|
138 | if (defined $res) { |
|
|
139 | utf8::decode $res; |
|
|
140 | # $res now contains the unicode result |
|
|
141 | } |
|
|
142 | |
|
|
143 | =cut |
|
|
144 | |
86 | sub lookup { |
145 | sub lookup { |
87 | my ($self, $lat, $lon, $radius) = @_; |
146 | my ($self, $lat, $lon, $radius) = @_; |
88 | |
147 | |
89 | $radius ||= $self->[2]; |
148 | lookup_ext_ $self->[1], $self->[2], $self->[3], $lat, $lon, 0, $radius, 0 |
90 | $radius = int +($radius + $self->[2] - 1) / $self->[2]; |
|
|
91 | |
|
|
92 | my $coslat = cos abs $lat * TORAD; |
|
|
93 | |
|
|
94 | my $blat = int $self->[3] * $coslat; |
|
|
95 | my $cx = int (($lon + 180) * $blat / 360); |
|
|
96 | my $cy = int (($lat + 90) * $self->[3] / 180); |
|
|
97 | |
|
|
98 | my ($min, $res) = (1e00); |
|
|
99 | |
|
|
100 | for my $y ($cy - $radius .. $cy + $radius) { |
|
|
101 | for my $x ($cx - $radius .. $cx + $radius) { |
|
|
102 | for (unpack "(C/a*)*", cdb_get $self->[1], pack "s< s<", $x, $y) { |
|
|
103 | my ($plat, $plon, $w, $data) = unpack "s< s< C a*"; |
|
|
104 | $plat = $plat * ( 90 / 32767); |
|
|
105 | $plon = $plon * (180 / 32767); |
|
|
106 | |
|
|
107 | my $dx = ($lon - $plon) * TORAD * $coslat; |
|
|
108 | my $dy = ($lat - $plat) * TORAD; |
|
|
109 | my $d2 = ($dx * $dx + $dy * $dy) * $w; |
|
|
110 | |
|
|
111 | $d2 >= $min |
|
|
112 | or ($min, $res) = ($d2, $data); |
|
|
113 | } |
|
|
114 | } |
|
|
115 | } |
|
|
116 | |
|
|
117 | $res |
|
|
118 | } |
149 | } |
|
|
150 | |
|
|
151 | =back |
|
|
152 | |
|
|
153 | =head1 ALGORITHM |
|
|
154 | |
|
|
155 | The algorithm that this module implements consists of two parts: binning |
|
|
156 | and weighting (done when writing the database) and then finding the |
|
|
157 | nearest point. |
|
|
158 | |
|
|
159 | The first part bins all data points into a grid which has its minimum cell |
|
|
160 | size at the equator and poles, with somewhat larger cells in between. |
|
|
161 | |
|
|
162 | The lookup part will then read the cell that the coordinate is in and some |
|
|
163 | neighbouring cells (depending on the search radius, by default it will |
|
|
164 | read the eight cells around it). |
|
|
165 | |
|
|
166 | It will then calculate the (squared) distance to the search coordinate |
|
|
167 | using an approximate euclidean distance on an equireactangular |
|
|
168 | projection. The squared distance is multiplied with a weight (1..25 for |
|
|
169 | the geonames database, based on population and adminstrative status, |
|
|
170 | always 1 for postal codes), and the minimum distance wins. |
|
|
171 | |
|
|
172 | Binning should not introduce errors, but bigger bins can slow down lookup |
|
|
173 | times due to having to look at more places. The lookup assumes a spherical |
|
|
174 | shape for the earth, the equirectangular projection stretches distances |
|
|
175 | unevenly and the euclidean distance calculation introduces further |
|
|
176 | errors. For typical distance (<< 100km) and the intended usage, these |
|
|
177 | errors should be considered negligible. |
|
|
178 | |
|
|
179 | =head1 SPEED |
|
|
180 | |
|
|
181 | On my machine, C<lookup> typically does more than a million lookups per |
|
|
182 | second - performance varies depending on result density and number of |
|
|
183 | indexed points. |
|
|
184 | |
|
|
185 | =head1 TENTATIVE ROADMAP |
|
|
186 | |
|
|
187 | The database writer should be accessible via a module, so you can easily |
|
|
188 | generate your own databases without having to run an external command. |
|
|
189 | |
|
|
190 | The API might be extended to allow for multiple lookups, multiple |
|
|
191 | returns, or nearest neighbour search, or more return values (distance, |
|
|
192 | coordinates). |
|
|
193 | |
|
|
194 | Longer lookups will take advantage of perlmulticore. |
|
|
195 | |
|
|
196 | =head1 PERL MULTICORE SUPPORT |
|
|
197 | |
|
|
198 | This is not yet implemented: |
|
|
199 | |
|
|
200 | This module supports the perl multicore specification |
|
|
201 | (L<http://perlmulticore.schmorp.de/>) when doing lookups. |
|
|
202 | |
|
|
203 | =head1 SEE ALSO |
|
|
204 | |
|
|
205 | L<geo-latlon2place-makedb> to create databases from common formats. |
119 | |
206 | |
120 | =head1 AUTHOR |
207 | =head1 AUTHOR |
121 | |
208 | |
122 | Marc Lehmann <schmorp@schmorp.de> |
209 | Marc Lehmann <schmorp@schmorp.de> |
123 | http://home.schmorp.de/ |
210 | http://home.schmorp.de/ |