… | |
… | |
8 | |
8 | |
9 | my $db = Geo::LatLon2Place->new ("/var/lib/mydb.cdb"); |
9 | my $db = Geo::LatLon2Place->new ("/var/lib/mydb.cdb"); |
10 | |
10 | |
11 | =head1 DESCRIPTION |
11 | =head1 DESCRIPTION |
12 | |
12 | |
13 | This is a simple-purpose module that tries to do one job: find the nearest |
13 | This is a single-purpose module that tries to do one job: find the nearest |
14 | placename for a point on earth. It doesn't claim to do a perfect job, but |
14 | placename for a point on earth. It doesn't claim to do a perfect job, but |
15 | it tries to be simple to set up, simple to use and be fast. |
15 | it tries to be simple to set up, simple to use and be fast. It doesn't |
|
|
16 | attempt to provide many features or nifty algorithms, and is meant to be |
|
|
17 | used in situations where you simply need a name for a coordinate without |
|
|
18 | becoming a GIS expert first. |
16 | |
19 | |
17 | =head2 BUILDING AND SETTING UP |
20 | =head2 BUILDING, SETTING UP AND USAGE |
18 | |
21 | |
19 | To build this module, you need tinycdb, a cdb implementation by Michael |
22 | To build this module, you need tinycdb, a cdb implementation by Michael |
20 | Tokarev, or a compatible library. On GNU/Debian-based systems you can get |
23 | Tokarev, or a compatible library. On GNU/Debian-based systems you can get |
21 | this by executing F<apt-get install libcdb-dev>. |
24 | this by executing F<apt-get install libcdb-dev>. |
22 | |
25 | |
… | |
… | |
27 | (L<https://www.geonames.org/export/>, note the license), for example, |
30 | (L<https://www.geonames.org/export/>, note the license), for example, |
28 | F<cities500.zip>, which lists all places with population 500 or more: |
31 | F<cities500.zip>, which lists all places with population 500 or more: |
29 | |
32 | |
30 | wget https://download.geonames.org/export/dump/cities500.zip |
33 | wget https://download.geonames.org/export/dump/cities500.zip |
31 | unzip cities500.zip |
34 | unzip cities500.zip |
32 | geo-latlon2place-makedb --geonames-gazetteer cities500.txt ll2place.cdb |
35 | geo-latlon2place-makedb cities500.txt cities500.ll2p |
33 | |
36 | |
34 | This will create a file F<ll2place.cdb> that you can use for lookups |
37 | This will create a file F<ll2p.cdb> that you can use for lookups |
35 | with this module. At the time of this writing, the F<cities500> database |
38 | with this module. At the time of this writing, the F<cities500> database |
36 | results in about a 10MB file while the F<allCountries> database results in |
39 | results in about a 10MB file while the F<allCountries> database results in |
37 | about 120MB. |
40 | about 120MB. |
38 | |
41 | |
|
|
42 | Lookups will return a string of the form C<placename, countrycode>. |
|
|
43 | |
|
|
44 | If you want to use the geonames postal code database (from |
|
|
45 | L<https://www.geonames.org/zip/>), use these commands: |
|
|
46 | |
|
|
47 | wget https://download.geonames.org/export/zip/allCountries.zip |
|
|
48 | unzip allCountries.zip |
|
|
49 | geo-latlon2place-makedb --extract geonames-postalcodes allCountries.txt allCountries.ll2p |
|
|
50 | |
|
|
51 | You can then use the resulting database like this: |
|
|
52 | |
|
|
53 | my $lookup = Geo::LatLon2Place->new ("allCountries.ll2p"); |
|
|
54 | |
|
|
55 | # and then do as many queries as you wish: |
|
|
56 | my $res = $lookup->(49, 8.4); |
|
|
57 | if (defined $res) { |
|
|
58 | utf8::decode $res; # convert $res from utf-8 to unicode |
|
|
59 | print "49, 8.4 found $res\n"; # should be Karlsruhe, DE for geonames |
|
|
60 | } else { |
|
|
61 | print "nothing found at 49, 8.4\n"; |
|
|
62 | } |
|
|
63 | |
|
|
64 | =head1 THE Geo::LatLon2Place CLASS |
|
|
65 | |
39 | =over 4 |
66 | =over |
40 | |
67 | |
41 | =cut |
68 | =cut |
42 | |
69 | |
43 | package Geo::LatLon2Place; |
70 | package Geo::LatLon2Place; |
44 | |
71 | |
… | |
… | |
48 | |
75 | |
49 | BEGIN { |
76 | BEGIN { |
50 | our $VERSION = 0.01; |
77 | our $VERSION = 0.01; |
51 | |
78 | |
52 | require XSLoader; |
79 | require XSLoader; |
53 | XSLoader::load __PACKAGE__, $VERSION; |
80 | XSLoader::load (__PACKAGE__, $VERSION); |
54 | |
81 | |
55 | eval 'sub TORAD() { ' . ((atan2 1,0) / 180) . ' }'; |
82 | eval 'sub TORAD() { ' . ((atan2 1,0) / 180) . ' }'; |
56 | } |
83 | } |
|
|
84 | |
|
|
85 | =item $lookup = Geo::LatLon2Place->new ($path) |
|
|
86 | |
|
|
87 | Opens a database created by F<geo-latlon2place-makedb> and return an |
|
|
88 | object that allows you to run queries against it. |
|
|
89 | |
|
|
90 | The database will be mmaped, so it will not be loaded into memory, but |
|
|
91 | your operating system will cache it appropriately. |
|
|
92 | |
|
|
93 | =cut |
57 | |
94 | |
58 | sub new { |
95 | sub new { |
59 | my ($class, $path) = @_; |
96 | my ($class, $path) = @_; |
60 | |
97 | |
61 | open my $fh, "<", $path |
98 | open my $fh, "<", $path |
… | |
… | |
80 | sub DESTROY { |
117 | sub DESTROY { |
81 | my ($self) = @_; |
118 | my ($self) = @_; |
82 | |
119 | |
83 | cdb_free $self->[1]; |
120 | cdb_free $self->[1]; |
84 | } |
121 | } |
|
|
122 | |
|
|
123 | =item $res = $lookup->lookup ($lat $lon[, $radius]) |
|
|
124 | |
|
|
125 | Looks up the point in the database that is "nearest" to C<$lat, $lon>, |
|
|
126 | search at leats up to C<$radius> kilometres. The default for C<$radius> is |
|
|
127 | the cell size the database is built with, and this usually works best, so |
|
|
128 | you usually do not specify this parameter. |
|
|
129 | |
|
|
130 | If something is found, the associated data blob (always a binary string) |
|
|
131 | is returned, otherwise you receive C<undef>. |
|
|
132 | |
|
|
133 | Unless you specify a cusotrm format, the data blob is actually a UTF-8 |
|
|
134 | string, so you might want to call C<utf8::decode> on it to get a unicode |
|
|
135 | astring. |
|
|
136 | |
|
|
137 | At the moment, the implementation is in pure perl, but will eventually |
|
|
138 | move to C. |
|
|
139 | |
|
|
140 | =cut |
85 | |
141 | |
86 | sub lookup { |
142 | sub lookup { |
87 | my ($self, $lat, $lon, $radius) = @_; |
143 | my ($self, $lat, $lon, $radius) = @_; |
88 | |
144 | |
89 | $radius ||= $self->[2]; |
145 | $radius ||= $self->[2]; |
… | |
… | |
115 | } |
171 | } |
116 | |
172 | |
117 | $res |
173 | $res |
118 | } |
174 | } |
119 | |
175 | |
|
|
176 | =back |
|
|
177 | |
|
|
178 | =head1 ALGORITHM |
|
|
179 | |
|
|
180 | The algorithm that this module implements consists of two parts: binning |
|
|
181 | and weighting (done when writing the database) and then finding the |
|
|
182 | nearest point. |
|
|
183 | |
|
|
184 | The first part bins all data points into a grid which has its minimum cell |
|
|
185 | size at the equator and poles, with somewhat larger cells in between. |
|
|
186 | |
|
|
187 | The lookup part will then read the cell that the coordinate is in and some |
|
|
188 | neighbouring cells (depending on the search radius, by default it will |
|
|
189 | read the eight cells around it). |
|
|
190 | |
|
|
191 | It will then calculate the (squared) distance to the search coordinate |
|
|
192 | using an approximate euclidean distance on an equireactangular |
|
|
193 | projection. The squared distance is multiplied with a weight (1..25 for |
|
|
194 | the geonames database, based on population and adminstrative status, |
|
|
195 | always 1 for postcal codes), and the minimum distance wins. |
|
|
196 | |
|
|
197 | Binning should not introduce errors, but bigger bins can slow down lookup |
|
|
198 | times due to having to look at more places. The lookup assumes a spherical |
|
|
199 | shape for the earth, the equirectangular projection stretches distances |
|
|
200 | unevenly and the euclidean distance calculation introduces further |
|
|
201 | errors. For typical distance (<< 100km) and the intended usage, these |
|
|
202 | errors should be considered negligible. |
|
|
203 | |
|
|
204 | =head1 SPEED |
|
|
205 | |
|
|
206 | The current implementation is written in pure perl, and on my machine, |
|
|
207 | typically does 10000-200000 lookups per second. The goal for version 1.0 |
|
|
208 | is to move the lookup to C. |
|
|
209 | |
|
|
210 | =head1 TENTATIVE ROADMAP |
|
|
211 | |
|
|
212 | The database writer should be accessible via a module, so you cna easily |
|
|
213 | generate your own databases without having to run an external command. |
|
|
214 | |
|
|
215 | The api might be extended to allow for multiple returns, or nearest |
|
|
216 | neighbour search. |
|
|
217 | |
|
|
218 | =head1 SEE ALSO |
|
|
219 | |
|
|
220 | L<geo-latlon2place-makedb> to create databases from common formats. |
|
|
221 | |
120 | =head1 AUTHOR |
222 | =head1 AUTHOR |
121 | |
223 | |
122 | Marc Lehmann <schmorp@schmorp.de> |
224 | Marc Lehmann <schmorp@schmorp.de> |
123 | http://home.schmorp.de/ |
225 | http://home.schmorp.de/ |
124 | |
226 | |