… | |
… | |
72 | use common::sense; |
72 | use common::sense; |
73 | |
73 | |
74 | use Carp (); |
74 | use Carp (); |
75 | |
75 | |
76 | BEGIN { |
76 | BEGIN { |
77 | our $VERSION = 0.01; |
77 | our $VERSION = 0.9; |
78 | |
78 | |
79 | require XSLoader; |
79 | require XSLoader; |
80 | XSLoader::load (__PACKAGE__, $VERSION); |
80 | XSLoader::load (__PACKAGE__, $VERSION); |
81 | |
81 | |
82 | eval 'sub TORAD() { ' . ((atan2 1,0) / 180) . ' }'; |
82 | eval 'sub TORAD() { ' . ((atan2 1,0) / 90) . ' }'; |
83 | } |
83 | } |
84 | |
84 | |
85 | =item $lookup = Geo::LatLon2Place->new ($path) |
85 | =item $lookup = Geo::LatLon2Place->new ($path) |
86 | |
86 | |
87 | Opens a database created by F<geo-latlon2place-makedb> and return an |
87 | Opens a database created by F<geo-latlon2place-makedb> and return an |
… | |
… | |
106 | (my ($magic, $version), $self->[2], $self->[3]) = unpack "a4VVV", cdb_get $self->[1], ""; |
106 | (my ($magic, $version), $self->[2], $self->[3]) = unpack "a4VVV", cdb_get $self->[1], ""; |
107 | |
107 | |
108 | $magic eq "SRGL" |
108 | $magic eq "SRGL" |
109 | or Carp::croak "$path: not a Geo::LatLon2Place file"; |
109 | or Carp::croak "$path: not a Geo::LatLon2Place file"; |
110 | |
110 | |
111 | $version == 1 |
111 | $version == 2 |
112 | or Carp::croak "$path: version mismatch (got $version, expected 1)"; |
112 | or Carp::croak "$path: version mismatch (got $version, expected 2)"; |
113 | |
113 | |
114 | $self |
114 | $self |
115 | } |
115 | } |
116 | |
116 | |
117 | sub DESTROY { |
117 | sub DESTROY { |
… | |
… | |
128 | you usually do not specify this parameter. |
128 | you usually do not specify this parameter. |
129 | |
129 | |
130 | If something is found, the associated data blob (always a binary string) |
130 | If something is found, the associated data blob (always a binary string) |
131 | is returned, otherwise you receive C<undef>. |
131 | is returned, otherwise you receive C<undef>. |
132 | |
132 | |
133 | Unless you specify a cusotrm format, the data blob is actually a UTF-8 |
133 | Unless you specify a custom format/extractor when building your database, |
134 | string, so you might want to call C<utf8::decode> on it to get a unicode |
134 | the data blob is actually a UTF-8 string, so you might want to call |
135 | astring. |
135 | C<utf8::decode> on it to get a unicode string: |
136 | |
136 | |
137 | At the moment, the implementation is in pure perl, but will eventually |
137 | my $res = $db->lookup (47, 37); # near mariupol, UA |
138 | move to C. |
138 | if (defined $res) { |
|
|
139 | utf8::decode $res; |
|
|
140 | # $res now contains the unicode result |
|
|
141 | } |
139 | |
142 | |
140 | =cut |
143 | =cut |
141 | |
144 | |
142 | sub lookup { |
145 | sub lookup { |
143 | my ($self, $lat, $lon, $radius) = @_; |
146 | my ($self, $lat, $lon, $radius) = @_; |
144 | |
147 | |
145 | $radius ||= $self->[2]; |
148 | lookup_ext_ $self->[1], $self->[2], $self->[3], $lat, $lon, 0, $radius, 0 |
146 | $radius = int +($radius + $self->[2] - 1) / $self->[2]; |
|
|
147 | |
|
|
148 | my $coslat = cos abs $lat * TORAD; |
|
|
149 | |
|
|
150 | my $blat = int $self->[3] * $coslat; |
|
|
151 | my $cx = int (($lon + 180) * $blat / 360); |
|
|
152 | my $cy = int (($lat + 90) * $self->[3] / 180); |
|
|
153 | |
|
|
154 | my ($min, $res) = (1e00); |
|
|
155 | |
|
|
156 | for my $y ($cy - $radius .. $cy + $radius) { |
|
|
157 | for my $x ($cx - $radius .. $cx + $radius) { |
|
|
158 | for (unpack "(C/a*)*", cdb_get $self->[1], pack "s< s<", $x, $y) { |
|
|
159 | my ($plat, $plon, $w, $data) = unpack "s< s< C a*"; |
|
|
160 | $plat = $plat * ( 90 / 32767); |
|
|
161 | $plon = $plon * (180 / 32767); |
|
|
162 | |
|
|
163 | my $dx = ($lon - $plon) * TORAD * $coslat; |
|
|
164 | my $dy = ($lat - $plat) * TORAD; |
|
|
165 | my $d2 = ($dx * $dx + $dy * $dy) * $w; |
|
|
166 | |
|
|
167 | $d2 >= $min |
|
|
168 | or ($min, $res) = ($d2, $data); |
|
|
169 | } |
|
|
170 | } |
|
|
171 | } |
|
|
172 | |
|
|
173 | $res |
|
|
174 | } |
149 | } |
175 | |
150 | |
176 | =back |
151 | =back |
177 | |
152 | |
178 | =head1 ALGORITHM |
153 | =head1 ALGORITHM |
… | |
… | |
190 | |
165 | |
191 | It will then calculate the (squared) distance to the search coordinate |
166 | It will then calculate the (squared) distance to the search coordinate |
192 | using an approximate euclidean distance on an equireactangular |
167 | using an approximate euclidean distance on an equireactangular |
193 | projection. The squared distance is multiplied with a weight (1..25 for |
168 | projection. The squared distance is multiplied with a weight (1..25 for |
194 | the geonames database, based on population and adminstrative status, |
169 | the geonames database, based on population and adminstrative status, |
195 | always 1 for postcal codes), and the minimum distance wins. |
170 | always 1 for postal codes), and the minimum distance wins. |
196 | |
171 | |
197 | Binning should not introduce errors, but bigger bins can slow down lookup |
172 | Binning should not introduce errors, but bigger bins can slow down lookup |
198 | times due to having to look at more places. The lookup assumes a spherical |
173 | times due to having to look at more places. The lookup assumes a spherical |
199 | shape for the earth, the equirectangular projection stretches distances |
174 | shape for the earth, the equirectangular projection stretches distances |
200 | unevenly and the euclidean distance calculation introduces further |
175 | unevenly and the euclidean distance calculation introduces further |
201 | errors. For typical distance (<< 100km) and the intended usage, these |
176 | errors. For typical distance (<< 100km) and the intended usage, these |
202 | errors should be considered negligible. |
177 | errors should be considered negligible. |
203 | |
178 | |
204 | =head1 SPEED |
179 | =head1 SPEED |
205 | |
180 | |
206 | The current implementation is written in pure perl, and on my machine, |
181 | On my machine, C<lookup> typically does more than a million lookups per |
207 | typically does 10000-200000 lookups per second. The goal for version 1.0 |
182 | second - performance varies depending on result density and number of |
208 | is to move the lookup to C. |
183 | indexed points. |
209 | |
184 | |
210 | =head1 TENTATIVE ROADMAP |
185 | =head1 TENTATIVE ROADMAP |
211 | |
186 | |
212 | The database writer should be accessible via a module, so you cna easily |
187 | The database writer should be accessible via a module, so you can easily |
213 | generate your own databases without having to run an external command. |
188 | generate your own databases without having to run an external command. |
214 | |
189 | |
215 | The api might be extended to allow for multiple returns, or nearest |
190 | The API might be extended to allow for multiple lookups, multiple |
216 | neighbour search, or more return values (distance, coordinates). |
191 | returns, or nearest neighbour search, or more return values (distance, |
|
|
192 | coordinates). |
|
|
193 | |
|
|
194 | Longer lookups will take advantage of perlmulticore. |
217 | |
195 | |
218 | =head1 PERL MULTICORE SUPPORT |
196 | =head1 PERL MULTICORE SUPPORT |
|
|
197 | |
|
|
198 | This is not yet implemented: |
219 | |
199 | |
220 | This module supports the perl multicore specification |
200 | This module supports the perl multicore specification |
221 | (L<http://perlmulticore.schmorp.de/>) when doing lookups. |
201 | (L<http://perlmulticore.schmorp.de/>) when doing lookups. |
222 | |
202 | |
223 | =head1 SEE ALSO |
203 | =head1 SEE ALSO |