1 |
root |
1.1 |
package Tree::M; |
2 |
|
|
|
3 |
|
|
use Carp; |
4 |
|
|
use DynaLoader; |
5 |
|
|
|
6 |
|
|
BEGIN { |
7 |
root |
1.3 |
$VERSION = 0.02; |
8 |
root |
1.1 |
@ISA = qw(DynaLoader); |
9 |
|
|
bootstrap Tree::M, $VERSION; |
10 |
|
|
} |
11 |
|
|
|
12 |
|
|
=head1 NAME |
13 |
|
|
|
14 |
root |
1.3 |
Tree::M - implement M-trees for efficient "metric/multimedia-searches" |
15 |
root |
1.1 |
|
16 |
|
|
=head1 SYNOPSIS |
17 |
|
|
|
18 |
|
|
use Tree::M; |
19 |
|
|
|
20 |
root |
1.2 |
$M = new Tree::M |
21 |
|
|
|
22 |
root |
1.1 |
=head1 DESCRIPTION |
23 |
|
|
|
24 |
|
|
(not yet) |
25 |
|
|
|
26 |
|
|
Ever had the problem of managing multi-dimensional (spatial) data but your |
27 |
|
|
database only had one-dimensional indices (b-tree etc.)? Queries like |
28 |
|
|
|
29 |
|
|
select data from table where latitude > 40 and latitude < 50 |
30 |
root |
1.2 |
and longitude> 50 and longitude< 60; |
31 |
root |
1.1 |
|
32 |
|
|
are quite inefficient, unless longitude and latitude are part of the same |
33 |
root |
1.2 |
spatial index (e.g. an R-tree). |
34 |
root |
1.1 |
|
35 |
root |
1.2 |
An M-tree is an index tree that does not directly look at the stored keys |
36 |
|
|
but rather requires a I<distance> (a metric, e.g. a vector norm) function |
37 |
|
|
to be defined that sorts keys according to their distance. In the example |
38 |
|
|
above the distance function could be the maximum norm (C<max(x1-x2, |
39 |
|
|
y1-y2)>). The lookup above would then be something like this: |
40 |
|
|
|
41 |
|
|
my $res = $M->range([45,55], 5); |
42 |
|
|
|
43 |
|
|
This module implements an M-tree. Although the data structure and the |
44 |
|
|
distance function is arbitrary, the current version only implements |
45 |
|
|
n-dimensional discrete vectors and hardwires the distance function to the |
46 |
|
|
suared euclidean metric (i.e. C<(x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2 + |
47 |
|
|
...>). Evolution towards more freedom is expected ;) |
48 |
root |
1.1 |
|
49 |
|
|
=head2 THE Tree::M CLASS |
50 |
|
|
|
51 |
|
|
=over 4 |
52 |
|
|
|
53 |
root |
1.3 |
=item $M = new Tree::M arg => value, ... |
54 |
root |
1.2 |
|
55 |
|
|
Creates a new M-Tree. Before it can be used you have to call one of the |
56 |
root |
1.3 |
C<create> or C<open> methods below. |
57 |
root |
1.2 |
|
58 |
root |
1.3 |
ndims => integer |
59 |
|
|
the number of dimensions each vector has |
60 |
|
|
|
61 |
|
|
range => [min, max, steps] |
62 |
|
|
min the lowest allowable scalar value in each dimension |
63 |
|
|
max the maximum allowable number |
64 |
|
|
steps the number of discrete steps (used when stored externally) |
65 |
|
|
|
66 |
|
|
pagesize => integer |
67 |
|
|
the size of one page on underlying storage. usually 4096, but |
68 |
|
|
large objects (ndims > 20 or so) might want to increase this |
69 |
root |
1.2 |
|
70 |
|
|
Example: create an M-Tree that stores 8-bit rgb-values: |
71 |
|
|
|
72 |
root |
1.3 |
$M = new Tree::M ndims => 3, range => [0, 255, 256]; |
73 |
root |
1.2 |
|
74 |
|
|
Example: create an M-Tree that stores coordinates from -1..1 with 100 different steps: |
75 |
|
|
|
76 |
root |
1.3 |
$M = new Tree::M ndims => 2, range => [-1, 1, 100]; |
77 |
root |
1.2 |
|
78 |
|
|
=item $M->open(path) |
79 |
|
|
|
80 |
|
|
=item $M->create($path) |
81 |
|
|
|
82 |
|
|
Open or create the external storage file C<$path> and associate it with the tree. |
83 |
|
|
|
84 |
|
|
[this braindamaged API will go away ;)] |
85 |
|
|
|
86 |
|
|
=item $M->insert(\@v, $data) |
87 |
root |
1.1 |
|
88 |
root |
1.2 |
Insert a vector (given by an array reference) into the index and associate |
89 |
|
|
it with the value C<$data> (a 32-bit integer). |
90 |
root |
1.1 |
|
91 |
root |
1.3 |
=item $M->sync |
92 |
|
|
|
93 |
|
|
Synchronize the data file with memory. Useful after calling C<insert> to |
94 |
|
|
ensure the data actually reaches stable storage. |
95 |
|
|
|
96 |
root |
1.2 |
=item $res = $M->range(\@v, $radius) |
97 |
|
|
|
98 |
|
|
Search all entries not farther away from C<@v> then C<$radius> and return |
99 |
|
|
an arrayref containing the searchresults. |
100 |
|
|
|
101 |
|
|
Each result is again anarrayref composed like this: |
102 |
|
|
|
103 |
|
|
[\@v, $data] |
104 |
|
|
|
105 |
|
|
e.g. the same as given to the C<insert> method. |
106 |
|
|
|
107 |
|
|
=item $res = $M->top(\@v, $n) |
108 |
|
|
|
109 |
|
|
Return the C<$n> "nearest neighbours". The results arrayref (see C<range>) |
110 |
|
|
contains the C<$n> index values nearest to C<@v>, sorted for distance. |
111 |
|
|
|
112 |
|
|
=item $distance = $M->distance(\@v1, \@v2) |
113 |
|
|
|
114 |
|
|
Calculcate the distance between two vectors, just as they databse engine |
115 |
|
|
would do it. |
116 |
|
|
|
117 |
|
|
=item $depth = $M->maxlevel |
118 |
|
|
|
119 |
|
|
Return the maximum height of the tree (usually a small integer specifying |
120 |
|
|
the length of the path from the root to the farthest leaf) |
121 |
root |
1.1 |
|
122 |
|
|
=cut |
123 |
root |
1.3 |
|
124 |
|
|
sub new { |
125 |
|
|
my $class = shift; |
126 |
|
|
my %a = @_; |
127 |
|
|
$class->_new( |
128 |
|
|
$a{ndims}, |
129 |
|
|
$a{range}[0], |
130 |
|
|
$a{range}[1], |
131 |
|
|
$a{range}[2], |
132 |
|
|
$a{pagesize}, |
133 |
|
|
); |
134 |
|
|
} |
135 |
root |
1.1 |
|
136 |
|
|
=back |
137 |
root |
1.2 |
|
138 |
|
|
=head1 BUGS |
139 |
|
|
|
140 |
|
|
Inserting too many duplicate keys into the tree cause the C++ library to |
141 |
|
|
die, so don't do that. |
142 |
root |
1.1 |
|
143 |
|
|
=head1 AUTHOR |
144 |
|
|
|
145 |
|
|
Marc Lehmann <pcg@goof.com>. |
146 |
|
|
|
147 |
|
|
=head1 SEE ALSO |
148 |
|
|
|
149 |
|
|
perl(1), L<DBIx::SpatialKeys>. |
150 |
|
|
|
151 |
|
|
=cut |
152 |
|
|
|
153 |
|
|
1; |
154 |
|
|
|