1 |
root |
1.1 |
package Tree::M; |
2 |
|
|
|
3 |
|
|
use Carp; |
4 |
|
|
use DynaLoader; |
5 |
|
|
|
6 |
|
|
BEGIN { |
7 |
|
|
$VERSION = 0.01; |
8 |
|
|
@ISA = qw(DynaLoader); |
9 |
|
|
bootstrap Tree::M, $VERSION; |
10 |
|
|
} |
11 |
|
|
|
12 |
|
|
=head1 NAME |
13 |
|
|
|
14 |
|
|
Tree::M - implement M-trees for efficient "multimedia-searched" |
15 |
|
|
|
16 |
|
|
=head1 SYNOPSIS |
17 |
|
|
|
18 |
|
|
use Tree::M; |
19 |
|
|
|
20 |
root |
1.2 |
$M = new Tree::M |
21 |
|
|
|
22 |
root |
1.1 |
=head1 DESCRIPTION |
23 |
|
|
|
24 |
|
|
(not yet) |
25 |
|
|
|
26 |
|
|
Ever had the problem of managing multi-dimensional (spatial) data but your |
27 |
|
|
database only had one-dimensional indices (b-tree etc.)? Queries like |
28 |
|
|
|
29 |
|
|
select data from table where latitude > 40 and latitude < 50 |
30 |
root |
1.2 |
and longitude> 50 and longitude< 60; |
31 |
root |
1.1 |
|
32 |
|
|
are quite inefficient, unless longitude and latitude are part of the same |
33 |
root |
1.2 |
spatial index (e.g. an R-tree). |
34 |
root |
1.1 |
|
35 |
root |
1.2 |
An M-tree is an index tree that does not directly look at the stored keys |
36 |
|
|
but rather requires a I<distance> (a metric, e.g. a vector norm) function |
37 |
|
|
to be defined that sorts keys according to their distance. In the example |
38 |
|
|
above the distance function could be the maximum norm (C<max(x1-x2, |
39 |
|
|
y1-y2)>). The lookup above would then be something like this: |
40 |
|
|
|
41 |
|
|
my $res = $M->range([45,55], 5); |
42 |
|
|
|
43 |
|
|
This module implements an M-tree. Although the data structure and the |
44 |
|
|
distance function is arbitrary, the current version only implements |
45 |
|
|
n-dimensional discrete vectors and hardwires the distance function to the |
46 |
|
|
suared euclidean metric (i.e. C<(x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2 + |
47 |
|
|
...>). Evolution towards more freedom is expected ;) |
48 |
root |
1.1 |
|
49 |
|
|
=head2 THE Tree::M CLASS |
50 |
|
|
|
51 |
|
|
=over 4 |
52 |
|
|
|
53 |
root |
1.2 |
=item $M = new Tree::M ndims, min, max, steps |
54 |
|
|
|
55 |
|
|
Creates a new M-Tree. Before it can be used you have to call one of the |
56 |
|
|
C<create> or C<open> methods. |
57 |
|
|
|
58 |
|
|
ndims the number of dimensions each vector has |
59 |
|
|
min the lowest allowable scalar value in each dimension |
60 |
|
|
max the maximum allowable number |
61 |
|
|
steps the number of discrete steps (used when stored externally) |
62 |
|
|
|
63 |
|
|
Example: create an M-Tree that stores 8-bit rgb-values: |
64 |
|
|
|
65 |
|
|
$M = new Tree::M 3, 0, 255, 256; |
66 |
|
|
|
67 |
|
|
Example: create an M-Tree that stores coordinates from -1..1 with 100 different steps: |
68 |
|
|
|
69 |
|
|
$M = new Tree::M 2, -1, 1, 100; |
70 |
|
|
|
71 |
|
|
=item $M->open(path) |
72 |
|
|
|
73 |
|
|
=item $M->create($path) |
74 |
|
|
|
75 |
|
|
Open or create the external storage file C<$path> and associate it with the tree. |
76 |
|
|
|
77 |
|
|
[this braindamaged API will go away ;)] |
78 |
|
|
|
79 |
|
|
=item $M->insert(\@v, $data) |
80 |
root |
1.1 |
|
81 |
root |
1.2 |
Insert a vector (given by an array reference) into the index and associate |
82 |
|
|
it with the value C<$data> (a 32-bit integer). |
83 |
root |
1.1 |
|
84 |
root |
1.2 |
=item $res = $M->range(\@v, $radius) |
85 |
|
|
|
86 |
|
|
Search all entries not farther away from C<@v> then C<$radius> and return |
87 |
|
|
an arrayref containing the searchresults. |
88 |
|
|
|
89 |
|
|
Each result is again anarrayref composed like this: |
90 |
|
|
|
91 |
|
|
[\@v, $data] |
92 |
|
|
|
93 |
|
|
e.g. the same as given to the C<insert> method. |
94 |
|
|
|
95 |
|
|
=item $res = $M->top(\@v, $n) |
96 |
|
|
|
97 |
|
|
Return the C<$n> "nearest neighbours". The results arrayref (see C<range>) |
98 |
|
|
contains the C<$n> index values nearest to C<@v>, sorted for distance. |
99 |
|
|
|
100 |
|
|
=item $distance = $M->distance(\@v1, \@v2) |
101 |
|
|
|
102 |
|
|
Calculcate the distance between two vectors, just as they databse engine |
103 |
|
|
would do it. |
104 |
|
|
|
105 |
|
|
=item $depth = $M->maxlevel |
106 |
|
|
|
107 |
|
|
Return the maximum height of the tree (usually a small integer specifying |
108 |
|
|
the length of the path from the root to the farthest leaf) |
109 |
root |
1.1 |
|
110 |
|
|
=cut |
111 |
|
|
|
112 |
|
|
=back |
113 |
root |
1.2 |
|
114 |
|
|
=head1 BUGS |
115 |
|
|
|
116 |
|
|
Inserting too many duplicate keys into the tree cause the C++ library to |
117 |
|
|
die, so don't do that. |
118 |
root |
1.1 |
|
119 |
|
|
=head1 AUTHOR |
120 |
|
|
|
121 |
|
|
Marc Lehmann <pcg@goof.com>. |
122 |
|
|
|
123 |
|
|
=head1 SEE ALSO |
124 |
|
|
|
125 |
|
|
perl(1), L<DBIx::SpatialKeys>. |
126 |
|
|
|
127 |
|
|
=cut |
128 |
|
|
|
129 |
|
|
1; |
130 |
|
|
|