Revision: | 1.3 |

Committed: | Wed May 23 02:11:14 2001 UTC (23 years, 2 months ago) by root |

Branch: | MAIN |

CVS Tags: | HEAD |

Changes since 1.2: |
+34 -10 lines |

Log Message: | *** empty log message *** |

# | User | Rev | Content |
---|---|---|---|

1 | root | 1.1 | package Tree::M; |

2 | |||

3 | use Carp; | ||

4 | use DynaLoader; | ||

5 | |||

6 | BEGIN { | ||

7 | root | 1.3 | $VERSION = 0.02; |

8 | root | 1.1 | @ISA = qw(DynaLoader); |

9 | bootstrap Tree::M, $VERSION; | ||

10 | } | ||

11 | |||

12 | =head1 NAME | ||

13 | |||

14 | root | 1.3 | Tree::M - implement M-trees for efficient "metric/multimedia-searches" |

15 | root | 1.1 | |

16 | =head1 SYNOPSIS | ||

17 | |||

18 | use Tree::M; | ||

19 | |||

20 | root | 1.2 | $M = new Tree::M |

21 | |||

22 | root | 1.1 | =head1 DESCRIPTION |

23 | |||

24 | (not yet) | ||

25 | |||

26 | Ever had the problem of managing multi-dimensional (spatial) data but your | ||

27 | database only had one-dimensional indices (b-tree etc.)? Queries like | ||

28 | |||

29 | select data from table where latitude > 40 and latitude < 50 | ||

30 | root | 1.2 | and longitude> 50 and longitude< 60; |

31 | root | 1.1 | |

32 | are quite inefficient, unless longitude and latitude are part of the same | ||

33 | root | 1.2 | spatial index (e.g. an R-tree). |

34 | root | 1.1 | |

35 | root | 1.2 | An M-tree is an index tree that does not directly look at the stored keys |

36 | but rather requires a I<distance> (a metric, e.g. a vector norm) function | ||

37 | to be defined that sorts keys according to their distance. In the example | ||

38 | above the distance function could be the maximum norm (C<max(x1-x2, | ||

39 | y1-y2)>). The lookup above would then be something like this: | ||

40 | |||

41 | my $res = $M->range([45,55], 5); | ||

42 | |||

43 | This module implements an M-tree. Although the data structure and the | ||

44 | distance function is arbitrary, the current version only implements | ||

45 | n-dimensional discrete vectors and hardwires the distance function to the | ||

46 | suared euclidean metric (i.e. C<(x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2 + | ||

47 | ...>). Evolution towards more freedom is expected ;) | ||

48 | root | 1.1 | |

49 | =head2 THE Tree::M CLASS | ||

50 | |||

51 | =over 4 | ||

52 | |||

53 | root | 1.3 | =item $M = new Tree::M arg => value, ... |

54 | root | 1.2 | |

55 | Creates a new M-Tree. Before it can be used you have to call one of the | ||

56 | root | 1.3 | C<create> or C<open> methods below. |

57 | root | 1.2 | |

58 | root | 1.3 | ndims => integer |

59 | the number of dimensions each vector has | ||

60 | |||

61 | range => [min, max, steps] | ||

62 | min the lowest allowable scalar value in each dimension | ||

63 | max the maximum allowable number | ||

64 | steps the number of discrete steps (used when stored externally) | ||

65 | |||

66 | pagesize => integer | ||

67 | the size of one page on underlying storage. usually 4096, but | ||

68 | large objects (ndims > 20 or so) might want to increase this | ||

69 | root | 1.2 | |

70 | Example: create an M-Tree that stores 8-bit rgb-values: | ||

71 | |||

72 | root | 1.3 | $M = new Tree::M ndims => 3, range => [0, 255, 256]; |

73 | root | 1.2 | |

74 | Example: create an M-Tree that stores coordinates from -1..1 with 100 different steps: | ||

75 | |||

76 | root | 1.3 | $M = new Tree::M ndims => 2, range => [-1, 1, 100]; |

77 | root | 1.2 | |

78 | =item $M->open(path) | ||

79 | |||

80 | =item $M->create($path) | ||

81 | |||

82 | Open or create the external storage file C<$path> and associate it with the tree. | ||

83 | |||

84 | [this braindamaged API will go away ;)] | ||

85 | |||

86 | =item $M->insert(\@v, $data) | ||

87 | root | 1.1 | |

88 | root | 1.2 | Insert a vector (given by an array reference) into the index and associate |

89 | it with the value C<$data> (a 32-bit integer). | ||

90 | root | 1.1 | |

91 | root | 1.3 | =item $M->sync |

92 | |||

93 | Synchronize the data file with memory. Useful after calling C<insert> to | ||

94 | ensure the data actually reaches stable storage. | ||

95 | |||

96 | root | 1.2 | =item $res = $M->range(\@v, $radius) |

97 | |||

98 | Search all entries not farther away from C<@v> then C<$radius> and return | ||

99 | an arrayref containing the searchresults. | ||

100 | |||

101 | Each result is again anarrayref composed like this: | ||

102 | |||

103 | [\@v, $data] | ||

104 | |||

105 | e.g. the same as given to the C<insert> method. | ||

106 | |||

107 | =item $res = $M->top(\@v, $n) | ||

108 | |||

109 | Return the C<$n> "nearest neighbours". The results arrayref (see C<range>) | ||

110 | contains the C<$n> index values nearest to C<@v>, sorted for distance. | ||

111 | |||

112 | =item $distance = $M->distance(\@v1, \@v2) | ||

113 | |||

114 | Calculcate the distance between two vectors, just as they databse engine | ||

115 | would do it. | ||

116 | |||

117 | =item $depth = $M->maxlevel | ||

118 | |||

119 | Return the maximum height of the tree (usually a small integer specifying | ||

120 | the length of the path from the root to the farthest leaf) | ||

121 | root | 1.1 | |

122 | =cut | ||

123 | root | 1.3 | |

124 | sub new { | ||

125 | my $class = shift; | ||

126 | my %a = @_; | ||

127 | $class->_new( | ||

128 | $a{ndims}, | ||

129 | $a{range}[0], | ||

130 | $a{range}[1], | ||

131 | $a{range}[2], | ||

132 | $a{pagesize}, | ||

133 | ); | ||

134 | } | ||

135 | root | 1.1 | |

136 | =back | ||

137 | root | 1.2 | |

138 | =head1 BUGS | ||

139 | |||

140 | Inserting too many duplicate keys into the tree cause the C++ library to | ||

141 | die, so don't do that. | ||

142 | root | 1.1 | |

143 | =head1 AUTHOR | ||

144 | |||

145 | Marc Lehmann <pcg@goof.com>. | ||

146 | |||

147 | =head1 SEE ALSO | ||

148 | |||

149 | perl(1), L<DBIx::SpatialKeys>. | ||

150 | |||

151 | =cut | ||

152 | |||

153 | 1; | ||

154 |