1 |
/********************************************************************* |
2 |
* * |
3 |
* Copyright (c) 1997,1998, 1999 * |
4 |
* Multimedia DB Group and DEIS - CSITE-CNR, * |
5 |
* University of Bologna, Bologna, ITALY. * |
6 |
* * |
7 |
* All Rights Reserved. * |
8 |
* * |
9 |
* Permission to use, copy, and distribute this software and its * |
10 |
* documentation for NON-COMMERCIAL purposes and without fee is * |
11 |
* hereby granted provided that this copyright notice appears in * |
12 |
* all copies. * |
13 |
* * |
14 |
* THE AUTHORS MAKE NO REPRESENTATIONS OR WARRANTIES ABOUT THE * |
15 |
* SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING * |
16 |
* BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, * |
17 |
* FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHOR * |
18 |
* SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE AS A * |
19 |
* RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS * |
20 |
* DERIVATIVES. * |
21 |
* * |
22 |
*********************************************************************/ |
23 |
|
24 |
#ifdef UNIX |
25 |
#include <unistd.h> |
26 |
#endif |
27 |
#include "MT.h" |
28 |
|
29 |
extern double MIN_UTIL; |
30 |
|
31 |
// find the node having the minimum number of children |
32 |
// this is used in the redistributing phase of the BulkLoad algorithm |
33 |
int |
34 |
FindMin(int *children, int max) |
35 |
{ |
36 |
int j, jmin=0; |
37 |
|
38 |
for(j=1; j<max; j++) |
39 |
if(children[j]<children[jmin]) jmin=j; |
40 |
return jmin; |
41 |
} |
42 |
|
43 |
// return root level+1 (the height of the tree) |
44 |
// this is used in the "splitting" phase of the BulkLoad algorithm |
45 |
int |
46 |
MT::MaxLevel() const |
47 |
{ |
48 |
GiSTnode *root; |
49 |
GiSTpath path; |
50 |
int i; |
51 |
|
52 |
path.MakeRoot(); |
53 |
root=ReadNode(path); |
54 |
i=root->Level(); |
55 |
delete root; |
56 |
return i+1; |
57 |
} |
58 |
|
59 |
// split this M-tree into a list of trees having height level |
60 |
// this is used in the "splitting" phase of the BulkLoad algorithm |
61 |
GiSTlist<char *> * |
62 |
MT::SplitTree(int *ncreated, int level, GiSTlist<MTentry *> *children, char *name) |
63 |
// ncreated is the number of created sub-trees, |
64 |
// level is the split level for the tree, |
65 |
// children is the list of the parents of each sub-tree, |
66 |
// name is the root for the sub-trees names |
67 |
// the return value is the list of splitted sub-trees |
68 |
{ |
69 |
GiSTlist<char *> *trees=new GiSTlist<char *>; // this is the results list |
70 |
GiSTlist<MTnode *> *oldList=new GiSTlist<MTnode *>; // this is the nodes list |
71 |
GiSTpath path; |
72 |
MTnode *node=new MTnode; // this is because the first operation on node is a delete |
73 |
char newname[50]; |
74 |
|
75 |
path.MakeRoot(); |
76 |
oldList->Append((MTnode *)ReadNode(path)); |
77 |
do { // build the roots list |
78 |
GiSTlist<MTnode *> *newList=new GiSTlist<MTnode *>; // this is the list of the current level nodes |
79 |
|
80 |
while(!oldList->IsEmpty()) { |
81 |
delete node; // delete the old node created by ReadNode |
82 |
node=oldList->RemoveFront(); // retrieve next node to be examined |
83 |
path=node->Path(); |
84 |
for(int i=0; i<node->NumEntries(); i++) { // append all its children to the new list |
85 |
MTentry *e=(MTentry *)(*node)[i].Ptr(); |
86 |
|
87 |
path.MakeChild(e->Ptr()); |
88 |
newList->Append((MTnode *)ReadNode(path)); |
89 |
path.MakeParent(); |
90 |
} |
91 |
} |
92 |
delete oldList; |
93 |
oldList=newList; |
94 |
} while(node->Level()>level); // stop if we're at the split level |
95 |
delete node; |
96 |
while(!oldList->IsEmpty()) { // now append each sub-tree to its root |
97 |
MT *subtree=new MT (4096); |
98 |
assert (!"fixme: pagesize above taken wherefrom?"); |
99 |
MTnode *newnode; |
100 |
|
101 |
node=oldList->RemoveFront(); |
102 |
sprintf(newname, "%s.%i", name, ++(*ncreated)); |
103 |
unlink(newname); // if this M-tree already exists, delete it |
104 |
subtree->Create(newname); // create a new M-tree |
105 |
path.MakeRoot(); |
106 |
newnode=(MTnode *)subtree->ReadNode(path); // read the root of the tree |
107 |
subtree->Append(newnode, (MTnode *)node->Copy()); // append the sub-tree of current node to the root of this M-tree |
108 |
children->Append(node->Entry()); // insert the root entry into the list |
109 |
trees->Append(strdup(newname)); // insert the new M-tree name into the list |
110 |
delete node; |
111 |
delete newnode; |
112 |
delete subtree; |
113 |
} |
114 |
delete oldList; |
115 |
return trees; |
116 |
} |
117 |
|
118 |
// load this M-tree with n data using the BulkLoad algorithm [CP98] |
119 |
void |
120 |
MT::BulkLoad(MTentry **data, int n, double padFactor, char *name) |
121 |
// data is an array of n entries |
122 |
// padFactor is the maximum node utilization (use 1) |
123 |
// name is the name of the tree |
124 |
{ |
125 |
int i, Size=0, totSize, addEntrySize=(sizeofEntry()? sizeof(GiSTpage): sizeof(GiSTlte)+sizeof(GiSTpage)), minSize=(int)(Store()->PageSize()*MIN_UTIL), NumEntries; // this is the total size of entries |
126 |
|
127 |
if(sizeofEntry()) Size=n*(sizeof(GiSTpage)+sizeofEntry()); // (only valid if we've fixed size entries) |
128 |
else for(i=0; i<n; i++) Size+=sizeof(GiSTlte)+sizeof(GiSTpage)+data[i]->CompressedLength(); |
129 |
totSize=Size+GIST_PAGE_HEADER_SIZE+sizeof(GiSTlte); |
130 |
NumEntries=(int)(Store()->PageSize()*padFactor*n)/totSize; |
131 |
// cout << "exp. size=" << totSize << endl; |
132 |
if(totSize>Store()->PageSize()) { // we need to split the entries into several sub-trees |
133 |
GiSTlist<char *> nameslist, othernameslist; // list of the sub-trees names |
134 |
GiSTlist<MTentry *> plist, parentslist; // lists of the root entries of each sub-tree |
135 |
GiSTlist<int> *lists=NULL; // list of entries for each sub-tree |
136 |
GiSTlist<double> *dists=NULL; // list of distances for each sub-tree |
137 |
char **trees; // array of the sub-trees names |
138 |
GiSTlist<MTnode *> *oldList=new GiSTlist<MTnode *>; |
139 |
GiSTpath path; |
140 |
MTentry ***arrays; // array of the entries for each sub-tree |
141 |
MTentry **parents; // array of the root entries for each sub-tree |
142 |
MTnode *node=NULL; |
143 |
GiSTlist<double *> *distm; // distance matrix |
144 |
int s=(int)MAX(MIN(NumEntries, ceil(((float)n)/NumEntries)), NumEntries*MIN_UTIL); // initial number of samples |
145 |
int j, nsamples, *samples=new int[s], *sizes=NULL, *ns=NULL, ncreated=0, minLevel=MAXINT, nInit, l, iters=0, MAXITER=s*s; |
146 |
char newname[50]; |
147 |
BOOL *sampled=new BOOL[n]; // is this entry in the samples set? |
148 |
|
149 |
// cout << "NE*pF=" << NumEntries*padFactor << ", n/NE*pF=" << n/floor(NumEntries*padFactor) << ", s=" << s << endl; |
150 |
distm=(GiSTlist<double *> *)calloc(s,sizeof(GiSTlist<double *>)); |
151 |
do { // sampling phase |
152 |
iters++; |
153 |
if(iters>1) { // this is a new sampling phase |
154 |
// cout << "Re-sampling: " << iters << "/" << MAXITER << endl; |
155 |
while(!lists[0].IsEmpty()) { |
156 |
lists[0].RemoveFront(); |
157 |
dists[0].RemoveFront(); |
158 |
} |
159 |
delete []lists; |
160 |
delete []dists; |
161 |
delete []sizes; |
162 |
delete []ns; |
163 |
while(!distm[0].IsEmpty()) delete []distm[0].RemoveFront(); // empty the distance list |
164 |
for(i=1; i<s; i++) distm[i].front=distm[i].rear=NULL; |
165 |
} |
166 |
// if(iters>=MAXITER) minSize=0; |
167 |
if(iters>=MAXITER) { |
168 |
cout << "Too many loops in BulkLoad!\nPlease select a lower minimum node utilization or a bigger node size.\n"; |
169 |
exit(1); |
170 |
} |
171 |
nsamples=0; |
172 |
for(i=0; i<n; i++) sampled[i]=FALSE; |
173 |
// pick samples to create parents |
174 |
while(nsamples<s) { |
175 |
do i=PickRandom(0, n); |
176 |
while(sampled[i]); |
177 |
sampled[i]=TRUE; |
178 |
samples[nsamples++]=i; |
179 |
} |
180 |
// cout << "Samples:\n"; |
181 |
// for(i=0; i<s; i++) cout << "\t" << i << ":\t" << data[samples[i]]; |
182 |
lists=new GiSTlist<int>[s]; |
183 |
dists=new GiSTlist<double>[s]; |
184 |
sizes=new int[s]; |
185 |
ns=new int[s]; |
186 |
for(i=0; i<s; i++) { |
187 |
sizes[i]=GIST_PAGE_HEADER_SIZE+sizeof(GiSTlte); |
188 |
ns[i]=1; |
189 |
} |
190 |
for(i=0; i<s; i++) distm[i].Prepend(new double[s]); |
191 |
for(i=0; i<s; i++) { // compute the relative distances between samples |
192 |
for(j=0; j<i; j++) |
193 |
distm[j].front->entry[i]=(distm[i].front->entry[j]=data[samples[j]]->object().distance(data[samples[i]]->object())); |
194 |
distm[i].front->entry[i]=0; |
195 |
} |
196 |
for(i=0; i<n; i++) { // assign each entry to its nearest parent |
197 |
// cout << "Now assigning " << data[i]; |
198 |
if(sampled[i]) { |
199 |
for(j=0; samples[j]!=i; j++); |
200 |
lists[j].Prepend(i); // insert the entry in the right list |
201 |
dists[j].Prepend(0); |
202 |
sizes[j]+=addEntrySize+data[i]->CompressedLength(); |
203 |
// cout << "\tAssigned (0) to " << j << ", " << data[samples[j]]; |
204 |
} |
205 |
else { // here we optimize the distance computations (like we do in the insert algorithm) |
206 |
double *dist=new double[s]; |
207 |
int minindex=0; |
208 |
|
209 |
dist[0]=data[samples[0]]->object().distance(data[i]->object()); |
210 |
for(j=1; j<s; j++) { |
211 |
BOOL cont=TRUE; |
212 |
|
213 |
dist[j]=-1; |
214 |
if(fabs(data[samples[j]]->Key()->distance-data[i]->Key()->distance)>dist[minindex]) continue; // pruning for reference point (parent) |
215 |
for(int k=0; (k<j)&&cont; k++) // pruning for reference points (other samples) |
216 |
if(dist[k]<0) continue; |
217 |
else cont=(fabs(dist[k]-distm[j].front->entry[k])<dist[minindex]); |
218 |
if(!cont) continue; |
219 |
dist[j]=data[samples[j]]->object().distance(data[i]->object()); // we have to compute this distance |
220 |
if(dist[j]<dist[minindex]) minindex=j; |
221 |
} |
222 |
// cout << "\tAssigned (" << dist[minindex] << ") to " << minindex << ", " << data[samples[minindex]]; |
223 |
lists[minindex].Append(i); |
224 |
dists[minindex].Append(dist[minindex]); |
225 |
sizes[minindex]+=addEntrySize+data[i]->CompressedLength(); |
226 |
ns[minindex]++; |
227 |
if(sizes[minindex]>=minSize) delete []dist; |
228 |
else distm[minindex].Append(dist); |
229 |
} |
230 |
} |
231 |
// redistribute underfilled parents |
232 |
// cout << "Underfilled parents redistribution...\n"; |
233 |
while(sizes[i=FindMin(sizes, nsamples)]<minSize) { |
234 |
GiSTlist<int> list=lists[i]; |
235 |
GiSTlist<double *> dlist=distm[i]; |
236 |
|
237 |
while(!dists[i].IsEmpty()) dists[i].RemoveFront(); |
238 |
// cout << "Redistributing " << i << "' set (" << sizes[i] << "/" << minSize << ")...\n"; |
239 |
for(j=0; j<nsamples; j++) |
240 |
for(GiSTlistnode<double *> *lnode=distm[j].front; lnode; lnode=lnode->next) lnode->entry[i]=lnode->entry[nsamples-1]; |
241 |
// substitute this set with last set |
242 |
distm[i]=distm[nsamples-1]; |
243 |
lists[i]=lists[nsamples-1]; |
244 |
dists[i]=dists[nsamples-1]; |
245 |
samples[i]=samples[nsamples-1]; |
246 |
sizes[i]=sizes[nsamples-1]; |
247 |
ns[i]=ns[nsamples-1]; |
248 |
nsamples--; |
249 |
while(!list.IsEmpty()) { // assign each entry to its nearest parent |
250 |
double *d=dlist.RemoveFront(); |
251 |
int k=list.RemoveFront(), index, minindex=-1; |
252 |
// cout << "Now assigning " << data[k]; |
253 |
|
254 |
for(index=0; (index<nsamples)&&(minindex<0); index++) // search for a computed distance |
255 |
if(d[index]>0) minindex=index; |
256 |
if(minindex<0) { // no distance was computed (i.e. all distances were pruned) |
257 |
d[0]=data[samples[0]]->object().distance(data[k]->object()); |
258 |
minindex=0; |
259 |
} |
260 |
for(index=0; index<nsamples; index++) { |
261 |
BOOL cont=TRUE; |
262 |
|
263 |
if(index==minindex) continue; |
264 |
if(d[index]<0) { // distance wasn't computed |
265 |
if(fabs(data[samples[index]]->Key()->distance-data[k]->Key()->distance)>d[minindex]) continue; // pruning for reference point (parent) |
266 |
for(l=0; (l<index)&&cont; l++) // pruning for reference points (other samples) |
267 |
if(d[l]<0) continue; |
268 |
else cont=(fabs(d[l]-distm[index].front->entry[l])<d[minindex]); |
269 |
if(!cont) continue; |
270 |
d[index]=data[samples[index]]->object().distance(data[k]->object()); // we have to compute this distance |
271 |
} |
272 |
if(d[index]<d[minindex]) minindex=index; |
273 |
} |
274 |
// cout << "\tAssigned (" << d[minindex] << ") to " << minindex << ", " << data[samples[minindex]]; |
275 |
lists[minindex].Append(k); |
276 |
dists[minindex].Append(d[minindex]); |
277 |
sizes[minindex]+=addEntrySize+data[k]->CompressedLength(); |
278 |
ns[minindex]++; |
279 |
if(sizes[minindex]>=minSize) delete []d; |
280 |
else distm[minindex].Append(d); |
281 |
} |
282 |
assert(dlist.IsEmpty()); |
283 |
} |
284 |
} while(nsamples==1); // if there's only one child, repeat the sampling phase |
285 |
// cout << "Samples:\n"; |
286 |
// for(i=0; i<nsamples; i++) cout << "\t" << i << ": " << data[samples[i]]; |
287 |
arrays=new MTentry **[nsamples]; |
288 |
for(i=0; i<nsamples; i++) { // convert the lists into arrays |
289 |
arrays[i]=new MTentry *[ns[i]]; |
290 |
for(j=0; j<ns[i]; j++) { |
291 |
int k=lists[i].RemoveFront(); |
292 |
|
293 |
arrays[i][j]=(MTentry *)data[k]->Copy(); |
294 |
arrays[i][j]->Key()->distance=dists[i].RemoveFront(); |
295 |
} |
296 |
assert(lists[i].IsEmpty()); |
297 |
assert(dists[i].IsEmpty()); |
298 |
} |
299 |
delete []dists; |
300 |
delete []lists; |
301 |
for(i=0; i<nsamples; i++) |
302 |
while(!distm[i].IsEmpty()) |
303 |
delete [](distm[i].RemoveFront()); |
304 |
free(distm); |
305 |
// build an M-tree under each parent |
306 |
nInit=nsamples; |
307 |
// cout << "Now building " << nsamples << " sub-trees...\n"; |
308 |
MT *tree=new MT (4096); |
309 |
assert (!"fixme: pagesize above taken wherefrom?"); |
310 |
|
311 |
// for(i=0; i<nsamples; i++) cout << i+1 << "' set: " << ns[i] << endl; |
312 |
for(i=0; i<nInit; i++) { |
313 |
MTnode *root; |
314 |
GiSTpath path; |
315 |
|
316 |
sprintf(newname, "%s.%i", name, ++ncreated); |
317 |
unlink(newname); |
318 |
tree->Create(newname); // create the new sub-tree |
319 |
// cout << "Now building sub-tree " << newname << " on " << ns[i] << " data (exp. size=" << sizes[i] << ")...\n"; |
320 |
tree->BulkLoad(arrays[i], ns[i], padFactor, newname); // insert the data into the sub-tree |
321 |
// cout << "Tree level=" << tree->MaxLevel() << endl; |
322 |
// if the root node is underfilled, we have to split the tree |
323 |
path.MakeRoot(); |
324 |
root=(MTnode *)tree->ReadNode(path); |
325 |
if(root->IsUnderFull(*Store())) { |
326 |
GiSTlist<MTentry *> *roots=new GiSTlist<MTentry *>; |
327 |
GiSTlist<char *> *list=tree->SplitTree(&ncreated, tree->MaxLevel()-1, roots, name); // split the tree |
328 |
|
329 |
nsamples--; |
330 |
while(!list->IsEmpty()) { // insert all the new trees in the sub-trees list |
331 |
MTentry *e=roots->RemoveFront(); |
332 |
|
333 |
othernameslist.Append(list->RemoveFront()); |
334 |
for(j=0; j<n; j++) if(data[j]->object()==e->object()) { // append also the root entry to the parents list |
335 |
// cout << "parent=" << data[j]; |
336 |
plist.Append(data[j]); |
337 |
j=n; |
338 |
} |
339 |
delete e; |
340 |
nsamples++; |
341 |
} |
342 |
delete list; |
343 |
delete roots; |
344 |
minLevel=MIN(minLevel, tree->MaxLevel()-1); |
345 |
} |
346 |
else { |
347 |
char *tmp=new char[50]; |
348 |
|
349 |
strcpy(tmp, newname); |
350 |
othernameslist.Append(tmp); |
351 |
plist.Append(data[samples[i]]); |
352 |
minLevel=MIN(minLevel, tree->MaxLevel()); |
353 |
} |
354 |
delete root; |
355 |
tree->Close(); |
356 |
delete tree->Store(); // it was created in tree->Create() |
357 |
} |
358 |
for(i=0; i<nInit; i++) { |
359 |
for(j=0; j<ns[i]; j++) delete arrays[i][j]; |
360 |
delete []arrays[i]; |
361 |
} |
362 |
delete []arrays; |
363 |
while(!plist.IsEmpty()) { // insert the trees in the list (splitting if necessary) |
364 |
MTentry *parent=plist.RemoveFront(); |
365 |
char *tmp=othernameslist.RemoveFront(); |
366 |
|
367 |
strcpy(newname, tmp); |
368 |
delete []tmp; |
369 |
tree->Open(newname); |
370 |
// cout << ": Tree level=" << tree->MaxLevel() << " (" << minLevel << ")\n"; |
371 |
if(tree->MaxLevel()>minLevel) { // we have to split the tree to reduce its height |
372 |
// cout << "level too high!!! (min=" << minLevel << ") Splitting the tree...\n"; |
373 |
GiSTlist<MTentry *> *roots=new GiSTlist<MTentry *>; |
374 |
GiSTlist<char *> *list=tree->SplitTree(&ncreated, minLevel, roots, name); // split the tree |
375 |
|
376 |
nsamples--; |
377 |
while(!list->IsEmpty()) { // insert all the new trees in the sub-trees list |
378 |
MTentry *e=roots->RemoveFront(); |
379 |
|
380 |
nameslist.Append(list->RemoveFront()); |
381 |
for(j=0; j<n; j++) if(data[j]->object()==e->object()) { // append also the root entry to the parents list |
382 |
// cout << "parent=" << data[j]; |
383 |
parentslist.Append(data[j]); |
384 |
j=n; |
385 |
} |
386 |
delete e; |
387 |
nsamples++; |
388 |
} |
389 |
delete list; |
390 |
delete roots; |
391 |
} |
392 |
else { // simply insert the tree and its root to the lists |
393 |
char *tmp=new char[50]; |
394 |
|
395 |
strcpy(tmp, newname); |
396 |
nameslist.Append(tmp); |
397 |
// cout << "parent=" << data[samples[i]]; |
398 |
parentslist.Append(parent); |
399 |
} |
400 |
tree->Close(); |
401 |
delete tree->Store(); // it was created in tree->Open() |
402 |
} |
403 |
parents=new MTentry *[nsamples]; |
404 |
trees=new char *[nsamples]; |
405 |
for(i=0; i<nsamples; i++) { // convert the lists into arrays |
406 |
trees[i]=nameslist.RemoveFront(); |
407 |
parents[i]=parentslist.RemoveFront(); |
408 |
} |
409 |
// build the super-tree upon the parents |
410 |
sprintf(newname, "%s.0", name); |
411 |
// cout << "Now building super-tree " << newname << " on " << nsamples << " data...\n"; |
412 |
BulkLoad(parents, nsamples, padFactor, newname); |
413 |
// attach each sub-tree to the leaves of the super-tree |
414 |
path.MakeRoot(); |
415 |
node=(MTnode *)ReadNode(path); |
416 |
oldList->Append(node); |
417 |
// cout << "super-tree built!\n"; |
418 |
l=node->Level(); |
419 |
while(l>0) { // build the leaves list for super-tree |
420 |
GiSTlist<MTnode *> *newList=new GiSTlist<MTnode *>; |
421 |
|
422 |
while(!oldList->IsEmpty()) { |
423 |
node=oldList->RemoveFront(); |
424 |
path=node->Path(); |
425 |
node->SetLevel(node->Level()+minLevel); // update level of the upper nodes of the super-tree |
426 |
WriteNode(node); |
427 |
for(i=0; i<node->NumEntries(); i++) { |
428 |
MTentry *e=(MTentry *)(*node)[i].Ptr(); |
429 |
|
430 |
path.MakeChild(e->Ptr()); |
431 |
newList->Append((MTnode *)ReadNode(path)); |
432 |
path.MakeParent(); |
433 |
} |
434 |
delete node; |
435 |
} |
436 |
delete oldList; |
437 |
oldList=newList; |
438 |
l--; |
439 |
} |
440 |
// cout << "Finished " << newname << endl; |
441 |
while(!oldList->IsEmpty()) { // attach each sub-tree to its leaf |
442 |
GiSTpath rootpath; |
443 |
|
444 |
rootpath.MakeRoot(); |
445 |
node=oldList->RemoveFront(); // retrieve next leaf (root of sub tree) |
446 |
node->SetLevel(minLevel); // update level of the root of the sub-tree |
447 |
path=node->Path(); |
448 |
for(i=0; i<node->NumEntries(); i++) { |
449 |
MTnode *newnode=(MTnode *)CreateNode(); |
450 |
MTentry *e=(MTentry *)(*node)[i].Ptr(); |
451 |
GiSTpath newpath; |
452 |
|
453 |
path.MakeChild(Store()->Allocate()); |
454 |
newnode->Path()=path; |
455 |
e->SetPtr(path.Page()); |
456 |
path.MakeParent(); |
457 |
for(j=0; e->object()!=parents[j]->object(); j++); // search the tree to append |
458 |
tree->Open(trees[j]); |
459 |
// cout << "Now appending sub-tree " << trees[j] << endl; |
460 |
Append(newnode, (MTnode *)tree->ReadNode(rootpath)); // append this sub-tree to the super-tree |
461 |
tree->Close(); |
462 |
delete tree->Store(); // it was created in tree->Open() |
463 |
newpath=newnode->Path(); |
464 |
delete newnode; |
465 |
} |
466 |
WriteNode(node); |
467 |
delete node; |
468 |
} |
469 |
tree->Open(trees[0]); // in order to destroy the object tree |
470 |
delete tree; |
471 |
for(i=0; i<nsamples; i++) delete []trees[i]; |
472 |
delete []trees; |
473 |
// update radii of the upper nodes of the result M-tree |
474 |
path.MakeRoot(); |
475 |
node=(MTnode *)ReadNode(path); |
476 |
oldList->Append(node); |
477 |
l=node->Level(); |
478 |
while(l>=minLevel) { // build the list of the nodes which radii should be recomputed |
479 |
GiSTlist<MTnode *> *newList=new GiSTlist<MTnode *>; |
480 |
|
481 |
while(!oldList->IsEmpty()) { |
482 |
|
483 |
node=oldList->RemoveFront(); |
484 |
path=node->Path(); |
485 |
for(i=0; i<node->NumEntries(); i++) { |
486 |
MTentry *e=(MTentry *)(*node)[i].Ptr(); |
487 |
|
488 |
path.MakeChild(e->Ptr()); |
489 |
newList->Append((MTnode *)ReadNode(path)); |
490 |
path.MakeParent(); |
491 |
} |
492 |
delete node; |
493 |
} |
494 |
delete oldList; |
495 |
oldList=newList; |
496 |
l--; |
497 |
} |
498 |
while(!oldList->IsEmpty()) { // adjust the radii of the nodes |
499 |
MTnode *node=oldList->RemoveFront(); |
500 |
|
501 |
AdjKeys(node); |
502 |
delete node; |
503 |
} |
504 |
// be tidy... |
505 |
delete oldList; |
506 |
delete []parents; |
507 |
delete []sizes; |
508 |
delete []ns; |
509 |
delete []sampled; |
510 |
delete []samples; |
511 |
for(i=0; i<=ncreated; i++) { // delete all temporary sub-trees |
512 |
sprintf(newname, "%s.%i", name, i); |
513 |
unlink(newname); |
514 |
} |
515 |
} |
516 |
else { // we can insert all the entries in a single node |
517 |
GiSTpath path; |
518 |
GiSTnode *node; |
519 |
|
520 |
path.MakeRoot(); |
521 |
node=ReadNode(path); |
522 |
for(i=0; i<n; i++) |
523 |
node->Insert(*(data[i])); |
524 |
assert(!node->IsOverFull(*Store())); |
525 |
// cout << "real size=" << node->Size() << endl; |
526 |
WriteNode(node); |
527 |
delete node; |
528 |
} |
529 |
} |
530 |
|
531 |
// append the sub-tree rooted at from to the node to |
532 |
// this is used in the "append" phase of the BulkLoad algorithm |
533 |
void |
534 |
MT::Append(MTnode *to, MTnode *from) |
535 |
{ |
536 |
GiSTlist<MTnode *> *oldList=new GiSTlist<MTnode *>; // list of the nodes to append |
537 |
GiSTlist<GiSTpath> pathList; |
538 |
MT *fromtree=(MT *)from->Tree(); |
539 |
MTnode *node=new MTnode, *newnode; |
540 |
|
541 |
// cout << "Appending " << from; |
542 |
oldList->Append(from); |
543 |
pathList.Append(to->Path()); |
544 |
do { |
545 |
GiSTlist<MTnode *> *newList=new GiSTlist<MTnode *>; |
546 |
|
547 |
while(!oldList->IsEmpty()) { |
548 |
GiSTpath newpath=pathList.RemoveFront(); |
549 |
|
550 |
delete node; |
551 |
node=oldList->RemoveFront(); |
552 |
newnode=(MTnode *)ReadNode(newpath); |
553 |
// cout << "Inserting " << node->NumEntries() << " entries:\n"; |
554 |
for(int i=0; i<node->NumEntries(); i++) { |
555 |
MTentry *e=(MTentry *)(*node)[i].Ptr()->Copy(); |
556 |
|
557 |
if(node->Level()>0) { // if node isn't a leaf, we've to allocate its children |
558 |
GiSTpath nodepath=node->Path(); |
559 |
MTnode *childnode=(MTnode *)CreateNode(), *fromnode; |
560 |
|
561 |
nodepath.MakeChild(e->Ptr()); |
562 |
fromnode=(MTnode *)fromtree->ReadNode(nodepath); |
563 |
newList->Append(fromnode); |
564 |
e->SetPtr(Store()->Allocate()); |
565 |
newpath.MakeChild(e->Ptr()); |
566 |
childnode->Path()=newpath; |
567 |
childnode->SetTree(this); |
568 |
WriteNode(childnode); // write the empty node |
569 |
pathList.Append(newpath); |
570 |
newpath.MakeParent(); |
571 |
nodepath.MakeParent(); |
572 |
delete childnode; |
573 |
} |
574 |
newnode->Insert(*e); |
575 |
// cout << "\tInserted " << e; |
576 |
delete e; |
577 |
} |
578 |
newnode->SetLevel(node->Level()); |
579 |
WriteNode(newnode); // write the node |
580 |
// cout << "Created " << newnode; |
581 |
delete newnode; |
582 |
} |
583 |
delete oldList; |
584 |
oldList=newList; |
585 |
} while(node->Level()>0); // until we reach the leaves' level |
586 |
// cout << node; |
587 |
delete node; |
588 |
delete oldList; |
589 |
} |
590 |
|
591 |
// adjust the keys of node |
592 |
// this is used during the final phase of the BulkLoad algorithm |
593 |
void |
594 |
MT::AdjKeys(GiSTnode *node) |
595 |
{ |
596 |
GiSTnode *P; |
597 |
GiSTpath parent_path=node->Path(); |
598 |
GiSTentry *entry, *actual; |
599 |
|
600 |
if(node->Path().IsRoot()) return; |
601 |
parent_path.MakeParent(); |
602 |
P=ReadNode(parent_path); |
603 |
entry=P->SearchPtr(node->Path().Page()); |
604 |
assert(entry!=NULL); |
605 |
actual=node->Union(); |
606 |
actual->SetPtr(node->Path().Page()); |
607 |
((MTkey *)actual->Key())->distance=((MTkey *)entry->Key())->distance; // necessary to keep track of the distance from the parent |
608 |
if(!entry->IsEqual(*actual)) { // replace this entry |
609 |
int pos=entry->Position(); |
610 |
|
611 |
P->DeleteEntry(pos); |
612 |
P->Insert(*actual); |
613 |
/* if(P->IsOverFull(*Store())) { // this code should be unnecessary (if we have fixed length entries) |
614 |
GiSTpage page=node->Path().Page(); |
615 |
|
616 |
Split(&P, *actual); |
617 |
node->Path()=P->Path(); |
618 |
node->Path().MakeChild(page); |
619 |
} |
620 |
else { |
621 |
WriteNode(P); |
622 |
AdjKeys(P); |
623 |
} */ |
624 |
WriteNode(P); |
625 |
AdjKeys(P); |
626 |
} |
627 |
delete P; |
628 |
delete actual; |
629 |
delete entry; |
630 |
} |