1 |
package folder; |
2 |
|
3 |
BEGIN { *slog = \&::slog }; |
4 |
|
5 |
use Fcntl; |
6 |
use File::Sync (); |
7 |
|
8 |
use Inline Config => NAME => "syncmail::folder"; |
9 |
use Inline C; |
10 |
|
11 |
use constant MDIFVERSION => 1; |
12 |
|
13 |
BEGIN { |
14 |
if (1) { |
15 |
use OpenSSL (); |
16 |
*hash = \&OpenSSL::Digest::sha1_hex; |
17 |
} elsif (0) { |
18 |
# use Digest::SHA1 (); |
19 |
my $digest = new Digest::SHA1; |
20 |
*hash = sub { |
21 |
$digest->reset; |
22 |
$digest->add(@_); |
23 |
$mid = $digest->hexdigest; |
24 |
}; |
25 |
} |
26 |
} |
27 |
|
28 |
sub flushfh { |
29 |
my $oldfh = select $_[0]; |
30 |
$| = 1; |
31 |
select $oldfh; |
32 |
} |
33 |
|
34 |
# rename a file and fsync the directory |
35 |
sub replace { |
36 |
my ($src, $dst) = @_; |
37 |
my $self = shift; |
38 |
|
39 |
rename $src, $dst; |
40 |
|
41 |
$dst =~ s/[^\/]*$/./; |
42 |
|
43 |
# now sync the directory |
44 |
|
45 |
open my $dir, "<", $dst |
46 |
or die "$dst: $!"; |
47 |
|
48 |
File::Sync::fsync($dir); |
49 |
} |
50 |
|
51 |
sub new { |
52 |
my $class = shift; |
53 |
my %arg = @_; |
54 |
my $self = bless { |
55 |
path => "$::PREFIX/$arg{name}", |
56 |
%arg, |
57 |
}, $class; |
58 |
$self->open(0); |
59 |
$self; |
60 |
} |
61 |
|
62 |
sub dirty { |
63 |
$_[0]{dirty} = 1; |
64 |
} |
65 |
|
66 |
sub DESTROY { |
67 |
#$_[0]->write_mdif; # do NOT! |
68 |
} |
69 |
|
70 |
# parse_mbox(mbox-file-path, callback) |
71 |
# callback gets called with \$header and \$body, |
72 |
# $header includes the mbox From_ line without |
73 |
# the leading From_ itself. |
74 |
sub parse_mbox { |
75 |
my ($fh, $cb) = @_; |
76 |
|
77 |
local $/ = "\n\n"; |
78 |
|
79 |
my ($head, $body, $offs); |
80 |
|
81 |
read $fh, $head, 5; |
82 |
|
83 |
$head eq "From " or $head eq "" |
84 |
or return; |
85 |
|
86 |
$offs = 0; |
87 |
while (defined ($head = <$fh>)) { |
88 |
$head =~ /^.*? [A-Z][a-z][a-z] [A-Z][a-z][a-z] [ 0-9][0-9] \d\d:\d\d:\d\d(?: [+-]\d\d\d\d)? \d\d(?:\d\d)\n/ |
89 |
or die "$path: not standard mbox format header:\n$head\n"; |
90 |
|
91 |
local $/ = "\nFrom "; |
92 |
# NEVER enable this. content-length simply is broken by design |
93 |
#if ($head =~ /^Content-Length:\s+(\d+)$/im) { |
94 |
# $1 <= read $fh, $body, $1 + 5 |
95 |
# or die "$path: partial message in mbox"; |
96 |
#} else { |
97 |
$body = <$fh>; |
98 |
#} |
99 |
chomp $body; |
100 |
$cb->($offs, \$head, \$body); |
101 |
$offs = (tell $fh) - 5; |
102 |
&::give unless ++$ecnt & 255; |
103 |
} |
104 |
|
105 |
1; |
106 |
} |
107 |
|
108 |
sub conf_path { |
109 |
(my $conf = $_[0]{path}) =~ s%([^/]+$)%.$1.mdif%; |
110 |
$conf; |
111 |
} |
112 |
|
113 |
sub read_mdif { |
114 |
my $self = shift; |
115 |
my $path = $self->conf_path; |
116 |
|
117 |
return if $self->{idx}; |
118 |
|
119 |
open my $fh, "<", $path |
120 |
or return; |
121 |
|
122 |
defined ($_ = <$fh>) |
123 |
or die "$path: empty mdif file\n"; |
124 |
|
125 |
do { |
126 |
if ($_ eq "[SYNCMAIL]\n") { |
127 |
while (<$fh>) { |
128 |
last unless /^([a-z]+)\s*=\s*(.*)\n$/; |
129 |
$self->{$1} = $2; |
130 |
} |
131 |
} elsif ($_ eq "[HOSTS]\n") { |
132 |
while (<$fh>) { |
133 |
last unless /^([^[].*)=(.*)\n$/; |
134 |
$self->{host}{$1} = $2; |
135 |
} |
136 |
} elsif (/^\[DIFF (\d+)\]\n$/) { |
137 |
my $mtime = $1; |
138 |
my (@add, @del); |
139 |
while (<$fh>) { |
140 |
last unless /^([+-])(.*)\n$/; |
141 |
if ($1 eq "+") { |
142 |
push @add, $2; |
143 |
} else { |
144 |
push @del, $2; |
145 |
} |
146 |
} |
147 |
unshift @{$self->{diff}}, [$mtime, \@add, \@del]; |
148 |
} elsif ($_ eq "[INDEX]\n") { |
149 |
my @idx; |
150 |
while (<$fh>) { |
151 |
last unless /^(\d+)=(.*)\n$/; |
152 |
push @idx, [$1, $2]; |
153 |
} |
154 |
$self->{idx} = \@idx; |
155 |
} elsif (/^#/) { |
156 |
$_ = <$fh>; |
157 |
# nop |
158 |
} else { |
159 |
die "$path: unparseable section '$_'\n"; |
160 |
} |
161 |
} while defined $_; |
162 |
|
163 |
$self->{version} <= MDIFVERSION |
164 |
or die "$path: version mismatch ($self->{version} found, <".MDIFVERSION." expected)\n"; |
165 |
} |
166 |
|
167 |
sub write_mdif { |
168 |
my $self = shift; |
169 |
my $path = $self->conf_path; |
170 |
|
171 |
return unless $self->{dirty}; |
172 |
|
173 |
open my $fh, ">", "$path~" |
174 |
or die "$path~: $!"; |
175 |
|
176 |
print $fh "# automatically generated, do NOT edit\n"; |
177 |
|
178 |
print $fh "[SYNCMAIL]\n"; |
179 |
print $fh "$_=$self->{$_}\n" for (qw(fsize mtime version ctime)); |
180 |
|
181 |
print $fh "[HOSTS]\n"; |
182 |
while (my ($k,$v) = each %{$self->{host}}) { |
183 |
print $fh "$k=$v\n"; |
184 |
} |
185 |
|
186 |
print $fh "[INDEX]\n"; |
187 |
print $fh "$_->[0]=$_->[1]\n" for @{$self->{idx}}; |
188 |
|
189 |
for (reverse @{$self->{diff}}) { |
190 |
print $fh "[DIFF $_->[0]]\n"; |
191 |
print $fh "+$_\n" for @{$_->[1]}; |
192 |
print $fh "-$_\n" for @{$_->[2]}; |
193 |
} |
194 |
|
195 |
flushfh($fh); |
196 |
File::Sync::fsync($fh); |
197 |
close $fh |
198 |
or die "$path~: unable to create updated .mdif: $!"; |
199 |
|
200 |
replace("$path~", $path); |
201 |
|
202 |
delete $self->{dirty}; |
203 |
} |
204 |
|
205 |
sub gendiff { |
206 |
my ($self, $d1, $d2) = @_; |
207 |
|
208 |
my (@add, @del); |
209 |
my (%d1, %d2); |
210 |
|
211 |
for (@$d2) { |
212 |
undef $d2{$_->[1]}; |
213 |
} |
214 |
|
215 |
# delete msgs in d1 but not in d2 |
216 |
for (@$d1) { |
217 |
undef $d1{$_->[1]}; |
218 |
push @del, $_->[1] unless exists $d2{$_->[1]}; |
219 |
} |
220 |
%d2 = (); # conserve memory |
221 |
|
222 |
# add msgs in d2 but not in d1 |
223 |
for (@$d2) { |
224 |
push @add, $_->[1] unless exists $d1{$_->[1]}; |
225 |
} |
226 |
|
227 |
push @{$self->{diff}}, [ |
228 |
$self->{ctime}, |
229 |
\@add, \@del, |
230 |
] if @add || @del; |
231 |
} |
232 |
|
233 |
sub open { |
234 |
my ($self, $rw) = @_; |
235 |
|
236 |
if (!$self->{fh} || $self->{rw} != $rw) { |
237 |
$self->close; |
238 |
$self->{rw} = $rw; |
239 |
sysopen $self->{fh}, $self->{path}, |
240 |
O_CREAT | ($rw ? O_RDWR : O_RDONLY), |
241 |
0666 |
242 |
or die "$self->{path}: $!"; |
243 |
0 == setlkw(fileno $self->{fh}, $rw ? 2 : 1) |
244 |
or die "$self->{path}: $!"; |
245 |
|
246 |
$self->check; |
247 |
} |
248 |
} |
249 |
|
250 |
sub close { |
251 |
my $self = shift; |
252 |
|
253 |
if ($self->{rw} && $self->{fh}) { |
254 |
flushfh($self->{fh}); |
255 |
File::Sync::fsync($self->{fh}); |
256 |
} |
257 |
|
258 |
$self->write_mdif; |
259 |
|
260 |
delete $self->{fh}; |
261 |
} |
262 |
|
263 |
sub check { |
264 |
my $self = shift; |
265 |
my $conf = $self->conf_path; |
266 |
my $guard = $::lockdisk->guard; |
267 |
|
268 |
slog 3, "checking $self->{path}\n"; |
269 |
|
270 |
my ($fsize, $mtime) = (stat $self->{fh})[7, 9]; |
271 |
|
272 |
if ($self->{idx}) { |
273 |
return 1 if $fsize == $self->{fsize} |
274 |
&& $mtime == $self->{mtime}; |
275 |
} else { |
276 |
if (open my $fh, "<", $conf) { |
277 |
my %conf; |
278 |
<$fh>; # skip initial comment |
279 |
<$fh> eq "[SYNCMAIL]\n" |
280 |
or die "$conf: format error"; |
281 |
while (<$fh> =~ /^([a-z]+)\s*=\s*(.*)$/) { |
282 |
$conf{$1} = $2; |
283 |
} |
284 |
return 1 if $fsize == $conf{fsize} |
285 |
&& $mtime == $conf{mtime}; |
286 |
|
287 |
$conf{mtime} <= $mtime |
288 |
or die "$self->{path}: folder older than mdif"; |
289 |
} |
290 |
} |
291 |
|
292 |
slog 2, "updating $self->{path}\n"; |
293 |
|
294 |
my @idx; |
295 |
|
296 |
seek $self->{fh}, 0, SEEK_SET; |
297 |
|
298 |
parse_mbox $self->{fh}, sub { |
299 |
my ($offs, $head, $body) = @_; |
300 |
push @idx, [$offs, hash($$head, "\n\n", $$body)]; |
301 |
} or die "$self->{path}: no valid mbox file"; |
302 |
|
303 |
$self->read_mdif; |
304 |
|
305 |
$self->{version} ||= MDIFVERSION; |
306 |
$self->{ctime} = time; |
307 |
|
308 |
$self->gendiff($self->{idx}, \@idx); |
309 |
|
310 |
$self->{fsize} = $fsize; |
311 |
$self->{mtime} = $mtime; |
312 |
$self->{idx} = \@idx; |
313 |
|
314 |
$self->dirty; |
315 |
$self->write_mdif;#d# |
316 |
} |
317 |
|
318 |
sub inventory { |
319 |
hash sort map { $_->[1] } @{$_[0]{idx}}; |
320 |
} |
321 |
|
322 |
sub iidx { |
323 |
my $self = shift; |
324 |
|
325 |
$self->{iidx} ||= do { |
326 |
my %iidx; |
327 |
my $idx = $self->{idx}; |
328 |
|
329 |
push @$idx, [$self->{fsize}]; |
330 |
my $ofs = 0; |
331 |
for (0 .. @$idx - 2) { |
332 |
$iidx{$idx->[$_][1]} = [$idx->[$_][0], $idx->[$_+1][0] - $idx->[$_][0]]; |
333 |
} |
334 |
pop @$idx, [$self->{fsize}]; |
335 |
|
336 |
\%iidx; |
337 |
}; |
338 |
} |
339 |
|
340 |
sub exists { |
341 |
$_[0]->iidx unless $_[0]{iidx}; |
342 |
return $_[0]{iidx}{$_[1]}; |
343 |
} |
344 |
|
345 |
sub fetch { |
346 |
my ($self, $hash) = @_; |
347 |
|
348 |
$self->iidx unless $self->{iidx}; |
349 |
|
350 |
my $mail; |
351 |
|
352 |
my $msg = $self->{iidx}{$hash} |
353 |
or die "$hash: no such message in $self->{path}"; |
354 |
|
355 |
seek $self->{fh}, $msg->[0], SEEK_SET |
356 |
or die "$self->{path}: $!"; |
357 |
|
358 |
$msg->[1] == read $self->{fh}, $mail, $msg->[1] |
359 |
or die "$self->{path}: $!"; |
360 |
|
361 |
$mail =~ /^From \S/ |
362 |
or die "$self->{path}: mail folder corrupted"; |
363 |
|
364 |
$mail; |
365 |
} |
366 |
|
367 |
# begin updating folder |
368 |
sub begin_update { |
369 |
my $self = shift; |
370 |
|
371 |
$self->{oidx} = $self->{idx}; |
372 |
} |
373 |
|
374 |
sub delete { |
375 |
my $self = shift; |
376 |
my $temp = "$self->{path}~"; |
377 |
|
378 |
$self->iidx unless $self->{iidx}; |
379 |
|
380 |
for (@_) { |
381 |
if (exists $self->{iidx}{$_}) { # at least one message exists |
382 |
my $guard = $::lockdisk->guard; |
383 |
my %del; @del{@_} = (); |
384 |
my @nidx; |
385 |
|
386 |
open my $fh, ">", $temp |
387 |
or die "$temp: $!"; |
388 |
|
389 |
eval { |
390 |
0 == setlkw(fileno $fh, 2) |
391 |
or die "$self->{path}~: $!"; |
392 |
|
393 |
$self->{fsize} = 0; # we virtually truncated the file |
394 |
|
395 |
my $dofs = 0; |
396 |
for (@{delete $self->{idx}}) { |
397 |
my $hash = $_->[1]; |
398 |
my $buf; |
399 |
|
400 |
unless (exists $del{$hash}) { |
401 |
my ($ofs, $len) = @{$self->{iidx}{$hash}}; |
402 |
|
403 |
$len or die; |
404 |
|
405 |
seek $self->{fh}, $ofs, SEEK_SET |
406 |
or die "$self->{path}: $!"; |
407 |
|
408 |
$len == read $self->{fh}, $buf, $len |
409 |
or die "$self->{path}: $!"; |
410 |
|
411 |
$buf =~ /^From \S/ |
412 |
or die "$self->{path}: corrupted mail folder"; |
413 |
|
414 |
print $fh $buf |
415 |
or die "$self->{path}: $!"; |
416 |
|
417 |
push @nidx, [$dofs, $hash]; |
418 |
$self->{iidx}{$hash}[0] = $dofs; |
419 |
$dofs += $len; |
420 |
|
421 |
&::give unless ++$ecnt & 255; |
422 |
} else { |
423 |
delete $self->{iidx}{$hash}; |
424 |
slog 0, "skipping/deleting $hash\n"; |
425 |
} |
426 |
} |
427 |
}; |
428 |
|
429 |
if ($@) { |
430 |
close $fh; |
431 |
unlink $temp; |
432 |
die; |
433 |
} |
434 |
|
435 |
File::Sync::fsync($fh); |
436 |
replace $temp, $self->{path}; |
437 |
|
438 |
$self->{fh} = $fh; |
439 |
$self->{rw} = 1; |
440 |
|
441 |
delete $self->{iidx}; |
442 |
|
443 |
$self->{idx} = \@nidx; |
444 |
$self->{fsize} = $ofs; |
445 |
|
446 |
return; |
447 |
} |
448 |
} |
449 |
} |
450 |
|
451 |
sub append { |
452 |
my ($self, $hash, $mail) = @_; |
453 |
|
454 |
if (length $mail) { |
455 |
$self->open(1); |
456 |
|
457 |
seek $self->{fh}, $self->{fsize}, SEEK_SET |
458 |
or die "$self->{path}: $!"; |
459 |
|
460 |
print {$self->{fh}} $mail |
461 |
or die "$self->{path}: $!"; |
462 |
|
463 |
push @{$self->{idx}}, [$self->{fsize}, $hash]; |
464 |
$self->{fsize} += length $mail; |
465 |
} |
466 |
} |
467 |
|
468 |
sub end_update { |
469 |
my $self = shift; |
470 |
|
471 |
$self->gendiff((delete $self->{oidx}, $self->{idx})); |
472 |
|
473 |
flushfh($self->{fh}); |
474 |
File::Sync::fsync($self->{fh}); |
475 |
|
476 |
stat $self->{fh} |
477 |
or die "$self->{path}: $!"; |
478 |
|
479 |
$self->{fsize} = (stat _)[7]; |
480 |
$self->{mtime} = (stat _)[9]; |
481 |
|
482 |
$self->dirty; |
483 |
} |
484 |
|
485 |
1; |
486 |
|
487 |
__DATA__ |
488 |
__C__ |
489 |
#include <unistd.h> |
490 |
#include <fcntl.h> |
491 |
|
492 |
/* mode0 unlock, mode1 rlock, mode2 rwlock */ |
493 |
int setlkw(int fd, int mode) |
494 |
{ |
495 |
struct flock l; |
496 |
|
497 |
l.l_type = mode == 0 ? F_UNLCK |
498 |
: mode == 1 ? F_RDLCK |
499 |
: F_WRLCK; |
500 |
l.l_whence = SEEK_SET; |
501 |
l.l_start = 0; |
502 |
l.l_len = 0; |
503 |
|
504 |
return fcntl (fd, F_SETLKW, &l); |
505 |
} |
506 |
|