package folder;

# An mbox mail folder together with its ".<name>.mdif" side file, which
# stores the folder's size/mtime, per-host values, the message index
# (offset + content hash per message) and a history of add/delete diffs.

BEGIN { *slog = \&::slog };

# :seek is requested explicitly: Fcntl exports only its F_*/O_* constants by
# default, so without it SEEK_SET is a bareword string that merely happens
# to numify to 0.
use Fcntl qw(:DEFAULT :seek);
use File::Sync ();

use Inline Config => NAME => "syncmail::folder";
use Inline C;

use constant MDIFVERSION => 1;

# Install hash(): the message digest function used for all message ids.
BEGIN {
    if (1) {
        use OpenSSL ();
        *hash = \&OpenSSL::Digest::sha1_hex;
    } elsif (0) {
        # disabled alternative implementation, kept for reference
        # use Digest::SHA1 ();
        my $digest = Digest::SHA1->new;
        *hash = sub {
            $digest->reset;
            $digest->add(@_);
            $mid = $digest->hexdigest;
        };
    }
}

# fflush($fh): force perl's buffered output on $fh out to the OS by
# switching autoflush on and off again, restoring the selected handle.
sub fflush {
    my $oldfh = select $_[0];
    $| = 1;
    $| = 0;
    select $oldfh;
}

# replace($src, $dst): rename $src over $dst and fsync the directory that
# contains $dst. Dies if the rename or opening the directory fails.
sub replace {
    my ($src, $dst) = @_;

    # A failed rename must not be mistaken for success: callers go on to
    # clear their dirty flag or adopt the new file as the folder.
    rename $src, $dst
        or die "$src: unable to rename to $dst: $!";

    # turn "dir/file" into "dir/." (or "file" into ".")
    $dst =~ s/[^\/]*$/./;

    # now sync the directory
    open my $dir, "<", $dst
        or die "$dst: $!";
    File::Sync::fsync($dir);
}

# new(name => ..., ...): create a folder object and open it read-only.
# path defaults to "$::PREFIX/<name>" but may be overridden through %arg.
sub new {
    my $class = shift;
    my %arg   = @_;
    my $self  = bless {
        path => "$::PREFIX/$arg{name}",
        %arg,
    }, $class;
    $self->open(0);
    $self;
}

# Mark the in-memory state as needing to be written by write_mdif.
sub dirty {
    $_[0]{dirty} = 1;
}

sub DESTROY {
    #$_[0]->write_mdif; # do NOT!
}

# parse_mbox(filehandle, callback[, path])
# Reads an mbox from the (already positioned) filehandle and calls
# callback($offset, \$header, \$body) once per message. $offset is the
# position of the message's "From " line, $header includes the mbox From_
# line without the leading "From " itself. The optional path is only used
# in error messages. Returns 1 on success and nothing if the file does not
# start with "From " (an empty file is accepted); dies on a malformed
# From_ line.
sub parse_mbox {
    my ($fh, $cb, $path) = @_;
    $path = "mbox" unless defined $path;

    local $/ = "\n\n";    # headers end at the first empty line

    my ($head, $body, $offs);

    read $fh, $head, 5;
    $head eq "From " or $head eq "" or return;

    $offs = 0;
    while (defined ($head = <$fh>)) {
        # NOTE(review): the trailing (?:\d\d) has no quantifier, so only
        # four-digit years match; if two-digit years were meant to be
        # accepted it needs a "?" - confirm before changing.
        $head =~ /^.*? [A-Z][a-z][a-z] [A-Z][a-z][a-z] [ 0-9][0-9] \d\d:\d\d:\d\d(?: [+-]\d\d\d\d)? \d\d(?:\d\d)\n/
            or die "$path: not standard mbox format header:\n$head\n";

        # the body runs up to the next From_ line (or end of file)
        local $/ = "\nFrom ";

        # NEVER enable this. content-length simply is broken by design
        #if ($head =~ /^Content-Length:\s+(\d+)$/im) {
        #   $1 <= read $fh, $body, $1 + 5
        #      or die "$path: partial message in mbox";
        #} else {
            $body = <$fh>;
        #}
        chomp $body;

        $cb->($offs, \$head, \$body);

        # the 5 bytes of the next "From " have already been consumed
        $offs = (tell $fh) - 5;

        # presumably yields to other tasks every 256 messages - confirm
        &::give unless ++$ecnt & 255;
    }

    1;
}

# Path of the mdif side file: "dir/name" becomes "dir/.name.mdif".
sub conf_path {
    (my $conf = $_[0]{path}) =~ s%([^/]+$)%.$1.mdif%;
    $conf;
}

# Load the mdif file into $self unless an index is already in memory.
# Recognised sections: [SYNCMAIL] (key=value pairs stored directly in
# $self), [HOSTS] ($self->{host}), [DIFF <mtime>] (+hash/-hash lines,
# $self->{diff}) and [INDEX] (offset=hash lines, $self->{idx}).
# Returns silently if the file cannot be opened; dies on an empty file,
# an unknown section, or a version newer than MDIFVERSION.
sub read_mdif {
    my $self = shift;
    my $path = $self->conf_path;

    return if $self->{idx};

    open my $fh, "<", $path
        or return;

    # this parser works through $_; do not clobber the caller's copy
    local $_;

    defined ($_ = <$fh>)
        or die "$path: empty mdif file\n";

    # Each section loop stops on the first line it does not understand and
    # leaves it in $_, where it is examined as the next section header.
    do {
        if ($_ eq "[SYNCMAIL]\n") {
            while (<$fh>) {
                last unless /^([a-z]+)\s*=\s*(.*)\n$/;
                $self->{$1} = $2;
            }
        } elsif ($_ eq "[HOSTS]\n") {
            while (<$fh>) {
                last unless /^([^[].*)=(.*)\n$/;
                $self->{host}{$1} = $2;
            }
        } elsif (/^\[DIFF (\d+)\]\n$/) {
            my $mtime = $1;
            my (@add, @del);
            while (<$fh>) {
                last unless /^([+-])(.*)\n$/;
                if ($1 eq "+") {
                    push @add, $2;
                } else {
                    push @del, $2;
                }
            }
            # write_mdif emits the diffs reversed, so unshift restores
            # the in-memory order
            unshift @{$self->{diff}}, [$mtime, \@add, \@del];
        } elsif ($_ eq "[INDEX]\n") {
            my @idx;
            while (<$fh>) {
                last unless /^(\d+)=(.*)\n$/;
                push @idx, [$1, $2];
            }
            $self->{idx} = \@idx;
        } elsif (/^#/) {
            $_ = <$fh>;    # nop
        } else {
            die "$path: unparseable section '$_'\n";
        }
    } while defined $_;

    $self->{version} <= MDIFVERSION
        or die "$path: version mismatch ($self->{version} found, <".MDIFVERSION." expected)\n";
}

# Write the mdif file if the object is dirty: the data goes to "<mdif>~",
# is flushed and fsynced, then renamed over the real file. The dirty flag
# is cleared only after the rename succeeded.
sub write_mdif {
    my $self = shift;
    my $path = $self->conf_path;

    return unless $self->{dirty};

    open my $fh, ">", "$path~"
        or die "$path~: $!";

    print $fh "# automatically generated, do NOT edit\n";

    print $fh "[SYNCMAIL]\n";
    print $fh "$_=$self->{$_}\n" for (qw(fsize mtime version ctime));

    print $fh "[HOSTS]\n";
    while (my ($k, $v) = each %{$self->{host}}) {
        print $fh "$k=$v\n";
    }

    print $fh "[INDEX]\n";
    print $fh "$_->[0]=$_->[1]\n" for @{$self->{idx}};

    for (reverse @{$self->{diff}}) {
        print $fh "[DIFF $_->[0]]\n";
        print $fh "+$_\n" for @{$_->[1]};
        print $fh "-$_\n" for @{$_->[2]};
    }

    fflush($fh);
    File::Sync::fsync($fh);
    close $fh
        or die "$path~: unable to create updated .mdif: $!";

    replace("$path~", $path);

    delete $self->{dirty};
}

# gendiff($old_idx, $new_idx): compare two indices (array refs of
# [offset, hash]) and, if they differ, append
# [ctime, \@added_hashes, \@deleted_hashes] to $self->{diff}.
sub gendiff {
    my ($self, $d1, $d2) = @_;

    my (@add, @del);
    my (%d1, %d2);

    for (@$d2) {
        undef $d2{$_->[1]};
    }

    # delete msgs in d1 but not in d2
    for (@$d1) {
        undef $d1{$_->[1]};
        push @del, $_->[1] unless exists $d2{$_->[1]};
    }

    %d2 = ();    # conserve memory

    # add msgs in d2 but not in d1
    for (@$d2) {
        push @add, $_->[1] unless exists $d1{$_->[1]};
    }

    push @{$self->{diff}}, [
        $self->{ctime},
        \@add,
        \@del,
    ] if @add || @del;
}

# open($rw): make sure the folder is open in the requested mode (true =
# read/write, false = read-only). If it is not, any existing handle is
# closed, the file is (created and) opened, locked through setlkw (mode 2
# for read/write, mode 1 for read-only) and checked against the mdif.
sub open {
    my ($self, $rw) = @_;

    if (!$self->{fh} || $self->{rw} != $rw) {
        $self->close;
        $self->{rw} = $rw;
        sysopen $self->{fh}, $self->{path}, O_CREAT | ($rw ? O_RDWR : O_RDONLY), 0666
            or die "$self->{path}: $!";
        0 == setlkw(fileno $self->{fh}, $rw ? 2 : 1)
            or die "$self->{path}: $!";
        $self->check;
    }
}

# close: flush and fsync the folder if it was open read/write, write the
# mdif file if dirty, and drop the file handle.
sub close {
    my $self = shift;

    if ($self->{rw} && $self->{fh}) {
        fflush($self->{fh});
        File::Sync::fsync($self->{fh});
    }

    $self->write_mdif;

    delete $self->{fh};
}

# check: bring the index up to date with the mbox file. Returns 1 without
# further work when size and mtime still match the in-memory values (or,
# if no index is loaded, the values stored in the mdif file). Otherwise
# the mbox is re-parsed, a diff against the previous index is recorded and
# the mdif file is rewritten. Dies if the folder is older than its mdif or
# is not a valid mbox.
sub check {
    my $self = shift;
    my $conf = $self->conf_path;

    # presumably holds a disk lock for as long as $guard lives - confirm
    my $guard = $::lockdisk->guard;

    slog 3, "checking $self->{path}\n";

    my ($fsize, $mtime) = (stat $self->{fh})[7, 9];

    if ($self->{idx}) {
        return 1 if $fsize == $self->{fsize} && $mtime == $self->{mtime};
    } else {
        if (open my $fh, "<", $conf) {
            my %conf;
            <$fh>;    # skip initial comment
            <$fh> eq "[SYNCMAIL]\n"
                or die "$conf: format error";
            while (<$fh> =~ /^([a-z]+)\s*=\s*(.*)$/) {
                $conf{$1} = $2;
            }
            return 1 if $fsize == $conf{fsize} && $mtime == $conf{mtime};

            $conf{mtime} <= $mtime
                or die "$self->{path}: folder older than mdif";
        }
    }

    slog 2, "updating $self->{path}\n";

    my @idx;

    seek $self->{fh}, 0, SEEK_SET;

    parse_mbox($self->{fh}, sub {
        my ($offs, $head, $body) = @_;
        push @idx, [$offs, hash($$head, "\n\n", $$body)];
    }, $self->{path}) or die "$self->{path}: no valid mbox file";

    # load the previous index (if any) so gendiff has something to compare
    $self->read_mdif;
    $self->{version} ||= MDIFVERSION;
    $self->{ctime} = time;

    $self->gendiff($self->{idx}, \@idx);

    $self->{fsize} = $fsize;
    $self->{mtime} = $mtime;
    $self->{idx}   = \@idx;

    $self->dirty;
    $self->write_mdif;#d#
}

# inventory: a single digest over the sorted hashes of all indexed messages.
sub inventory {
    hash sort map { $_->[1] } @{$_[0]{idx}};
}

# iidx: build (once) and return the inverse index
# { hash => [offset, length] }. The length of a message is the distance to
# the next message's offset, or to fsize for the last one.
sub iidx {
    my $self = shift;

    $self->{iidx} ||= do {
        my %iidx;
        my $idx = $self->{idx};

        # temporary sentinel so the last message has a successor
        push @$idx, [$self->{fsize}];

        for (0 .. @$idx - 2) {
            $iidx{$idx->[$_][1]} = [$idx->[$_][0], $idx->[$_+1][0] - $idx->[$_][0]];
        }

        pop @$idx;    # remove the sentinel again

        \%iidx;
    };
}

# exists($hash): the [offset, length] entry for $hash, or undef.
sub exists {
    $_[0]->iidx unless $_[0]{iidx};
    return $_[0]{iidx}{$_[1]};
}

# fetch($hash): read and return the complete message (starting with its
# "From " line) from the folder. Dies if the hash is unknown, the read
# fails or comes up short, or the data does not start with a From_ line.
sub fetch {
    my ($self, $hash) = @_;

    $self->iidx unless $self->{iidx};

    my $mail;

    my $msg = $self->{iidx}{$hash}
        or die "$hash: no such message in $self->{path}";

    print STDERR "$self->{fh}, $msg->[0], SEEK_SET\n";#d#
    seek $self->{fh}, $msg->[0], SEEK_SET
        or die "$self->{path}: $!";

    print STDERR "$msg->[1] == read $self->{fh}, $msg->[1]\n";#d#
    $msg->[1] == read $self->{fh}, $mail, $msg->[1]
        or die "$self->{path}: $!";

    $mail =~ /^From \S/
        or die "$self->{path}: mail folder corrupted";

    $mail;
}

# begin updating folder: remember the current index so that end_update can
# compute a diff against it
sub begin_update {
    my $self = shift;

    $self->{oidx} = $self->{idx};
}

# delete(@hashes): remove the given messages from the folder. If at least
# one of them exists, every remaining message is copied into "<path>~",
# which is write-locked, synced and renamed over the folder; the object
# then continues on the new file handle in read/write mode with a fresh
# index. Does nothing if none of the hashes is in the folder. On a copy
# error the temporary file is removed and the error is rethrown.
sub delete {
    my $self = shift;
    my $temp = "$self->{path}~";

    $self->iidx unless $self->{iidx};

    for (@_) {
        if (exists $self->{iidx}{$_}) {
            # at least one message exists

            # presumably holds a disk lock while $guard lives - confirm
            my $guard = $::lockdisk->guard;

            my %del;
            @del{@_} = ();

            my @nidx;
            my $dofs = 0;    # write offset in the new file

            open my $fh, "+>", $temp
                or die "$temp: $!";

            eval {
                0 == setlkw(fileno $fh, 2)
                    or die "$self->{path}~: $!";

                $self->{fsize} = 0;    # we virtually truncated the file

                for (@{delete $self->{idx}}) {
                    my $hash = $_->[1];
                    my $buf;

                    unless (exists $del{$hash}) {
                        my ($ofs, $len) = @{$self->{iidx}{$hash}};

                        $len or die;

                        seek $self->{fh}, $ofs, SEEK_SET
                            or die "$self->{path}: $!";

                        $len == read $self->{fh}, $buf, $len
                            or die "$self->{path}: $!";

                        $buf =~ /^From \S/
                            or die "$self->{path}: corrupted mail folder";

                        print $fh $buf
                            or die "$self->{path}: $!";

                        push @nidx, [$dofs, $hash];
                        $self->{iidx}{$hash}[0] = $dofs;

                        $dofs += $len;

                        # presumably yields to other tasks - confirm
                        &::give unless ++$ecnt & 255;
                    } else {
                        delete $self->{iidx}{$hash};
                        slog 0, "skipping/deleting $hash\n";
                    }
                }
            };

            if ($@) {
                close $fh;
                unlink $temp;
                die;    # rethrows $@
            }

            fflush($fh);
            File::Sync::fsync($fh);

            replace $temp, $self->{path};

            # continue on the new file; the old handle is dropped
            $self->{fh} = $fh;
            $self->{rw} = 1;

            delete $self->{iidx};
            $self->{idx}   = \@nidx;
            $self->{fsize} = $dofs;

            return;
        }
    }
}
# append($hash, $mail): append a complete message (starting with its
# "From " line) to the folder and add [offset, $hash] to the index. The
# folder is switched to read/write mode first. An empty $mail is ignored.
sub append {
    my ($self, $hash, $mail) = @_;

    if (length $mail) {
        $self->open(1);

        # write at the logical end of the folder as tracked in fsize
        seek $self->{fh}, $self->{fsize}, SEEK_SET
            or die "$self->{path}: $!";

        print {$self->{fh}} $mail
            or die "$self->{path}: $!";

        push @{$self->{idx}}, [$self->{fsize}, $hash];
        $self->{fsize} += length $mail;
    }
}

# end_update: finish an update started with begin_update. Records the diff
# between the remembered and the current index, flushes and fsyncs the
# folder, refreshes fsize/mtime from the file and marks the object dirty.
sub end_update {
    my $self = shift;

    $self->gendiff((delete $self->{oidx}), $self->{idx});

    fflush($self->{fh});
    File::Sync::fsync($self->{fh});

    stat $self->{fh}
        or die "$self->{path}: $!";
    $self->{fsize} = (stat _)[7];
    $self->{mtime} = (stat _)[9];

    $self->dirty;
}

1;

__DATA__

__C__

/* struct flock, fcntl() and the F_* constants come from <fcntl.h>,
 * SEEK_SET from <unistd.h> */
#include <unistd.h>
#include <fcntl.h>

/* mode0 unlock, mode1 rlock, mode2 rwlock */
/* Applies the lock to the whole file (start 0, length 0) with F_SETLKW
 * and returns the result of fcntl(). */
int setlkw(int fd, int mode)
{
  struct flock l;

  l.l_type   = mode == 0 ? F_UNLCK
             : mode == 1 ? F_RDLCK
             :             F_WRLCK;
  l.l_whence = SEEK_SET;
  l.l_start  = 0;
  l.l_len    = 0;

  return fcntl (fd, F_SETLKW, &l);
}