1 |
#!/opt/bin/perl |
2 |
|
3 |
# Command line: cache metadata file, cache data file, cache size in GiB,
# the original block device and an optional fifth argument.  Any true fifth
# argument makes the script write the existing cache back at startup; the
# value "r" additionally resets (empties) the cache afterwards.
my ($cachemeta, $cachedata, $sizegb, $orig, $apply) = @ARGV;

# the first four arguments are all required, so refuse to start without them
@ARGV >= 4
   or die "Usage: $0 /cache/meta /cache/data size_gb origdevice [apply|r]\n";
5 |
|
6 |
use common::sense; |
7 |
|
8 |
use IO::AIO (); |
9 |
use Coro::AIO (); |
10 |
|
11 |
use Linux::NBD; |
12 |
use Linux::NBD::Server; |
13 |
|
14 |
use POSIX (); |
15 |
|
16 |
use IO::Socket::UNIX; |
17 |
|
18 |
# when true, cache files and the original device are fsync'ed at sync points
our $SYNC = 1;
# when true, write-backs shorter than 64KiB are padded to 64KiB
# NOTE(review): presumably to cover whole NTFS compression units - confirm
our $NTFS_COMPRESSION = 1;

# size in bytes of one cached block
sub BLKSIZE() { 512 }

# blockdev --getsz prints the device size in 512-byte sectors
my $sectors = qx<blockdev --getsz \Q$orig\E>;

# without a valid size everything below would operate on a zero-block device
$? == 0
   or die "$orig: blockdev --getsz failed\n";

# number of BLKSIZE-sized blocks on the original device
my $blocks = $sectors * 512 / BLKSIZE;
24 |
|
25 |
# the device being cached; it must already exist
sysopen (my $orig_fh, $orig, IO::AIO::O_RDWR)
   or die "$orig: $!";

# the two cache files are created on first use
my $cache_flags = IO::AIO::O_RDWR | IO::AIO::O_CREAT;

sysopen (my $meta_fh, $cachemeta, $cache_flags)
   or die "$cachemeta: $!";

sysopen (my $data_fh, $cachedata, $cache_flags)
   or die "$cachedata: $!";

# capacity of the data cache, in BLKSIZE-sized blocks
my $cacheblocks = $sizegb * 1024 * 1024 * 1024 / BLKSIZE;

# scalars that mapin maps onto the metadata and data cache files
my ($meta_data, $data_data);
38 |
|
39 |
# Size both cache files and map them into $meta_data and $data_data.
#
# The metadata file holds 4 bytes per block of the original device, the data
# file holds $cacheblocks blocks of BLKSIZE bytes.  Both are mapped shared and
# read-write, so stores into the scalars end up in the files.
#
# Dies when a mapping cannot be established, as every later access to the
# scalars relies on it.  A failing truncate is only reported.
sub mapin {
   my $meta_len = $blocks * 4;
   my $data_len = $cacheblocks * BLKSIZE;

   # aio_truncate yields a true (non-zero) status on failure
   Coro::AIO::aio_truncate ($meta_fh, $meta_len)
      and warn "$cachemeta: truncate: $!\n";
   Coro::AIO::aio_truncate ($data_fh, $data_len)
      and warn "$cachedata: truncate: $!\n";

   my $prot = IO::AIO::PROT_READ | IO::AIO::PROT_WRITE;

   IO::AIO::mmap ($meta_data, $meta_len, $prot, IO::AIO::MAP_SHARED, $meta_fh, 0)
      or die "$cachemeta: mmap: $!\n";
   IO::AIO::mmap ($data_data, $data_len, $prot, IO::AIO::MAP_SHARED, $data_fh, 0)
      or die "$cachedata: mmap: $!\n";

#   IO::AIO::madvise $meta_data, 0, undef, IO::AIO::MADV_RANDOM;
#   IO::AIO::madvise $data_data, 0, undef, IO::AIO::MADV_RANDOM;
}
49 |
|
50 |
# Best-effort tuning of the cache files: set the +C attribute and clear the
# btrfs compression property.  Output and failures are deliberately ignored.
for my $file ($cachemeta, $cachedata) {
   system "chattr +C \Q$file\E >/dev/null 2>&1";
}

for my $file ($cachemeta, $cachedata) {
   system "btrfs property set \Q$file\E compression '' >/dev/null 2>&1";
}

mapin ();

# map the whole original device; reads of uncached blocks come from here
IO::AIO::mmap my $orig_data, $blocks * BLKSIZE, IO::AIO::PROT_READ | IO::AIO::PROT_WRITE, IO::AIO::MAP_SHARED, $orig_fh, 0
   or die "$orig: mmap: $!\n";
59 |
|
60 |
package myserver;

use common::sense;

# size in bytes of one cached block (same value as in package main)
sub BLKSIZE() { 512 }

# request handling is inherited from Linux::NBD::Server
our @ISA = ("Linux::NBD::Server");

# highest cache block index handed out so far; index 0 is never handed out
# because a zero metadata entry means "not cached"
my $alloc = 0;
#my $need_sync = 0;
70 |
|
71 |
# Write every cached block back to the original device.
#
# The metadata table holds one 32-bit cache index per device block, with 0
# meaning "not cached".  Runs of consecutive cached blocks are gathered into a
# single buffer (at most 64MiB) and written to $orig_fh in one go.  With
# $NTFS_COMPRESSION set, a run shorter than 64KiB is padded to 64KiB using the
# data that currently follows it on the device.
#
# $reset - when true, both cache files are truncated to zero and mapped in
#          again afterwards, which leaves an empty cache.
#
# Always sets $alloc back to 0.  Dies when flushing the cache mappings fails
# or when a write to the original device fails or is short.
sub apply {
   my ($reset) = @_;

   # msync works on the mmapped scalars, not on the cache file names
   Coro::AIO::aio_msync $data_data, 0, undef, IO::AIO::MS_SYNC
      and die "cachedata msync: $!\n";
   Coro::AIO::aio_msync $meta_data, 0, undef, IO::AIO::MS_SYNC
      and die "cachemeta msync: $!\n";

   my ($blk, $idx);

   # $blk_nxt:   device block expected next if the current run continues
   # $blk_buf:   data of the current run
   # $blk_total: bytes written back so far, for progress output only
   my ($blk_nxt, $blk_buf, $blk_total);

   my $blk_flush = sub {
      return unless length $blk_buf;

      # $blk_nxt is one past the run, move it back to the run's first block
      $blk_nxt -= (length $blk_buf) / BLKSIZE;
      $blk_total += length $blk_buf;

      if ($NTFS_COMPRESSION and 0x10000 > length $blk_buf) {
         printf "patch back blk %d len %d (total: %.3fGB)\n", $blk_nxt, length $blk_buf, $blk_total / 1e9;
         my $end = $blk_nxt * BLKSIZE + length $blk_buf;
         my $len = 0x10000 - length $blk_buf;

         # pad with what the device currently holds right after the run
         $blk_buf .= substr $orig_data, $end, $len;

         die unless $end + $len == ($blk_nxt + 128) * BLKSIZE;
#         die unless 65536 == length $blk_buf;
      } else {
         printf "write back blk %d len %d (total: %.3fGB)\n", $blk_nxt, length $blk_buf, $blk_total / 1e9;
      }

      # a failed or short write would silently lose cached data, so stop here
      my $written = Coro::AIO::aio_write $orig_fh, $blk_nxt * BLKSIZE, undef, $blk_buf, 0;
      $written == length ($blk_buf)
         or die "$orig: write back blk $blk_nxt: "
                . ($written < 0 ? "$!" : "short write ($written bytes)") . "\n";
      #substr $orig_data, $blk_nxt * BLKSIZE, length $blk_buf, $blk_buf;

      $blk_buf = "";
   };

   # the table is scanned in chunks of 65536 entries so that chunks without
   # any cached block can be skipped with a single string comparison
   my $nul = (pack "L", 0) x 65536;
   for (my $base = 0; $base * 4 < length $meta_data; $base += 65536) {
      if ($nul ne substr $meta_data, $base * 4, 65536 * 4) {
         for $blk ($base .. $base + 65535) {
            $idx = unpack "L", substr $meta_data, $blk * 4, 4;

            if ($idx) {
               # start a new run on a gap or once 64MiB are buffered
               if ($blk_nxt != $blk or (64 * 1024 * 1024) <= length $blk_buf) {
                  $blk_flush->();
               }

               #warn "apply cache blk $idx to orig blk $blk\n";
               $blk_nxt = $blk + 1;
               $blk_buf .= substr $data_data, $idx * BLKSIZE, BLKSIZE;
            }
         }
      }
   }

   $blk_flush->();
   print "done, syncing...\n";
   Coro::AIO::aio_fsync $orig_fh if $SYNC;

   if ($reset) {
      print "resetting\n";

      # drop the mappings before emptying the files underneath them
      undef $meta_data;
      undef $data_data;

      Coro::AIO::aio_truncate $meta_fh, 0;
      Coro::AIO::aio_truncate $data_fh, 0;

      ::mapin ();

      Coro::AIO::aio_fsync $meta_fh if $SYNC;
      Coro::AIO::aio_fsync $data_fh if $SYNC;
   }

   $alloc = 0;
}
154 |
|
155 |
# a true fifth command line argument applies the cache right away, and the
# value "r" also resets it
apply ($apply eq "r") if $apply;
#exit 0; # maybe unsafe?

# recover $alloc from an existing cache: it is the largest index stored in
# the metadata table, which is read in 64KiB pieces so that all-zero pieces
# can be skipped cheaply
{
   my $zeroes = "\x00" x 65536;
   my $size   = length $meta_data;

   for (my $pos = 0; $pos < $size; $pos += 65536) {
      my $chunk = substr $meta_data, $pos, 65536;
      next if $chunk eq $zeroes;

      for my $entry (unpack "L*", $chunk) {
         $alloc = $entry if $alloc < $entry;
      }
   }
}

print "ALLOC is $alloc\n";
172 |
|
173 |
# fsync the cache data file and then the cache metadata file; does nothing
# unless $SYNC is set
sub sync {
   if ($SYNC) {
      Coro::AIO::aio_fsync $data_fh;
      Coro::AIO::aio_fsync $meta_fh;
   }
}
177 |
|
178 |
# Handle a read request.
#
# $handle - request handle, passed back to reply unchanged
# $ofs    - byte offset, must be a multiple of BLKSIZE
# $len    - byte count, must be a multiple of BLKSIZE
#
# Each block is taken from the cache when the metadata table has a non-zero
# index for it, and from the original device otherwise.  Dies on unaligned
# requests.
sub req_read {
   my ($self, $handle, $ofs, $len) = @_;

#   warn "get read $ofs,$len\n";#d#

   my $mask = BLKSIZE - 1;

   die if $ofs & $mask;
   die if $len & $mask;

   # from here on everything is counted in blocks
   my $blk   = $ofs / BLKSIZE;
   my $count = $len / BLKSIZE;

   my $buf;

   for (; $count > 0; --$count, ++$blk) {
      my $idx = unpack "L", substr $meta_data, $blk * 4, 4;

      $buf .= $idx
         ? substr ($data_data, $idx * BLKSIZE, BLKSIZE)
         : substr ($orig_data, $blk * BLKSIZE, BLKSIZE);
   }

   $self->reply ($handle, 0, $buf);
}
206 |
|
207 |
# Handle a write request.
#
# $handle - request handle, passed back to reply unchanged
# $ofs    - byte offset, must be a multiple of BLKSIZE
# $buf    - data to write, its length must be a multiple of BLKSIZE
#
# Nothing is written to the original device here.  A block that already has
# a cache slot is overwritten in place, any other block gets the next free
# slot.  When no slot is left, the whole cache is first applied to the
# original device and reset.  Dies on unaligned requests.
sub req_write {
   my ($self, $handle, $ofs, $buf) = @_;

#   print "write $ofs, ", length $buf, "\n";

   die if $ofs & (BLKSIZE - 1);
   die if (length $buf) & (BLKSIZE - 1);

   $ofs /= BLKSIZE;

   while (length $buf) {
      my $idx = unpack "L", substr $meta_data, $ofs * 4, 4;
      my $fresh = !$idx;

      if ($fresh) {
         # the next slot would be beyond the end of the data file
         if ($alloc + 1 >= $cacheblocks) {
            print "cache full, applying...\n";
            #$need_sync = 1;
            sync ();
            apply (1);
            print "cache applied\n";
         }

         $idx = ++$alloc;
      }

      # store the data first and only then publish the slot in the table
      substr $data_data, $idx * BLKSIZE, BLKSIZE, substr $buf, 0, BLKSIZE;
      substr $meta_data, $ofs * 4, 4, pack "L", $idx
         if $fresh;

      ++$ofs;
      substr $buf, 0, BLKSIZE, "";
   }

   $self->reply ($handle, 0);
}
245 |
|
246 |
# Handle a flush request: it is acknowledged right away, nothing is synced
# here (the sync call below is disabled).
sub req_flush {
   my $self   = shift;
   my $handle = shift;

#   sync if $need_sync;

   $self->reply ($handle, 0);

   #print "flushed.\n";
}
255 |
|
256 |
package main;

# Serve NBD clients on a unix socket, one connection at a time.  The cache
# files are synced whenever a connection ends.

# TCP alternative: IO::Socket::INET->new (LocalPort => 10809, Listen => 1)
unlink "/tmp/nbdsock";
my $listen = IO::Socket::UNIX->new (Local => "/tmp/nbdsock", Listen => 1)
   or die "/tmp/nbdsock: $!\n";

while () {
   print "waiting for connections on nbd:unix:/tmp/nbdsock ...\n";
   my $fh = $listen->accept
      or die "/tmp/nbdsock: accept: $!\n";
   print "accepted.\n";

   # greeting: the two magic values, then the device size in bytes as a
   # 64-bit big-endian integer, 32 bits of flags and 124 zero bytes
   syswrite $fh,
      ("NBDMAGIC\x00\x00\x42\x02\x81\x86\x12\x53")
      . (pack "Q> N x124", $blocks * BLKSIZE, 1 + 4); # has_flags + can_flush

   my $server = myserver->new (socket => $fh);

   $server->run;

   myserver::sync ();
}
277 |
|