1 |
#!/opt/bin/perl |
2 |
|
3 |
# Command line: cache metadata file, cache data file, cache size in GiB,
# the original block device and an optional fifth "apply" argument. Any
# true value for the latter writes the cache back to the device at
# start-up; the value "r" additionally resets the cache afterwards.
#
# The count is checked explicitly: a list assignment from @ARGV is only
# false when @ARGV is completely empty, which would let a partial argument
# list through.
@ARGV >= 4
   or die "Usage: $0 /cache/meta /cache/data size_gb origdevice [apply]\n";

my ($cachemeta, $cachedata, $sizegb, $orig, $apply) = @ARGV;
5 |
|
6 |
use common::sense; |
7 |
|
8 |
use IO::AIO (); |
9 |
use Coro::AIO (); |
10 |
|
11 |
use Linux::NBD; |
12 |
use Linux::NBD::Server; |
13 |
|
14 |
use POSIX (); |
15 |
|
16 |
use IO::Socket::UNIX; |
17 |
|
18 |
# Size in bytes of one cache/device block; the empty prototype lets the
# name be used like a constant in expressions such as BLKSIZE - 1.
sub BLKSIZE () { 0x200 }
19 |
|
20 |
# Size of the original device in BLKSIZE units. blockdev --getsz reports
# the size in 512-byte sectors; its output is validated so that a failing
# blockdev call does not silently turn into a zero-sized device.
my $blocks = do {
   my $sectors = qx<blockdev --getsz \Q$orig>;

   $sectors =~ /^(\d+)\s*$/
      or die "$orig: unable to determine device size using blockdev\n";

   $1 * 512 / BLKSIZE
};

$blocks > 0
   or die "$orig: device is empty\n";

# the device being cached, opened read-write
sysopen my $orig_fh, $orig, IO::AIO::O_RDWR
   or die "$orig: $!";

# metadata file: one 4-byte cache index per device block, created on demand
sysopen my $meta_fh, $cachemeta, IO::AIO::O_RDWR | IO::AIO::O_CREAT
   or die "$cachemeta: $!";

# data file: the cached block contents, created on demand
sysopen my $data_fh, $cachedata, IO::AIO::O_RDWR | IO::AIO::O_CREAT
   or die "$cachedata: $!";

# number of BLKSIZE blocks that fit into a cache of $sizegb GiB
my $cacheblocks = $sizegb * 1024 * 1024 * 1024 / BLKSIZE;

# memory maps of the two cache files, set up by mapin
my $meta_data;
my $data_data;
35 |
|
36 |
# Size both cache files and map them into $meta_data and $data_data.
#
# The metadata file is sized to 4 bytes per block of the original device,
# the data file to $cacheblocks blocks of BLKSIZE bytes. Takes no
# arguments. Dies when a file cannot be resized or mapped, because all
# request handlers read and write through these two mappings and would
# otherwise operate on undefined buffers.
sub mapin {
   my $meta_len = $blocks * 4;
   my $data_len = $cacheblocks * BLKSIZE;

   # aio_truncate yields 0 on success and -1 on failure
   Coro::AIO::aio_truncate ($meta_fh, $meta_len)
      and die "$cachemeta: truncate: $!";
   Coro::AIO::aio_truncate ($data_fh, $data_len)
      and die "$cachedata: truncate: $!";

   IO::AIO::mmap ($meta_data, $meta_len, IO::AIO::PROT_READ | IO::AIO::PROT_WRITE, IO::AIO::MAP_SHARED, $meta_fh, 0)
      or die "$cachemeta: mmap: $!";
   IO::AIO::mmap ($data_data, $data_len, IO::AIO::PROT_READ | IO::AIO::PROT_WRITE, IO::AIO::MAP_SHARED, $data_fh, 0)
      or die "$cachedata: mmap: $!";
}
43 |
|
44 |
# Request the "C" file attribute (no copy-on-write, see chattr(1)) for both
# cache files. The exit status is deliberately not checked, so this is a
# no-op wherever chattr or the attribute is unavailable.
system "chattr", "+C", $cachemeta;
system "chattr", "+C", $cachedata;

# size and map the cache files
mapin;

# Map the whole original device; req_read serves uncached blocks straight
# from this mapping, so a failed mmap must not go unnoticed.
IO::AIO::mmap (my $orig_data, $blocks * BLKSIZE, IO::AIO::PROT_READ | IO::AIO::PROT_WRITE, IO::AIO::MAP_SHARED, $orig_fh, 0)
   or die "$orig: mmap: $!";
50 |
|
51 |
package myserver;

use common::sense;

# Block size in bytes; repeated here because the constant defined in
# package main is not visible under this package name.
sub BLKSIZE () { 512 }

# run and reply, which this file calls but does not define, are expected
# to come from this base class.
our @ISA = ("Linux::NBD::Server");

# Highest cache block index handed out so far. Index 0 is never handed
# out, as a zero metadata entry marks a block that is not cached.
my $alloc = 0;
#my $need_sync = 0;
61 |
|
62 |
# Write every cached block back to the original device.
#
# Walks the metadata table (one native 32-bit cache index per device
# block, 0 meaning "not cached"), collects runs of consecutive cached
# blocks into a buffer of at most 64 MiB and writes each run to $orig_fh,
# then fsyncs the device.
#
#   $reset - when true, both cache files are truncated to zero length and
#            mapped in again afterwards, which discards the cache contents.
#
# The allocation counter $alloc is set to 0 before returning in either
# case. Dies when a write-back cannot be completed, so the cache is never
# discarded after a failed write.
sub apply {
   my ($reset) = @_;

   my $blk_nxt   = 0;  # device block following the last buffered one
   my $blk_buf   = ""; # contents of the current run of consecutive blocks
   my $blk_total = 0;  # number of bytes written back so far

   my $blk_flush = sub {
      return unless length $blk_buf;

      # $blk_nxt points behind the run, step back to its first block
      $blk_nxt -= (length $blk_buf) / BLKSIZE;
      $blk_total += length $blk_buf;

      printf "write back blk %d len %d (total: %.3fGiB)\n", $blk_nxt, length ($blk_buf), $blk_total / (1 << 30);

      # aio_write may write less than requested, so continue until the
      # whole run is on the device and treat an error as fatal
      my $done = 0;
      while ($done < length ($blk_buf)) {
         my $written = Coro::AIO::aio_write $orig_fh, $blk_nxt * BLKSIZE + $done, undef, $blk_buf, $done;

         $written > 0
            or die "$orig: write back of blk $blk_nxt failed: $!\n";

         $done += $written;
      }

      $blk_buf = "";
   };

   my $chunk   = 65536;                         # table entries compared at once
   my $nul     = (pack "L", 0) x $chunk;
   my $entries = int ((length $meta_data) / 4);

   for (my $base = 0; $base < $entries; $base += $chunk) {
      # skip table chunks that contain no cached block at all
      next if $nul eq substr $meta_data, $base * 4, $chunk * 4;

      # never look at entries behind the end of the table
      my $last = $base + $chunk - 1;
      $last = $entries - 1 if $last > $entries - 1;

      for my $blk ($base .. $last) {
         my $idx = unpack "L", substr $meta_data, $blk * 4, 4;
         next unless $idx;

         # start a new run when this block does not directly follow the
         # previous one, or when the buffer has reached 64 MiB
         $blk_flush->()
            if $blk_nxt != $blk or (64 * 1024 * 1024) <= length ($blk_buf);

         $blk_nxt = $blk + 1;
         $blk_buf .= substr $data_data, $idx * BLKSIZE, BLKSIZE;
      }
   }

   print "done, syncing...\n";
   $blk_flush->();
   Coro::AIO::aio_fsync $orig_fh;

   if ($reset) {
      print "resetting\n";

      # drop the mappings before the files are truncated
      undef $meta_data;
      undef $data_data;

      Coro::AIO::aio_truncate $meta_fh, 0;
      Coro::AIO::aio_truncate $data_fh, 0;

      ::mapin;

      Coro::AIO::aio_fsync $meta_fh;
      Coro::AIO::aio_fsync $data_fh;
   }

   $alloc = 0;
}
125 |
|
126 |
# Optional write-back at start-up, requested through the fifth command
# line argument; the value "r" also resets the cache.
apply ($apply eq "r") if $apply;

# Recover the allocation counter: it is the largest cache index stored
# anywhere in the metadata table.
{
   my $chunk  = 65536;
   my $zeroes = "\x00" x $chunk;
   my $size   = length $meta_data;

   for (my $pos = 0; $pos < $size; $pos += $chunk) {
      my $part = substr $meta_data, $pos, $chunk;

      # all-zero chunks cannot raise the maximum
      next if $part eq $zeroes;

      for my $idx (unpack "L*", $part) {
         $alloc = $idx if $idx > $alloc;
      }
   }
}

print "ALLOC is $alloc\n";
140 |
|
141 |
# fsync the cache data file first and the cache metadata file second.
# Takes no arguments.
sub sync {
   Coro::AIO::aio_fsync ($data_fh);
   Coro::AIO::aio_fsync ($meta_fh);
}
145 |
|
146 |
# Handle a read request.
#
#   $self   - the server object, used to send the reply
#   $handle - request handle, passed through to reply unchanged
#   $ofs    - byte offset of the read
#   $len    - number of bytes to read
#
# Dies unless both $ofs and $len are multiples of BLKSIZE. Each block is
# taken from the cache when its metadata entry is non-zero and from the
# mapping of the original device otherwise; the collected data is sent
# with a single reply carrying status 0.
sub req_read {
   my ($self, $handle, $ofs, $len) = @_;

   # warn "get read $ofs,$len\n";#d#

   die if $ofs & (BLKSIZE - 1);
   die if $len & (BLKSIZE - 1);

   my $blk = $ofs / BLKSIZE;
   my $buf;

   for (my $left = $len / BLKSIZE; $left > 0; --$left, ++$blk) {
      my $idx = unpack "L", substr $meta_data, $blk * 4, 4;

      $buf .= $idx
         ? substr ($data_data, $idx * BLKSIZE, BLKSIZE)
         : substr ($orig_data, $blk * BLKSIZE, BLKSIZE);
   }

   $self->reply ($handle, 0, $buf);
}
174 |
|
175 |
# Handle a write request by storing the data in the cache.
#
#   $self   - the server object, used to send the reply
#   $handle - request handle, passed through to reply unchanged
#   $ofs    - byte offset of the write
#   $buf    - the data to write
#
# Dies unless $ofs and the length of $buf are multiples of BLKSIZE. A block
# that already has a cache slot is overwritten in place; any other block
# gets the next free slot. When no slot is left, the cache files are
# synced, the whole cache is written back to the original device and
# reset, and allocation starts over. Replies with status 0.
sub req_write {
   my ($self, $handle, $ofs, $buf) = @_;

   # print "write $ofs, ", length $buf, "\n";

   die if $ofs & (BLKSIZE - 1);
   die if (length $buf) & (BLKSIZE - 1);

   $ofs /= BLKSIZE;

   while (length $buf) {
      my $idx   = unpack "L", substr $meta_data, $ofs * 4, 4;
      my $fresh = !$idx;

      if ($fresh) {
         if ($alloc + 1 >= $cacheblocks) {
            print "cache full, applying...\n";
            #$need_sync = 1;
            sync;
            apply 1;
            print "cache applied\n";
         }

         $idx = ++$alloc;
      }

      # Store the data before publishing the slot in the metadata table.
      # Both replacements have exactly the length they replace, so the
      # mapped scalars never change size.
      substr $data_data, $idx * BLKSIZE, BLKSIZE, substr $buf, 0, BLKSIZE;
      substr $meta_data, $ofs * 4, 4, pack "L", $idx
         if $fresh;

      ++$ofs;
      substr $buf, 0, BLKSIZE, "";
   }

   $self->reply ($handle, 0);
}
213 |
|
214 |
# Handle a flush request by acknowledging it with status 0.
#
#   $self   - the server object, used to send the reply
#   $handle - request handle, passed through to reply unchanged
#
# NOTE(review): nothing is synced here although the handshake in the main
# loop advertises flush support - confirm that this is intended.
sub req_flush {
   my $self   = shift;
   my $handle = shift;

   # sync if $need_sync;

   #print "flushed.\n";
   $self->reply ($handle, 0);
}
223 |
|
224 |
package main;

# Accept NBD clients on a unix domain socket and serve them one at a time.

#my $listen = new IO::Socket::INET LocalPort => 10809, Listen => 1;
unlink "/tmp/nbdsock"; # remove a stale socket left by a previous run
my $listen = IO::Socket::UNIX->new (Local => "/tmp/nbdsock", Listen => 1)
   or die "/tmp/nbdsock: $!";

while () {
   print "waiting for connections...\n";

   my $fh = $listen->accept;
   unless ($fh) {
      # an interrupted accept is retried, anything else is fatal
      next if $!{EINTR};
      die "accept: $!";
   }

   print "accepted.\n";

   # Handshake: 16 magic bytes, the export size in bytes as 64-bit big
   # endian, 32 bits of flags and 124 zero bytes. This appears to be the
   # old-style NBD negotiation layout.
   syswrite $fh,
      ("NBDMAGIC\x00\x00\x42\x02\x81\x86\x12\x53")
      . (pack "Q> N x124", $blocks * BLKSIZE, 1 + 4); # has_flags + can_flush

   my $server = myserver->new (socket => $fh);

   # returns when this client is finished, then the next one is accepted
   $server->run;
}
243 |
|