1 |
root |
1.1 |
#define _BSD_SOURCE // for endian.h |
2 |
|
|
|
3 |
|
|
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <sys/mman.h>
#include <linux/nbd.h>
#include <arpa/inet.h>

#include <endian.h>
19 |
|
|
|
20 |
|
|
// how stupid is that, the 1.2 header files define CL_VERSION_1_1, |
21 |
|
|
// but then fail to define the api functions unless you ALSO define |
22 |
|
|
// this. This breaks 100% of the opencl 1.1 apps, for what reason? |
23 |
|
|
// after all, the functions are deprecated, not removed. |
24 |
|
|
// in addition, you cannot test for this in any future-proof way. |
25 |
|
|
// each time a new opencl version comes out, you need to make a new |
26 |
|
|
// release. |
27 |
|
|
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS |
28 |
|
|
|
29 |
|
|
#ifdef __APPLE__ |
30 |
|
|
#define CLHDR(name) <OpenCL/name> |
31 |
|
|
#else |
32 |
|
|
#define CLHDR(name) <CL/name> |
33 |
|
|
#endif |
34 |
|
|
|
35 |
|
|
#include CLHDR(opencl.h) |
36 |
|
|
|
37 |
|
|
#undef NDEBUG |
38 |
|
|
#include <assert.h> |
39 |
|
|
|
40 |
|
|
// status of the most recent OpenCL call: assigned by NEED_SUCCESS, or filled in
// through the &res error pointer handed to the call, and printed by FAIL.
static cl_int res;
41 |
|
|
|
42 |
|
|
// Report a failed OpenCL call on stderr and terminate with exit status 1.
// name is the API name without its "cl" prefix; the code printed is the
// current value of the file-scope variable res.
#define FAIL(name) \
  do { fprintf (stderr, "cl" # name ": error %d\n", res); exit (1); } while (0)
44 |
|
|
|
45 |
|
|
// Call cl<name> with the parenthesised argument list args, store the returned
// status in res and give up via FAIL when that status is non-zero.
#define NEED_SUCCESS(name,args)            \
  do                                       \
    {                                      \
      if ((res = cl ## name args) != 0)    \
        FAIL (name);                       \
    }                                      \
  while (0)
52 |
|
|
|
53 |
|
|
// Variant of NEED_SUCCESS for calls that return an object and report their
// status through a pointer argument: args is expected to pass &res, the
// result of cl<name> is assigned to retdecl, and a non-zero res is fatal.
// retdecl must be an assignable expression (not a declaration), because the
// expansion is a single do/while statement that is safe under an unbraced if.
#define NEED_SUCCESS_ARG(retdecl, name, args) \
  do { \
    retdecl = cl ## name args; \
    \
    if (res) \
      FAIL (name); \
  } while (0)
57 |
|
|
|
58 |
|
|
// type used for byte offsets into the device buffer (see oclread/oclwrite)
typedef uint32_t off_type;

// OpenCL objects selected/created in main and used by everything else
static cl_platform_id platform;
static cl_device_id device;
static cl_context context;
static cl_command_queue queue;

// TCP port the server listens on (bound to 127.0.0.1 in init)
#define LISTEN_PORT 3843

// size passed to clCreateBuffer for the disk, also announced to the client
static off_type capacity = 768 * 1024 * 1024;
// the OpenCL buffer that stores the disk contents
static cl_mem disk;

// request/reply headers exchanged with the client in run
static struct nbd_request req;
static struct nbd_reply reply;
// host-side transfer buffer and its current size, both managed by buf_alloc
static char *buf;
static uint32_t bufsize;

// connected client socket, set by accept in init
static int fd;

// where MAP_LOCKED does not exist, make it a no-op flag; buf_alloc then
// calls mlock on the mapping instead
#ifndef MAP_LOCKED
# define MAP_LOCKED 0
#endif
80 |
|
|
|
81 |
|
|
static void |
82 |
|
|
xread (void *data, int len) |
83 |
|
|
{ |
84 |
|
|
int n = 0; |
85 |
|
|
|
86 |
|
|
while (len > n) |
87 |
|
|
{ |
88 |
|
|
int r = read (fd, n + (char *)data, len - n); |
89 |
|
|
assert (r > 0); |
90 |
|
|
n += r; |
91 |
|
|
} |
92 |
|
|
} |
93 |
|
|
|
94 |
|
|
static void |
95 |
|
|
xwrite (const void *data, int len) |
96 |
|
|
{ |
97 |
|
|
int n = 0; |
98 |
|
|
|
99 |
|
|
while (len > n) |
100 |
|
|
{ |
101 |
|
|
int r = write (fd, n + (char *)data, len - n); |
102 |
|
|
assert (r > 0); |
103 |
|
|
n += r; |
104 |
|
|
} |
105 |
|
|
} |
106 |
|
|
static buf_alloc (uint32_t size) |
107 |
|
|
{ |
108 |
|
|
if (bufsize >= size) |
109 |
|
|
return; |
110 |
|
|
|
111 |
|
|
if (buf) |
112 |
|
|
munmap (buf, bufsize); |
113 |
|
|
|
114 |
|
|
bufsize = (size + 4095) & ~4095; // too lazy to query PAGE_SIZE |
115 |
|
|
|
116 |
|
|
buf = mmap (0, bufsize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0); |
117 |
|
|
|
118 |
|
|
if (!MAP_LOCKED) |
119 |
|
|
mlock (buf, bufsize); |
120 |
|
|
} |
121 |
|
|
|
122 |
|
|
static void |
123 |
|
|
oclread (off_type off, uint32_t len, void *buf) |
124 |
|
|
{ |
125 |
|
|
NEED_SUCCESS (EnqueueReadBuffer, (queue, disk, 1, off, len, buf, 0, 0, 0)); |
126 |
|
|
} |
127 |
|
|
|
128 |
|
|
static void |
129 |
|
|
oclwrite (off_type off, uint32_t len, void *buf) |
130 |
|
|
{ |
131 |
|
|
NEED_SUCCESS (EnqueueWriteBuffer, (queue, disk, 0, off, len, buf, 0, 0, 0)); |
132 |
|
|
} |
133 |
|
|
|
134 |
|
|
static void |
135 |
|
|
init (void) |
136 |
|
|
{ |
137 |
|
|
NEED_SUCCESS_ARG (disk, CreateBuffer, (context, 0, capacity, 0, &res)); |
138 |
|
|
|
139 |
|
|
const int block = 64 * 1024; |
140 |
|
|
off_type o; |
141 |
|
|
|
142 |
|
|
buf_alloc (block); |
143 |
|
|
|
144 |
|
|
for (o = 0; o < capacity; o += block) |
145 |
|
|
{ |
146 |
|
|
oclwrite (o, block, buf); |
147 |
|
|
oclread (o, block, buf); |
148 |
|
|
} |
149 |
|
|
|
150 |
|
|
static struct sockaddr_in sa; |
151 |
|
|
|
152 |
|
|
sa.sin_family = AF_INET; |
153 |
|
|
sa.sin_port = htons (LISTEN_PORT); |
154 |
|
|
sa.sin_addr.s_addr = htonl (0x7f000001); |
155 |
|
|
|
156 |
|
|
int listener = socket (AF_INET, SOCK_STREAM, 0); |
157 |
|
|
bind (listener, (void *)&sa, sizeof (sa)); |
158 |
|
|
listen (listener, 1); |
159 |
|
|
|
160 |
|
|
printf ("waiting for connect on port %d...", LISTEN_PORT); fflush (stdout); |
161 |
|
|
fd = accept (listener, 0, 0); |
162 |
|
|
assert (fd >= 0); |
163 |
|
|
printf (" ok\n"); |
164 |
|
|
|
165 |
|
|
close (listener); |
166 |
|
|
|
167 |
|
|
{ |
168 |
|
|
xwrite ("NBDMAGIC", 8); |
169 |
|
|
xwrite ("\x00\x00\x42\x02\x81\x86\x12\x53", 8); |
170 |
|
|
|
171 |
|
|
uint64_t cap = htobe64 (capacity); |
172 |
|
|
xwrite (&cap, 8); |
173 |
|
|
|
174 |
|
|
int i; |
175 |
|
|
cap = 0; |
176 |
|
|
for (i = 0; i < 128 / 8; ++i) |
177 |
|
|
xwrite (&cap, 8); |
178 |
|
|
} |
179 |
|
|
} |
180 |
|
|
|
181 |
|
|
static void |
182 |
|
|
run (void) |
183 |
|
|
{ |
184 |
|
|
for (;;) |
185 |
|
|
{ |
186 |
|
|
xread (&req, sizeof (req)); |
187 |
|
|
assert (req.magic == htonl (NBD_REQUEST_MAGIC)); |
188 |
|
|
|
189 |
|
|
int type = ntohl (req.type); |
190 |
|
|
off_type off = be64toh (req.from); |
191 |
|
|
uint32_t len = ntohl (req.len); |
192 |
|
|
|
193 |
|
|
reply.magic = htonl (NBD_REPLY_MAGIC); |
194 |
|
|
memcpy (reply.handle, req.handle, sizeof (reply.handle)); |
195 |
|
|
reply.error = 0; |
196 |
|
|
|
197 |
|
|
//printf ("type %d off %d len %d\n", type, (int)off, (int)len);//D |
198 |
|
|
|
199 |
|
|
if (off + len > capacity) |
200 |
|
|
reply.error = htonl (1); |
201 |
|
|
else if (type == NBD_CMD_READ) |
202 |
|
|
{ |
203 |
|
|
buf_alloc (len); |
204 |
|
|
|
205 |
|
|
clFinish (queue); |
206 |
|
|
oclread (off, len, buf); |
207 |
|
|
|
208 |
|
|
xwrite (&reply, sizeof (reply)); |
209 |
|
|
xwrite (buf, len); |
210 |
|
|
} |
211 |
|
|
else if (type == NBD_CMD_WRITE) |
212 |
|
|
{ |
213 |
|
|
clFinish (queue); |
214 |
|
|
|
215 |
|
|
buf_alloc (len); |
216 |
|
|
xread (buf, len); |
217 |
|
|
xwrite (&reply, sizeof (reply)); |
218 |
|
|
|
219 |
|
|
oclwrite (off, len, buf); |
220 |
|
|
} |
221 |
|
|
else |
222 |
|
|
abort (); |
223 |
|
|
} |
224 |
|
|
} |
225 |
|
|
|
226 |
|
|
int |
227 |
|
|
main (int argc, char **argv) |
228 |
|
|
{ |
229 |
|
|
cl_platform_id *list; |
230 |
|
|
cl_uint count; |
231 |
|
|
int i; |
232 |
|
|
|
233 |
|
|
// we would really like to MCL_FUTURE, as being swapped out could turn |
234 |
|
|
// out to be fatal but we don't, as at least nvidia's opencl results |
235 |
|
|
// in a 1.5gb allocation (on my 1gb card) instantly. |
236 |
|
|
//mlockall (MCL_CURRENT | MCL_FUTURE); |
237 |
|
|
mlockall (MCL_CURRENT); |
238 |
|
|
|
239 |
|
|
NEED_SUCCESS (GetPlatformIDs, (0, 0, &count)); |
240 |
|
|
list = malloc (sizeof (*list) * count); |
241 |
|
|
NEED_SUCCESS (GetPlatformIDs, (count, list, 0)); |
242 |
|
|
|
243 |
|
|
for (i = 0; i < count; ++i) |
244 |
|
|
{ |
245 |
|
|
platform = list [i]; |
246 |
|
|
|
247 |
|
|
cl_device_id *list; |
248 |
|
|
cl_uint count; |
249 |
|
|
int i; |
250 |
|
|
|
251 |
|
|
NEED_SUCCESS (GetDeviceIDs, (platform, CL_DEVICE_TYPE_GPU, 0, 0, &count)); |
252 |
|
|
list = malloc (sizeof (*list) * count); |
253 |
|
|
NEED_SUCCESS (GetDeviceIDs, (platform, CL_DEVICE_TYPE_GPU, count, list, 0)); |
254 |
|
|
|
255 |
|
|
for (i = 0; i < count; ++i) |
256 |
|
|
{ |
257 |
|
|
device = list [i]; |
258 |
|
|
NEED_SUCCESS_ARG (context, CreateContext, (0, 1, &device, 0, 0, &res)); |
259 |
|
|
NEED_SUCCESS_ARG (queue, CreateCommandQueue, (context, device, 0, &res)); |
260 |
|
|
|
261 |
|
|
init (); |
262 |
|
|
run (); |
263 |
|
|
exit (0); |
264 |
|
|
} |
265 |
|
|
|
266 |
|
|
free (list); |
267 |
|
|
} |
268 |
|
|
|
269 |
|
|
fprintf (stderr, "unable to create suitable opencl context\n"); |
270 |
|
|
return 1; |
271 |
|
|
} |
272 |
|
|
|