1 |
#define _BSD_SOURCE // for endian.h |
2 |
|
3 |
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/socket.h>

#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <linux/nbd.h>

#include <endian.h>
19 |
|
20 |
// Annoyingly, the OpenCL 1.2 header files define CL_VERSION_1_1, but then
// fail to declare the 1.1 API functions unless you ALSO define the macro
// below. This breaks 100% of OpenCL 1.1 applications, for what reason?
// After all, the functions are deprecated, not removed.
// In addition, you cannot test for this in any future-proof way: each time
// a new OpenCL version comes out, you need to make a new release.
27 |
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS |
28 |
|
29 |
#ifdef __APPLE__ |
30 |
#define CLHDR(name) <OpenCL/name> |
31 |
#else |
32 |
#define CLHDR(name) <CL/name> |
33 |
#endif |
34 |
|
35 |
#include CLHDR(opencl.h) |
36 |
|
37 |
#undef NDEBUG |
38 |
#include <assert.h> |
39 |
|
40 |
// Status code of the most recent OpenCL call issued through the
// NEED_SUCCESS / NEED_SUCCESS_ARG macros; FAIL prints it.
static cl_int res;
41 |
|
42 |
// Print "cl<name>: error <res>" to stderr and terminate the process with
// exit status 1. "name" is the OpenCL entry point without its "cl" prefix;
// the status code is taken from the file-scope variable res.
// The message ends with a newline so it is not glued to whatever is printed
// next (for example the shell prompt).
#define FAIL(name) \
  do { fprintf (stderr, "cl" # name ": error %d\n", res); exit (1); } while (0)
44 |
|
45 |
// Call the OpenCL function cl<name> with the parenthesised argument list
// "args", store its return value in res, and bail out through FAIL (name)
// when that value is non-zero. Expands to a single statement.
#define NEED_SUCCESS(name,args) \
  do \
    { \
      if ((res = cl ## name args) != 0) \
        FAIL (name); \
    } \
  while (0)
52 |
|
53 |
// Variant of NEED_SUCCESS for OpenCL functions that return an object and
// report their status through an output argument: assigns the result of
// cl<name> args to "retdecl" and then calls FAIL (name) if res is non-zero.
// The caller is expected to pass &res inside "args".
// NOTE: this deliberately expands to two bare statements (no do/while
// wrapper) so that "retdecl" may also be a declaration; do not use it as
// the unbraced body of an if/else or loop.
#define NEED_SUCCESS_ARG(retdecl, name, args) \
  retdecl = cl ## name args; \
  if (res != 0) \
    FAIL (name);
57 |
|
58 |
typedef uint32_t off_type; |
59 |
|
60 |
static cl_platform_id platform; |
61 |
static cl_device_id device; |
62 |
static cl_context context; |
63 |
static cl_command_queue queue; |
64 |
|
65 |
#define LISTEN_PORT 3843 |
66 |
|
67 |
static off_type capacity = 768 * 1024 * 1024; |
68 |
static cl_mem disk; |
69 |
|
70 |
static struct nbd_request req; |
71 |
static struct nbd_reply reply; |
72 |
static char *buf; |
73 |
static uint32_t bufsize; |
74 |
|
75 |
static int fd; |
76 |
|
77 |
#ifndef MAP_LOCKED |
78 |
# define MAP_LOCKED 0 |
79 |
#endif |
80 |
|
81 |
static void |
82 |
xread (void *data, int len) |
83 |
{ |
84 |
int n = 0; |
85 |
|
86 |
while (len > n) |
87 |
{ |
88 |
int r = read (fd, n + (char *)data, len - n); |
89 |
assert (r > 0); |
90 |
n += r; |
91 |
} |
92 |
} |
93 |
|
94 |
static void |
95 |
xwrite (const void *data, int len) |
96 |
{ |
97 |
int n = 0; |
98 |
|
99 |
while (len > n) |
100 |
{ |
101 |
int r = write (fd, n + (char *)data, len - n); |
102 |
assert (r > 0); |
103 |
n += r; |
104 |
} |
105 |
} |
106 |
static buf_alloc (uint32_t size) |
107 |
{ |
108 |
if (bufsize >= size) |
109 |
return; |
110 |
|
111 |
if (buf) |
112 |
munmap (buf, bufsize); |
113 |
|
114 |
bufsize = (size + 4095) & ~4095; // too lazy to query PAGE_SIZE |
115 |
|
116 |
buf = mmap (0, bufsize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0); |
117 |
|
118 |
if (!MAP_LOCKED) |
119 |
mlock (buf, bufsize); |
120 |
} |
121 |
|
122 |
static void |
123 |
oclread (off_type off, uint32_t len, void *buf) |
124 |
{ |
125 |
NEED_SUCCESS (EnqueueReadBuffer, (queue, disk, 1, off, len, buf, 0, 0, 0)); |
126 |
} |
127 |
|
128 |
static void |
129 |
oclwrite (off_type off, uint32_t len, void *buf) |
130 |
{ |
131 |
NEED_SUCCESS (EnqueueWriteBuffer, (queue, disk, 0, off, len, buf, 0, 0, 0)); |
132 |
} |
133 |
|
134 |
static void |
135 |
init (void) |
136 |
{ |
137 |
NEED_SUCCESS_ARG (disk, CreateBuffer, (context, 0, capacity, 0, &res)); |
138 |
|
139 |
const int block = 64 * 1024; |
140 |
off_type o; |
141 |
|
142 |
buf_alloc (block); |
143 |
|
144 |
for (o = 0; o < capacity; o += block) |
145 |
{ |
146 |
oclwrite (o, block, buf); |
147 |
oclread (o, block, buf); |
148 |
} |
149 |
|
150 |
static struct sockaddr_in sa; |
151 |
|
152 |
sa.sin_family = AF_INET; |
153 |
sa.sin_port = htons (LISTEN_PORT); |
154 |
sa.sin_addr.s_addr = htonl (0x7f000001); |
155 |
|
156 |
int listener = socket (AF_INET, SOCK_STREAM, 0); |
157 |
bind (listener, (void *)&sa, sizeof (sa)); |
158 |
listen (listener, 1); |
159 |
|
160 |
printf ("waiting for connect on port %d...", LISTEN_PORT); fflush (stdout); |
161 |
fd = accept (listener, 0, 0); |
162 |
assert (fd >= 0); |
163 |
printf (" ok\n"); |
164 |
|
165 |
close (listener); |
166 |
|
167 |
{ |
168 |
xwrite ("NBDMAGIC", 8); |
169 |
xwrite ("\x00\x00\x42\x02\x81\x86\x12\x53", 8); |
170 |
|
171 |
uint64_t cap = htobe64 (capacity); |
172 |
xwrite (&cap, 8); |
173 |
|
174 |
int i; |
175 |
cap = 0; |
176 |
for (i = 0; i < 128 / 8; ++i) |
177 |
xwrite (&cap, 8); |
178 |
} |
179 |
} |
180 |
|
181 |
static void |
182 |
run (void) |
183 |
{ |
184 |
for (;;) |
185 |
{ |
186 |
xread (&req, sizeof (req)); |
187 |
assert (req.magic == htonl (NBD_REQUEST_MAGIC)); |
188 |
|
189 |
int type = ntohl (req.type); |
190 |
off_type off = be64toh (req.from); |
191 |
uint32_t len = ntohl (req.len); |
192 |
|
193 |
reply.magic = htonl (NBD_REPLY_MAGIC); |
194 |
memcpy (reply.handle, req.handle, sizeof (reply.handle)); |
195 |
reply.error = 0; |
196 |
|
197 |
//printf ("type %d off %d len %d\n", type, (int)off, (int)len);//D |
198 |
|
199 |
if (off + len > capacity) |
200 |
reply.error = htonl (1); |
201 |
else if (type == NBD_CMD_READ) |
202 |
{ |
203 |
buf_alloc (len); |
204 |
|
205 |
clFinish (queue); |
206 |
oclread (off, len, buf); |
207 |
|
208 |
xwrite (&reply, sizeof (reply)); |
209 |
xwrite (buf, len); |
210 |
} |
211 |
else if (type == NBD_CMD_WRITE) |
212 |
{ |
213 |
clFinish (queue); |
214 |
|
215 |
buf_alloc (len); |
216 |
xread (buf, len); |
217 |
xwrite (&reply, sizeof (reply)); |
218 |
|
219 |
oclwrite (off, len, buf); |
220 |
} |
221 |
else |
222 |
abort (); |
223 |
} |
224 |
} |
225 |
|
226 |
int |
227 |
main (int argc, char **argv) |
228 |
{ |
229 |
cl_platform_id *list; |
230 |
cl_uint count; |
231 |
int i; |
232 |
|
233 |
// we would really like to MCL_FUTURE, as being swapped out could turn |
234 |
// out to be fatal but we don't, as at least nvidia's opencl results |
235 |
// in a 1.5gb allocation (on my 1gb card) instantly. |
236 |
//mlockall (MCL_CURRENT | MCL_FUTURE); |
237 |
mlockall (MCL_CURRENT); |
238 |
|
239 |
NEED_SUCCESS (GetPlatformIDs, (0, 0, &count)); |
240 |
list = malloc (sizeof (*list) * count); |
241 |
NEED_SUCCESS (GetPlatformIDs, (count, list, 0)); |
242 |
|
243 |
for (i = 0; i < count; ++i) |
244 |
{ |
245 |
platform = list [i]; |
246 |
|
247 |
cl_device_id *list; |
248 |
cl_uint count; |
249 |
int i; |
250 |
|
251 |
NEED_SUCCESS (GetDeviceIDs, (platform, CL_DEVICE_TYPE_GPU, 0, 0, &count)); |
252 |
list = malloc (sizeof (*list) * count); |
253 |
NEED_SUCCESS (GetDeviceIDs, (platform, CL_DEVICE_TYPE_GPU, count, list, 0)); |
254 |
|
255 |
for (i = 0; i < count; ++i) |
256 |
{ |
257 |
device = list [i]; |
258 |
NEED_SUCCESS_ARG (context, CreateContext, (0, 1, &device, 0, 0, &res)); |
259 |
NEED_SUCCESS_ARG (queue, CreateCommandQueue, (context, device, 0, &res)); |
260 |
|
261 |
init (); |
262 |
run (); |
263 |
exit (0); |
264 |
} |
265 |
|
266 |
free (list); |
267 |
} |
268 |
|
269 |
fprintf (stderr, "unable to create suitable opencl context\n"); |
270 |
return 1; |
271 |
} |
272 |
|