ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/gvpe/src/curve25519-donna-c64.c
Revision: 1.1
Committed: Sat Jan 17 08:35:16 2015 UTC (9 years, 4 months ago) by root
Content type: text/plain
Branch: MAIN
CVS Tags: rel-3_0, HEAD
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 /* Copyright 2008, Google Inc.
2     * All rights reserved.
3     *
4     * Code released into the public domain.
5     *
6     * curve25519-donna: Curve25519 elliptic curve, public key function
7     *
8     * http://code.google.com/p/curve25519-donna/
9     *
10     * Adam Langley <agl@imperialviolet.org>
11     *
12     * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
13     *
14     * More information about curve25519 can be found here
15     * http://cr.yp.to/ecdh.html
16     *
17     * djb's sample implementation of curve25519 is written in a special assembly
18     * language called qhasm and uses the floating point registers.
19     *
20     * This is, almost, a clean room reimplementation from the curve25519 paper. It
21     * uses many of the tricks described therein. Only the crecip function is taken
22     * from the sample implementation.
23     */
24    
25     #include <string.h>
26     #include <stdint.h>
27    
/* Byte type used for the 32-byte external encodings. */
typedef uint8_t u8;
/* One 64-bit word of a field element. */
typedef uint64_t limb;
/* A field element: an array of five limbs. */
typedef limb felem[5];
/* 128-bit unsigned integer, requested through GCC's TImode type attribute.
 * The original author notes that, as far as he knows, this mode is only
 * implemented on 64-bit platforms. */
typedef unsigned uint128_t __attribute__((__mode__(__TI__)));

/* force_inline: function attribute asking the compiler to always inline.
 * Any definition of the macro that is already in effect is dropped first. */
#ifdef force_inline
#undef force_inline
#endif
#define force_inline __attribute__((__always_inline__))
37    
38     /* Sum two numbers: output += in */
39     static inline void force_inline
40     fsum(limb *output, const limb *in) {
41     output[0] += in[0];
42     output[1] += in[1];
43     output[2] += in[2];
44     output[3] += in[3];
45     output[4] += in[4];
46     }
47    
48     /* Find the difference of two numbers: output = in - output
49     * (note the order of the arguments!)
50     *
51     * Assumes that out[i] < 2**52
52     * On return, out[i] < 2**55
53     */
54     static inline void force_inline
55     fdifference_backwards(felem out, const felem in) {
56     /* 152 is 19 << 3 */
57     static const limb two54m152 = (((limb)1) << 54) - 152;
58     static const limb two54m8 = (((limb)1) << 54) - 8;
59    
60     out[0] = in[0] + two54m152 - out[0];
61     out[1] = in[1] + two54m8 - out[1];
62     out[2] = in[2] + two54m8 - out[2];
63     out[3] = in[3] + two54m8 - out[3];
64     out[4] = in[4] + two54m8 - out[4];
65     }
66    
67     /* Multiply a number by a scalar: output = in * scalar */
68     static inline void force_inline
69     fscalar_product(felem output, const felem in, const limb scalar) {
70     uint128_t a;
71    
72     a = ((uint128_t) in[0]) * scalar;
73     output[0] = ((limb)a) & 0x7ffffffffffff;
74    
75     a = ((uint128_t) in[1]) * scalar + ((limb) (a >> 51));
76     output[1] = ((limb)a) & 0x7ffffffffffff;
77    
78     a = ((uint128_t) in[2]) * scalar + ((limb) (a >> 51));
79     output[2] = ((limb)a) & 0x7ffffffffffff;
80    
81     a = ((uint128_t) in[3]) * scalar + ((limb) (a >> 51));
82     output[3] = ((limb)a) & 0x7ffffffffffff;
83    
84     a = ((uint128_t) in[4]) * scalar + ((limb) (a >> 51));
85     output[4] = ((limb)a) & 0x7ffffffffffff;
86    
87     output[0] += (a >> 51) * 19;
88     }
89    
90     /* Multiply two numbers: output = in2 * in
91     *
92     * output must be distinct to both inputs. The inputs are reduced coefficient
93     * form, the output is not.
94     *
95     * Assumes that in[i] < 2**55 and likewise for in2.
96     * On return, output[i] < 2**52
97     */
98     static inline void force_inline
99     fmul(felem output, const felem in2, const felem in) {
100     uint128_t t[5];
101     limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
102    
103     r0 = in[0];
104     r1 = in[1];
105     r2 = in[2];
106     r3 = in[3];
107     r4 = in[4];
108    
109     s0 = in2[0];
110     s1 = in2[1];
111     s2 = in2[2];
112     s3 = in2[3];
113     s4 = in2[4];
114    
115     t[0] = ((uint128_t) r0) * s0;
116     t[1] = ((uint128_t) r0) * s1 + ((uint128_t) r1) * s0;
117     t[2] = ((uint128_t) r0) * s2 + ((uint128_t) r2) * s0 + ((uint128_t) r1) * s1;
118     t[3] = ((uint128_t) r0) * s3 + ((uint128_t) r3) * s0 + ((uint128_t) r1) * s2 + ((uint128_t) r2) * s1;
119     t[4] = ((uint128_t) r0) * s4 + ((uint128_t) r4) * s0 + ((uint128_t) r3) * s1 + ((uint128_t) r1) * s3 + ((uint128_t) r2) * s2;
120    
121     r4 *= 19;
122     r1 *= 19;
123     r2 *= 19;
124     r3 *= 19;
125    
126     t[0] += ((uint128_t) r4) * s1 + ((uint128_t) r1) * s4 + ((uint128_t) r2) * s3 + ((uint128_t) r3) * s2;
127     t[1] += ((uint128_t) r4) * s2 + ((uint128_t) r2) * s4 + ((uint128_t) r3) * s3;
128     t[2] += ((uint128_t) r4) * s3 + ((uint128_t) r3) * s4;
129     t[3] += ((uint128_t) r4) * s4;
130    
131     r0 = (limb)t[0] & 0x7ffffffffffff; c = (limb)(t[0] >> 51);
132     t[1] += c; r1 = (limb)t[1] & 0x7ffffffffffff; c = (limb)(t[1] >> 51);
133     t[2] += c; r2 = (limb)t[2] & 0x7ffffffffffff; c = (limb)(t[2] >> 51);
134     t[3] += c; r3 = (limb)t[3] & 0x7ffffffffffff; c = (limb)(t[3] >> 51);
135     t[4] += c; r4 = (limb)t[4] & 0x7ffffffffffff; c = (limb)(t[4] >> 51);
136     r0 += c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffff;
137     r1 += c; c = r1 >> 51; r1 = r1 & 0x7ffffffffffff;
138     r2 += c;
139    
140     output[0] = r0;
141     output[1] = r1;
142     output[2] = r2;
143     output[3] = r3;
144     output[4] = r4;
145     }
146    
147     static inline void force_inline
148     fsquare_times(felem output, const felem in, limb count) {
149     uint128_t t[5];
150     limb r0,r1,r2,r3,r4,c;
151     limb d0,d1,d2,d4,d419;
152    
153     r0 = in[0];
154     r1 = in[1];
155     r2 = in[2];
156     r3 = in[3];
157     r4 = in[4];
158    
159     do {
160     d0 = r0 * 2;
161     d1 = r1 * 2;
162     d2 = r2 * 2 * 19;
163     d419 = r4 * 19;
164     d4 = d419 * 2;
165    
166     t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3 ));
167     t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19));
168     t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3 ));
169     t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419 ));
170     t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2 ));
171    
172     r0 = (limb)t[0] & 0x7ffffffffffff; c = (limb)(t[0] >> 51);
173     t[1] += c; r1 = (limb)t[1] & 0x7ffffffffffff; c = (limb)(t[1] >> 51);
174     t[2] += c; r2 = (limb)t[2] & 0x7ffffffffffff; c = (limb)(t[2] >> 51);
175     t[3] += c; r3 = (limb)t[3] & 0x7ffffffffffff; c = (limb)(t[3] >> 51);
176     t[4] += c; r4 = (limb)t[4] & 0x7ffffffffffff; c = (limb)(t[4] >> 51);
177     r0 += c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffff;
178     r1 += c; c = r1 >> 51; r1 = r1 & 0x7ffffffffffff;
179     r2 += c;
180     } while(--count);
181    
182     output[0] = r0;
183     output[1] = r1;
184     output[2] = r2;
185     output[3] = r3;
186     output[4] = r4;
187     }
188    
189     /* Load a little-endian 64-bit number */
190     static limb
191     load_limb(const u8 *in) {
192     return
193     ((limb)in[0]) |
194     (((limb)in[1]) << 8) |
195     (((limb)in[2]) << 16) |
196     (((limb)in[3]) << 24) |
197     (((limb)in[4]) << 32) |
198     (((limb)in[5]) << 40) |
199     (((limb)in[6]) << 48) |
200     (((limb)in[7]) << 56);
201     }
202    
203     static void
204     store_limb(u8 *out, limb in) {
205     out[0] = in & 0xff;
206     out[1] = (in >> 8) & 0xff;
207     out[2] = (in >> 16) & 0xff;
208     out[3] = (in >> 24) & 0xff;
209     out[4] = (in >> 32) & 0xff;
210     out[5] = (in >> 40) & 0xff;
211     out[6] = (in >> 48) & 0xff;
212     out[7] = (in >> 56) & 0xff;
213     }
214    
215     /* Take a little-endian, 32-byte number and expand it into polynomial form */
216     static void
217     fexpand(limb *output, const u8 *in) {
218     output[0] = load_limb(in) & 0x7ffffffffffff;
219     output[1] = (load_limb(in+6) >> 3) & 0x7ffffffffffff;
220     output[2] = (load_limb(in+12) >> 6) & 0x7ffffffffffff;
221     output[3] = (load_limb(in+19) >> 1) & 0x7ffffffffffff;
222     output[4] = (load_limb(in+24) >> 12) & 0x7ffffffffffff;
223     }
224    
225     /* Take a fully reduced polynomial form number and contract it into a
226     * little-endian, 32-byte array
227     */
228     static void
229     fcontract(u8 *output, const felem input) {
230     uint128_t t[5];
231    
232     t[0] = input[0];
233     t[1] = input[1];
234     t[2] = input[2];
235     t[3] = input[3];
236     t[4] = input[4];
237    
238     t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
239     t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
240     t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
241     t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
242     t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
243    
244     t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
245     t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
246     t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
247     t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
248     t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
249    
250     /* now t is between 0 and 2^255-1, properly carried. */
251     /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
252    
253     t[0] += 19;
254    
255     t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
256     t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
257     t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
258     t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
259     t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
260    
261     /* now between 19 and 2^255-1 in both cases, and offset by 19. */
262    
263     t[0] += 0x8000000000000 - 19;
264     t[1] += 0x8000000000000 - 1;
265     t[2] += 0x8000000000000 - 1;
266     t[3] += 0x8000000000000 - 1;
267     t[4] += 0x8000000000000 - 1;
268    
269     /* now between 2^255 and 2^256-20, and offset by 2^255. */
270    
271     t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
272     t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
273     t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
274     t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
275     t[4] &= 0x7ffffffffffff;
276    
277     store_limb(output, t[0] | (t[1] << 51));
278     store_limb(output+8, (t[1] >> 13) | (t[2] << 38));
279     store_limb(output+16, (t[2] >> 26) | (t[3] << 25));
280     store_limb(output+24, (t[3] >> 39) | (t[4] << 12));
281     }
282    
283     /* Input: Q, Q', Q-Q'
284     * Output: 2Q, Q+Q'
285     *
286     * x2 z3: long form
287     * x3 z3: long form
288     * x z: short form, destroyed
289     * xprime zprime: short form, destroyed
290     * qmqp: short form, preserved
291     */
292     static void
293     fmonty(limb *x2, limb *z2, /* output 2Q */
294     limb *x3, limb *z3, /* output Q + Q' */
295     limb *x, limb *z, /* input Q */
296     limb *xprime, limb *zprime, /* input Q' */
297     const limb *qmqp /* input Q - Q' */) {
298     limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5],
299     zzprime[5], zzzprime[5];
300    
301     memcpy(origx, x, 5 * sizeof(limb));
302     fsum(x, z);
303     fdifference_backwards(z, origx); // does x - z
304    
305     memcpy(origxprime, xprime, sizeof(limb) * 5);
306     fsum(xprime, zprime);
307     fdifference_backwards(zprime, origxprime);
308     fmul(xxprime, xprime, z);
309     fmul(zzprime, x, zprime);
310     memcpy(origxprime, xxprime, sizeof(limb) * 5);
311     fsum(xxprime, zzprime);
312     fdifference_backwards(zzprime, origxprime);
313     fsquare_times(x3, xxprime, 1);
314     fsquare_times(zzzprime, zzprime, 1);
315     fmul(z3, zzzprime, qmqp);
316    
317     fsquare_times(xx, x, 1);
318     fsquare_times(zz, z, 1);
319     fmul(x2, xx, zz);
320     fdifference_backwards(zz, xx); // does zz = xx - zz
321     fscalar_product(zzz, zz, 121665);
322     fsum(zzz, xx);
323     fmul(z2, zz, zzz);
324     }
325    
326     // -----------------------------------------------------------------------------
327     // Maybe swap the contents of two limb arrays (@a and @b), each @len elements
328     // long. Perform the swap iff @swap is non-zero.
329     //
330     // This function performs the swap without leaking any side-channel
331     // information.
332     // -----------------------------------------------------------------------------
333     static void
334     swap_conditional(limb a[5], limb b[5], limb iswap) {
335     unsigned i;
336     const limb swap = -iswap;
337    
338     for (i = 0; i < 5; ++i) {
339     const limb x = swap & (a[i] ^ b[i]);
340     a[i] ^= x;
341     b[i] ^= x;
342     }
343     }
344    
345     /* Calculates nQ where Q is the x-coordinate of a point on the curve
346     *
347     * resultx/resultz: the x coordinate of the resulting curve point (short form)
348     * n: a little endian, 32-byte number
349     * q: a point of the curve (short form)
350     */
351     static void
352     cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q) {
353     limb a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0};
354     limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
355     limb e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1};
356     limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
357    
358     unsigned i, j;
359    
360     memcpy(nqpqx, q, sizeof(limb) * 5);
361    
362     for (i = 0; i < 32; ++i) {
363     u8 byte = n[31 - i];
364     for (j = 0; j < 8; ++j) {
365     const limb bit = byte >> 7;
366    
367     swap_conditional(nqx, nqpqx, bit);
368     swap_conditional(nqz, nqpqz, bit);
369     fmonty(nqx2, nqz2,
370     nqpqx2, nqpqz2,
371     nqx, nqz,
372     nqpqx, nqpqz,
373     q);
374     swap_conditional(nqx2, nqpqx2, bit);
375     swap_conditional(nqz2, nqpqz2, bit);
376    
377     t = nqx;
378     nqx = nqx2;
379     nqx2 = t;
380     t = nqz;
381     nqz = nqz2;
382     nqz2 = t;
383     t = nqpqx;
384     nqpqx = nqpqx2;
385     nqpqx2 = t;
386     t = nqpqz;
387     nqpqz = nqpqz2;
388     nqpqz2 = t;
389    
390     byte <<= 1;
391     }
392     }
393    
394     memcpy(resultx, nqx, sizeof(limb) * 5);
395     memcpy(resultz, nqz, sizeof(limb) * 5);
396     }
397    
398    
399     // -----------------------------------------------------------------------------
400     // Shamelessly copied from djb's code, tightened a little
401     // -----------------------------------------------------------------------------
402     static void
403     crecip(felem out, const felem z) {
404     felem a,t0,b,c;
405    
406     /* 2 */ fsquare_times(a, z, 1); // a = 2
407     /* 8 */ fsquare_times(t0, a, 2);
408     /* 9 */ fmul(b, t0, z); // b = 9
409     /* 11 */ fmul(a, b, a); // a = 11
410     /* 22 */ fsquare_times(t0, a, 1);
411     /* 2^5 - 2^0 = 31 */ fmul(b, t0, b);
412     /* 2^10 - 2^5 */ fsquare_times(t0, b, 5);
413     /* 2^10 - 2^0 */ fmul(b, t0, b);
414     /* 2^20 - 2^10 */ fsquare_times(t0, b, 10);
415     /* 2^20 - 2^0 */ fmul(c, t0, b);
416     /* 2^40 - 2^20 */ fsquare_times(t0, c, 20);
417     /* 2^40 - 2^0 */ fmul(t0, t0, c);
418     /* 2^50 - 2^10 */ fsquare_times(t0, t0, 10);
419     /* 2^50 - 2^0 */ fmul(b, t0, b);
420     /* 2^100 - 2^50 */ fsquare_times(t0, b, 50);
421     /* 2^100 - 2^0 */ fmul(c, t0, b);
422     /* 2^200 - 2^100 */ fsquare_times(t0, c, 100);
423     /* 2^200 - 2^0 */ fmul(t0, t0, c);
424     /* 2^250 - 2^50 */ fsquare_times(t0, t0, 50);
425     /* 2^250 - 2^0 */ fmul(t0, t0, b);
426     /* 2^255 - 2^5 */ fsquare_times(t0, t0, 5);
427     /* 2^255 - 21 */ fmul(out, t0, a);
428     }
429    
430     int curve25519_donna(u8 *, const u8 *, const u8 *);
431    
432     int
433     curve25519_donna(u8 *mypublic, const u8 *secret, const u8 *basepoint) {
434     limb bp[5], x[5], z[5], zmone[5];
435     uint8_t e[32];
436     int i;
437    
438     for (i = 0;i < 32;++i) e[i] = secret[i];
439     e[0] &= 248;
440     e[31] &= 127;
441     e[31] |= 64;
442    
443     fexpand(bp, basepoint);
444     cmult(x, z, e, bp);
445     crecip(zmone, z);
446     fmul(z, x, zmone);
447     fcontract(mypublic, z);
448     return 0;
449     }