x25519.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. /*
  2. * Based on Michael Hamburg's STROBE reference implementation.
  3. * Copyright (c) 2015-2016 Cryptography Research, Inc.
  4. * MIT License (MIT)
  5. */
  6. #if defined(__GNUC__) && defined(__SIZEOF_INT128__)
  7. #define hydro_x25519_WBITS 64
  8. #else
  9. #define hydro_x25519_WBITS 32
  10. #endif
  11. #if hydro_x25519_WBITS == 64
  12. typedef uint64_t hydro_x25519_limb_t;
  13. typedef __uint128_t hydro_x25519_dlimb_t;
  14. typedef __int128_t hydro_x25519_sdlimb_t;
  15. #define hydro_x25519_eswap_limb(X) LOAD64_LE((const uint8_t *) &(X))
  16. #define hydro_x25519_LIMB(x) x##ull
  17. #elif hydro_x25519_WBITS == 32
  18. typedef uint32_t hydro_x25519_limb_t;
  19. typedef uint64_t hydro_x25519_dlimb_t;
  20. typedef int64_t hydro_x25519_sdlimb_t;
  21. #define hydro_x25519_eswap_limb(X) LOAD32_LE((const uint8_t *) &(X))
  22. #define hydro_x25519_LIMB(x) (uint32_t)(x##ull), (uint32_t) ((x##ull) >> 32)
  23. #else
  24. #error "Need to know hydro_x25519_WBITS"
  25. #endif
  26. #define hydro_x25519_NLIMBS (256 / hydro_x25519_WBITS)
  27. typedef hydro_x25519_limb_t hydro_x25519_fe[hydro_x25519_NLIMBS];
  28. typedef hydro_x25519_limb_t hydro_x25519_scalar_t[hydro_x25519_NLIMBS];
  29. static const hydro_x25519_limb_t hydro_x25519_MONTGOMERY_FACTOR =
  30. (hydro_x25519_limb_t) 0xd2b51da312547e1bull;
  31. static const hydro_x25519_scalar_t hydro_x25519_sc_p = { hydro_x25519_LIMB(0x5812631a5cf5d3ed),
  32. hydro_x25519_LIMB(0x14def9dea2f79cd6),
  33. hydro_x25519_LIMB(0x0000000000000000),
  34. hydro_x25519_LIMB(0x1000000000000000) };
  35. static const hydro_x25519_scalar_t hydro_x25519_sc_r2 = { hydro_x25519_LIMB(0xa40611e3449c0f01),
  36. hydro_x25519_LIMB(0xd00e1ba768859347),
  37. hydro_x25519_LIMB(0xceec73d217f5be65),
  38. hydro_x25519_LIMB(0x0399411b7c309a3d) };
  39. static const uint8_t hydro_x25519_BASE_POINT[hydro_x25519_BYTES] = { 9 };
  40. static const hydro_x25519_limb_t hydro_x25519_a24[1] = { 121665 };
  41. static inline hydro_x25519_limb_t
  42. hydro_x25519_umaal(hydro_x25519_limb_t *carry, hydro_x25519_limb_t acc, hydro_x25519_limb_t mand,
  43. hydro_x25519_limb_t mier)
  44. {
  45. hydro_x25519_dlimb_t tmp = (hydro_x25519_dlimb_t) mand * mier + acc + *carry;
  46. *carry = tmp >> hydro_x25519_WBITS;
  47. return (hydro_x25519_limb_t) tmp;
  48. }
  49. static inline hydro_x25519_limb_t
  50. hydro_x25519_adc(hydro_x25519_limb_t *carry, hydro_x25519_limb_t acc, hydro_x25519_limb_t mand)
  51. {
  52. hydro_x25519_dlimb_t total = (hydro_x25519_dlimb_t) *carry + acc + mand;
  53. *carry = total >> hydro_x25519_WBITS;
  54. return (hydro_x25519_limb_t) total;
  55. }
  56. static inline hydro_x25519_limb_t
  57. hydro_x25519_adc0(hydro_x25519_limb_t *carry, hydro_x25519_limb_t acc)
  58. {
  59. hydro_x25519_dlimb_t total = (hydro_x25519_dlimb_t) *carry + acc;
  60. *carry = total >> hydro_x25519_WBITS;
  61. return (hydro_x25519_limb_t) total;
  62. }
  63. static void
  64. hydro_x25519_propagate(hydro_x25519_fe x, hydro_x25519_limb_t over)
  65. {
  66. hydro_x25519_limb_t carry;
  67. int i;
  68. over = x[hydro_x25519_NLIMBS - 1] >> (hydro_x25519_WBITS - 1) | over << 1;
  69. x[hydro_x25519_NLIMBS - 1] &= ~((hydro_x25519_limb_t) 1 << (hydro_x25519_WBITS - 1));
  70. carry = over * 19;
  71. for (i = 0; i < hydro_x25519_NLIMBS; i++) {
  72. x[i] = hydro_x25519_adc0(&carry, x[i]);
  73. }
  74. }
  75. static void
  76. hydro_x25519_add(hydro_x25519_fe out, const hydro_x25519_fe a, const hydro_x25519_fe b)
  77. {
  78. hydro_x25519_limb_t carry = 0;
  79. int i;
  80. for (i = 0; i < hydro_x25519_NLIMBS; i++) {
  81. out[i] = hydro_x25519_adc(&carry, a[i], b[i]);
  82. }
  83. hydro_x25519_propagate(out, carry);
  84. }
  85. static void
  86. hydro_x25519_sub(hydro_x25519_fe out, const hydro_x25519_fe a, const hydro_x25519_fe b)
  87. {
  88. hydro_x25519_sdlimb_t carry = -38;
  89. int i;
  90. for (i = 0; i < hydro_x25519_NLIMBS; i++) {
  91. out[i] = (hydro_x25519_limb_t) (carry = carry + a[i] - b[i]);
  92. carry >>= hydro_x25519_WBITS;
  93. }
  94. hydro_x25519_propagate(out, (hydro_x25519_limb_t) (1 + carry));
  95. }
  96. static void
  97. hydro_x25519_swapin(hydro_x25519_limb_t *x, const uint8_t *in)
  98. {
  99. int i;
  100. memcpy(x, in, sizeof(hydro_x25519_fe));
  101. for (i = 0; i < hydro_x25519_NLIMBS; i++) {
  102. x[i] = hydro_x25519_eswap_limb(x[i]);
  103. }
  104. }
  105. static void
  106. hydro_x25519_swapout(uint8_t *out, hydro_x25519_limb_t *x)
  107. {
  108. int i;
  109. for (i = 0; i < hydro_x25519_NLIMBS; i++) {
  110. x[i] = hydro_x25519_eswap_limb(x[i]);
  111. }
  112. memcpy(out, x, sizeof(hydro_x25519_fe));
  113. }
  114. static void
  115. hydro_x25519_mul(hydro_x25519_fe out, const hydro_x25519_fe a, const hydro_x25519_limb_t b[],
  116. const int nb)
  117. {
  118. hydro_x25519_limb_t accum[2 * hydro_x25519_NLIMBS] = { 0 };
  119. hydro_x25519_limb_t carry2;
  120. int i, j;
  121. for (i = 0; i < nb; i++) {
  122. hydro_x25519_limb_t mand = b[i];
  123. carry2 = 0;
  124. for (j = 0; j < hydro_x25519_NLIMBS; j++) {
  125. accum[i + j] = hydro_x25519_umaal(&carry2, accum[i + j], mand, a[j]);
  126. }
  127. accum[i + j] = carry2;
  128. }
  129. carry2 = 0;
  130. for (j = 0; j < hydro_x25519_NLIMBS; j++) {
  131. const hydro_x25519_limb_t mand = 38;
  132. out[j] = hydro_x25519_umaal(&carry2, accum[j], mand, accum[j + hydro_x25519_NLIMBS]);
  133. }
  134. hydro_x25519_propagate(out, carry2);
  135. }
  136. static void
  137. hydro_x25519_sqr(hydro_x25519_fe out, const hydro_x25519_fe a)
  138. {
  139. hydro_x25519_mul(out, a, a, hydro_x25519_NLIMBS);
  140. }
  141. static void
  142. hydro_x25519_mul1(hydro_x25519_fe out, const hydro_x25519_fe a)
  143. {
  144. hydro_x25519_mul(out, a, out, hydro_x25519_NLIMBS);
  145. }
  146. static void
  147. hydro_x25519_sqr1(hydro_x25519_fe a)
  148. {
  149. hydro_x25519_mul1(a, a);
  150. }
  151. static void
  152. hydro_x25519_condswap(hydro_x25519_limb_t a[2 * hydro_x25519_NLIMBS],
  153. hydro_x25519_limb_t b[2 * hydro_x25519_NLIMBS], hydro_x25519_limb_t doswap)
  154. {
  155. int i;
  156. for (i = 0; i < 2 * hydro_x25519_NLIMBS; i++) {
  157. hydro_x25519_limb_t xorv = (a[i] ^ b[i]) & doswap;
  158. a[i] ^= xorv;
  159. b[i] ^= xorv;
  160. }
  161. }
  162. static int
  163. hydro_x25519_canon(hydro_x25519_fe x)
  164. {
  165. hydro_x25519_sdlimb_t carry;
  166. hydro_x25519_limb_t carry0 = 19;
  167. hydro_x25519_limb_t res;
  168. int i;
  169. for (i = 0; i < hydro_x25519_NLIMBS; i++) {
  170. x[i] = hydro_x25519_adc0(&carry0, x[i]);
  171. }
  172. hydro_x25519_propagate(x, carry0);
  173. carry = -19;
  174. res = 0;
  175. for (i = 0; i < hydro_x25519_NLIMBS; i++) {
  176. res |= x[i] = (hydro_x25519_limb_t) (carry += x[i]);
  177. carry >>= hydro_x25519_WBITS;
  178. }
  179. return ((hydro_x25519_dlimb_t) res - 1) >> hydro_x25519_WBITS;
  180. }
  181. static void
  182. hydro_x25519_ladder_part1(hydro_x25519_fe xs[5])
  183. {
  184. hydro_x25519_limb_t *x2 = xs[0], *z2 = xs[1], *x3 = xs[2], *z3 = xs[3], *t1 = xs[4];
  185. hydro_x25519_add(t1, x2, z2); // t1 = A
  186. hydro_x25519_sub(z2, x2, z2); // z2 = B
  187. hydro_x25519_add(x2, x3, z3); // x2 = C
  188. hydro_x25519_sub(z3, x3, z3); // z3 = D
  189. hydro_x25519_mul1(z3, t1); // z3 = DA
  190. hydro_x25519_mul1(x2, z2); // x3 = BC
  191. hydro_x25519_add(x3, z3, x2); // x3 = DA+CB
  192. hydro_x25519_sub(z3, z3, x2); // z3 = DA-CB
  193. hydro_x25519_sqr1(t1); // t1 = AA
  194. hydro_x25519_sqr1(z2); // z2 = BB
  195. hydro_x25519_sub(x2, t1, z2); // x2 = E = AA-BB
  196. hydro_x25519_mul(z2, x2, hydro_x25519_a24, // z2 = E*a24
  197. sizeof(hydro_x25519_a24) / sizeof(hydro_x25519_a24[0]));
  198. hydro_x25519_add(z2, z2, t1); // z2 = E*a24 + AA
  199. }
  200. static void
  201. hydro_x25519_ladder_part2(hydro_x25519_fe xs[5], const hydro_x25519_fe x1)
  202. {
  203. hydro_x25519_limb_t *x2 = xs[0], *z2 = xs[1], *x3 = xs[2], *z3 = xs[3], *t1 = xs[4];
  204. hydro_x25519_sqr1(z3); // z3 = (DA-CB)^2
  205. hydro_x25519_mul1(z3, x1); // z3 = x1 * (DA-CB)^2
  206. hydro_x25519_sqr1(x3); // x3 = (DA+CB)^2
  207. hydro_x25519_mul1(z2, x2); // z2 = AA*(E*a24+AA)
  208. hydro_x25519_sub(x2, t1, x2); // x2 = BB again
  209. hydro_x25519_mul1(x2, t1); // x2 = AA*BB
  210. }
  211. static void
  212. hydro_x25519_core(hydro_x25519_fe xs[5], const uint8_t scalar[hydro_x25519_BYTES],
  213. const uint8_t *x1, bool clamp)
  214. {
  215. hydro_x25519_limb_t swap;
  216. hydro_x25519_limb_t *x2 = xs[0], *x3 = xs[2], *z3 = xs[3];
  217. hydro_x25519_fe x1i;
  218. int i;
  219. hydro_x25519_swapin(x1i, x1);
  220. x1 = (const uint8_t *) x1i;
  221. swap = 0;
  222. mem_zero(xs, 4 * sizeof(hydro_x25519_fe));
  223. x2[0] = z3[0] = 1;
  224. memcpy(x3, x1, sizeof(hydro_x25519_fe));
  225. for (i = 255; i >= 0; i--) {
  226. uint8_t bytei = scalar[i / 8];
  227. hydro_x25519_limb_t doswap;
  228. hydro_x25519_fe x1_dup;
  229. if (clamp) {
  230. if (i / 8 == 0) {
  231. bytei &= ~7;
  232. } else if (i / 8 == hydro_x25519_BYTES - 1) {
  233. bytei &= 0x7F;
  234. bytei |= 0x40;
  235. }
  236. }
  237. doswap = 1U + ~(hydro_x25519_limb_t) ((bytei >> (i % 8)) & 1);
  238. hydro_x25519_condswap(x2, x3, swap ^ doswap);
  239. swap = doswap;
  240. hydro_x25519_ladder_part1(xs);
  241. memcpy(x1_dup, x1, sizeof x1_dup);
  242. hydro_x25519_ladder_part2(xs, x1_dup);
  243. }
  244. hydro_x25519_condswap(x2, x3, swap);
  245. }
  246. static int
  247. hydro_x25519_scalarmult(uint8_t out[hydro_x25519_BYTES],
  248. const uint8_t scalar[hydro_x25519_SECRETKEYBYTES],
  249. const uint8_t x1[hydro_x25519_PUBLICKEYBYTES], bool clamp)
  250. {
  251. hydro_x25519_fe xs[5];
  252. hydro_x25519_limb_t *x2, *z2, *z3;
  253. hydro_x25519_limb_t *prev;
  254. int i;
  255. int ret;
  256. hydro_x25519_core(xs, scalar, x1, clamp);
  257. /* Precomputed inversion chain */
  258. x2 = xs[0];
  259. z2 = xs[1];
  260. z3 = xs[3];
  261. prev = z2;
  262. /* Raise to the p-2 = 0x7f..ffeb */
  263. for (i = 253; i >= 0; i--) {
  264. hydro_x25519_sqr(z3, prev);
  265. prev = z3;
  266. if (i >= 8 || (0xeb >> i & 1)) {
  267. hydro_x25519_mul1(z3, z2);
  268. }
  269. }
  270. /* Here prev = z3 */
  271. /* x2 /= z2 */
  272. hydro_x25519_mul1(x2, z3);
  273. ret = hydro_x25519_canon(x2);
  274. hydro_x25519_swapout(out, x2);
  275. if (clamp == 0) {
  276. return 0;
  277. }
  278. return ret;
  279. }
  280. static inline int
  281. hydro_x25519_scalarmult_base(uint8_t pk[hydro_x25519_PUBLICKEYBYTES],
  282. const uint8_t sk[hydro_x25519_SECRETKEYBYTES])
  283. {
  284. return hydro_x25519_scalarmult(pk, sk, hydro_x25519_BASE_POINT, 1);
  285. }
  286. static inline void
  287. hydro_x25519_scalarmult_base_uniform(uint8_t pk[hydro_x25519_PUBLICKEYBYTES],
  288. const uint8_t sk[hydro_x25519_SECRETKEYBYTES])
  289. {
  290. if (hydro_x25519_scalarmult(pk, sk, hydro_x25519_BASE_POINT, 0) != 0) {
  291. abort();
  292. }
  293. }
  294. static void
  295. hydro_x25519_sc_montmul(hydro_x25519_scalar_t out, const hydro_x25519_scalar_t a,
  296. const hydro_x25519_scalar_t b)
  297. {
  298. hydro_x25519_limb_t hic = 0;
  299. int i, j;
  300. for (i = 0; i < hydro_x25519_NLIMBS; i++) {
  301. hydro_x25519_limb_t carry = 0, carry2 = 0, mand = a[i],
  302. mand2 = hydro_x25519_MONTGOMERY_FACTOR;
  303. for (j = 0; j < hydro_x25519_NLIMBS; j++) {
  304. hydro_x25519_limb_t acc = out[j];
  305. acc = hydro_x25519_umaal(&carry, acc, mand, b[j]);
  306. if (j == 0) {
  307. mand2 *= acc;
  308. }
  309. acc = hydro_x25519_umaal(&carry2, acc, mand2, hydro_x25519_sc_p[j]);
  310. if (j > 0) {
  311. out[j - 1] = acc;
  312. }
  313. }
  314. /* Add two carry registers and high carry */
  315. out[hydro_x25519_NLIMBS - 1] = hydro_x25519_adc(&hic, carry, carry2);
  316. }
  317. /* Reduce */
  318. hydro_x25519_sdlimb_t scarry = 0;
  319. for (i = 0; i < hydro_x25519_NLIMBS; i++) {
  320. out[i] = (hydro_x25519_limb_t) (scarry = scarry + out[i] - hydro_x25519_sc_p[i]);
  321. scarry >>= hydro_x25519_WBITS;
  322. }
  323. hydro_x25519_limb_t need_add = (hydro_x25519_limb_t) - (scarry + hic);
  324. hydro_x25519_limb_t carry = 0;
  325. for (i = 0; i < hydro_x25519_NLIMBS; i++) {
  326. out[i] = hydro_x25519_umaal(&carry, out[i], need_add, hydro_x25519_sc_p[i]);
  327. }
  328. }