/*
 * Copyright (c) 2015, Lars Schmertmann,
 * Jens Trillmann.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * This file is part of the Contiki operating system.
 *
 */

#include "ecc.h"

/* Fixed-width integer types (uint32_t, int32_t, uintptr_t) used below. */
#include <stdint.h>

/*
 * Word offsets of the three coordinates inside a 24-word projective point
 * buffer: X at words 0..7, Y at words 8..15, Z at words 16..23.
 */
#define X 0
#define Y 8
#define Z 16

/*
 * The field prime, least significant 32-bit word first. The value is
 * 2^256 - 2^224 + 2^192 + 2^96 - 1, i.e. the NIST P-256 prime.
 */
const uint32_t ecc_prime_m[8] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
                                  0x00000000, 0x00000000, 0x00000001, 0xffffffff };

/*
 * 2^256 - ecc_prime_m, least significant word first. Adding this constant
 * after an addition overflowed 2^256 performs the reduction modulo the prime.
 */
const uint32_t ecc_prime_r[8] = { 0x00000001, 0x00000000, 0x00000000, 0xffffffff,
                                  0xffffffff, 0xffffffff, 0xfffffffe, 0x00000000 };

/*---------------------------------------------------------------------------*/
#define DEBUG 0
#define SELF_TEST 0

#if DEBUG
#include <stdio.h>
#define PRINTF(...) printf(__VA_ARGS__)
#define PRINTHEX(...) print_hex(__VA_ARGS__)
/*
 * Print "name:" followed by the l words of d as 8-digit upper-case hex
 * values, highest index first, and a trailing newline.
 */
static void
print_hex(const char *name, const uint32_t *d, uint32_t l)
{
  uint32_t i;

  printf("%s:", name);
  for(i = l; i > 0; --i) {
    /* uint32_t is not guaranteed to be unsigned int; cast to match %lX. */
    printf(" %08lX", (unsigned long)d[i - 1]);
  }
  printf("\n");
}
#else
#define PRINTF(...)
#define PRINTHEX(...)
#endif

#if SELF_TEST
#include <stdio.h>
static void selfTest(void);
#endif

/* private prototypes  ----------------------------------------------------- */

/* simple functions to work with 256 bit numbers */
static void ecc_setZero(uint32_t *a);
static void ecc_copy(uint32_t *dst, const uint32_t *src);
static uint32_t ecc_isX(const uint32_t *a, const uint32_t x);
static void ecc_rshift(uint32_t *a);
static void ecc_replace(uint32_t bit, uint32_t *dst, uint32_t *src);
static uint32_t ecc_add(uint32_t *result, const uint32_t *a, const uint32_t *b);
static uint32_t ecc_sub(uint32_t *result, const uint32_t *a, const uint32_t *b);
static void ecc_mult(uint32_t *result, const uint32_t *x, const uint32_t *y, const uint32_t length);

/* ecc_field_ModP-Helper */
__attribute__((always_inline)) static void ecc_form_s1(uint32_t *dst, const uint32_t *src);
__attribute__((always_inline)) static void ecc_form_s2(uint32_t *dst, const uint32_t *src);
__attribute__((always_inline)) static void ecc_form_s3(uint32_t *dst, const uint32_t *src);
__attribute__((always_inline)) static void ecc_form_s4(uint32_t *dst, const uint32_t *src);
__attribute__((always_inline)) static void ecc_form_d1(uint32_t *dst, const uint32_t *src);
__attribute__((always_inline)) static void ecc_form_d2(uint32_t *dst, const uint32_t *src); __attribute__((always_inline)) static void ecc_form_d3(uint32_t *dst, const uint32_t *src); __attribute__((always_inline)) static void ecc_form_d4(uint32_t *dst, const uint32_t *src); /* field functions for 256 bit numbers */ static void ecc_field_Add(uint32_t *result, const uint32_t *x, const uint32_t *y); static void ecc_field_Sub(uint32_t *result, const uint32_t *x, const uint32_t *y); static void ecc_field_ModP(uint32_t *result, const uint32_t *T); static void ecc_field_Mult(uint32_t *result, const uint32_t *A, const uint32_t *B); static void ecc_field_Inv(uint32_t *result, const uint32_t *A); /* new projective stuff */ static void ecc_projective_double(uint32_t *val); static void ecc_projective_add(uint32_t *result, const uint32_t *val_1, const uint32_t *x_2, const uint32_t *y_2, const uint32_t *z_2); /* public functions -------------------------------------------------------- */ int32_t ecc_compare(const uint32_t *a, const uint32_t *b) { int32_t r = 0; uint32_t i = 8; while(i--) { uint32_t neq = (a[i] != b[i]); int32_t greater = (a[i] > b[i] ? 
1 : -1); r ^= ((-(!r && neq)) & (r ^ greater)); } return r; } void ecc_ec_mult(uint32_t *resultx, uint32_t *resulty, const uint32_t *px, const uint32_t *py, const uint32_t *secret) { #if SELF_TEST selfTest(); #endif PRINTHEX("PX", px, 8); PRINTHEX("PY", py, 8); PRINTHEX("SC", secret, 8); uint32_t Q[24]; ecc_setZero(Q + X); ecc_setZero(Q + Y); ecc_setZero(Q + Z); Q[Z] = 0x00000001; uint32_t pz[8]; ecc_setZero(pz); pz[0] = 0x00000001; uint32_t temp[24]; int i; for(i = 255; i >= 0; --i) { ecc_projective_double(Q); /* PRINTHEX("QX", Q+X, 8); */ /* PRINTHEX("QY", Q+Y, 8); */ /* PRINTHEX("QZ", Q+Z, 8); */ ecc_projective_add(temp, Q, px, py, pz); /* PRINTHEX("QX", temp+X, 8); */ /* PRINTHEX("QY", temp+Y, 8); */ /* PRINTHEX("QZ", temp+Z, 8); */ int current_bit = (secret[i / 32] >> (i % 32)) & 0x1; /* ((secret[i / 32]) & ((uint32_t)1 << (i % 32))); */ ecc_replace(current_bit, Q, temp); /* PRINTHEX("QX", Q+X, 8); */ /* PRINTHEX("QY", Q+Y, 8); */ /* PRINTHEX("QZ", Q+Z, 8); */ } /* PRINTHEX("QX", Q+X, 8); */ /* PRINTHEX("QY", Q+Y, 8); */ /* PRINTHEX("QZ", Q+Z, 8); */ ecc_field_Inv(temp, Q + Z); ecc_field_Mult(resultx, Q + X, temp); ecc_field_Mult(resulty, Q + Y, temp); PRINTHEX("RX", resultx, 8); PRINTHEX("RY", resulty, 8); } /* private functions ------------------------------------------------------- */ static void ecc_setZero(uint32_t *a) { asm volatile ( "mov r1, $0 \n\t" "mov r2, r1 \n\t" "mov r3, r2 \n\t" "mov r4, r3 \n\t" "stm %[a]!, {r1-r4} \n\t" "stm %[a]!, {r1-r4} \n\t" : /* out */ : /* in */ [a] "l" (a) : /* clobber list */ "r1", "r2", "r3", "r4", "memory" ); } /* * copy one array to another */ static void ecc_copy(uint32_t *dst, const uint32_t *src) { asm volatile ( "ldm %[s]!, {r2-r5} \n\t" "stm %[d]!, {r2-r5} \n\t" "ldm %[s]!, {r2-r5} \n\t" "stm %[d]!, {r2-r5} \n\t" : /* out */ : /* in */ [d] "l" (dst), [s] "l" (src) : /* clobber list */ "r2", "r3", "r4", "r5", "memory" ); } static uint32_t ecc_isX(const uint32_t *a, const uint32_t x) { uint32_t r = (a[0] == x); 
uint32_t n = 8; while(--n) { r &= (a[n] == 0); } return r; } static void ecc_rshift(uint32_t *a) { uint32_t index = 32; uint32_t carry = 0; asm volatile ( "0: \n\t" "sub %[i], %[i], #4 \n\t" /* index -= 4 */ "mov r4, %[c] \n\t" /* result = carry */ "ldr r3, [%[a],%[i]] \n\t" /* value = a[index] */ "lsl %[c], r3, #31 \n\t" /* carry = value << 31 */ "lsr r3, r3, #1 \n\t" /* value >>= 1 */ "orr r4, r4, r3 \n\t" /* result |= value */ "str r4, [%[a],%[i]] \n\t" /* a[index] = result */ "cmp %[i], $0 \n\t" /* index == 0 */ "bne 0b \n\t" /* != ? next loop */ : /* out */ : /* in */ [a] "r" (a), [i] "r" (index), [c] "r" (carry) : /* clobber list */ "r3", "r4", "memory" ); } static void ecc_replace(uint32_t bit, uint32_t *dst, uint32_t *src) { bit = -bit; int i; for(i = 0; i < 24; i++) { dst[i] ^= (bit & (dst[i] ^ src[i])); } } static uint32_t ecc_add(uint32_t *result, const uint32_t *a, const uint32_t *b) { uint32_t carry; asm volatile ( "ldm %[x]!, {r4,r5} \n\t" "ldm %[y]!, {r6,r7} \n\t" "add r4, r4, r6 \n\t" "adc r5, r5, r7 \n\t" "stm %[r]!, {r4,r5} \n\t" "ldm %[x]!, {r4,r5} \n\t" "ldm %[y]!, {r6,r7} \n\t" "adc r4, r4, r6 \n\t" "adc r5, r5, r7 \n\t" "stm %[r]!, {r4,r5} \n\t" "ldm %[x]!, {r4,r5} \n\t" "ldm %[y]!, {r6,r7} \n\t" "adc r4, r4, r6 \n\t" "adc r5, r5, r7 \n\t" "stm %[r]!, {r4,r5} \n\t" "ldm %[x]!, {r4,r5} \n\t" "ldm %[y]!, {r6,r7} \n\t" "adc r4, r4, r6 \n\t" "adc r5, r5, r7 \n\t" "stm %[r]!, {r4,r5} \n\t" "bcc 0f \n\t" "mov %[c], #1 \n\t" "b 1f \n\t" "0: \n\t" "mov %[c], $0 \n\t" "1: \n\t" : /* out */ [c] "=l" (carry) : /* in */ [x] "l" (a), [y] "l" (b), [r] "l" (result) : /* clobber list */ "r4", "r5", "r6", "r7", "memory" ); return carry; } static uint32_t ecc_sub(uint32_t *result, const uint32_t *a, const uint32_t *b) { uint32_t carry; asm volatile ( "ldm %[x]!, {r4,r5} \n\t" "ldm %[y]!, {r6,r7} \n\t" "sub r4, r4, r6 \n\t" "sbc r5, r5, r7 \n\t" "stm %[r]!, {r4,r5} \n\t" "ldm %[x]!, {r4,r5} \n\t" "ldm %[y]!, {r6,r7} \n\t" "sbc r4, r4, r6 \n\t" "sbc r5, r5, r7 
\n\t" "stm %[r]!, {r4,r5} \n\t" "ldm %[x]!, {r4,r5} \n\t" "ldm %[y]!, {r6,r7} \n\t" "sbc r4, r4, r6 \n\t" "sbc r5, r5, r7 \n\t" "stm %[r]!, {r4,r5} \n\t" "ldm %[x]!, {r4,r5} \n\t" "ldm %[y]!, {r6,r7} \n\t" "sbc r4, r4, r6 \n\t" "sbc r5, r5, r7 \n\t" "stm %[r]!, {r4,r5} \n\t" "bcs 0f \n\t" "mov %[c], #1 \n\t" "b 1f \n\t" "0: \n\t" "mov %[c], $0 \n\t" "1: \n\t" : /* out */ [c] "=l" (carry) : /* in */ [x] "l" (a), [y] "l" (b), [r] "l" (result) : /* clobber list */ "r4", "r5", "r6", "r7", "memory" ); return carry; } static void ecc_mult(uint32_t *result, const uint32_t *x, const uint32_t *y, const uint32_t length) { if(length == 1) { /* Version 1: 56 Byte bigger as ASM-Version */ /* uint64_t *r = (uint64_t *) result; */ /* *r = (uint64_t) x[0] * (uint64_t) y[0]; */ /* Version 2: 56 Byte lesser as Version 1 but same speed */ asm volatile ( "ldrh r5, [%[x], $0] \n\t" /* r5 = (x[0] & 0x0000FFFF) */ "ldrh r3, [%[y], $0] \n\t" /* r3 = (y[0] & 0x0000FFFF) */ "mul r5, r3 \n\t" /* r5 *= r3 r5 = AB[0] */ "ldrh r6, [%[x], #2] \n\t" /* r6 = (x[0] >> 16) */ "mul r3, r6 \n\t" /* r3 *= r6 r3 = C[0] */ "ldrh r4, [%[y], #2] \n\t" /* r4 = (y[0] >> 16) */ "mul r6, r4 \n\t" /* r6 *= r4 r6 = AB[1] */ /* %[y] is not longer needed - its called ry now */ "ldrh %[y], [%[x], $0] \n\t" /* ry = (x[0] & 0x0000FFFF) */ "mul r4, %[y] \n\t" /* r4 *= ry r4 = C[1] */ "add %[y], r3, r4 \n\t" /* ry = r3 + r4 ry = C[0] + C[1] */ /* C[1] (r4) is not longer needed */ "mov r4, $0 \n\t" /* r4 = 0 */ "bcc 0f \n\t" /* jump if carry clear */ "mov r4, #1 \n\t" /* r4 = 1 */ "lsl r4, r4, #16 \n\t" /* r4 <<= 16 */ "0: \n\t" /* r4 = 0x000c0000 = (carry << 16) */ "lsr r3, %[y], #16 \n\t" /* r3 = (ry >> 16) */ "orr r4, r4, r3 \n\t" /* r4 |= r3 r4 = 0x000c'ryh' = (r4 | ry >> 16) */ "lsl r3, %[y], #16 \n\t" /* r3 = (ry << 16) r3 = 0x'ryl'0000 = (ry << 16) */ "add r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "stm %[r]!, {r3, r4} \n\t" : /* out */ : /* in */ [x] "l" (x), [y] "l" (y), [r] "l" (result) : /* clobber list */ "r3", 
"r4", "r5", "r6", "memory" ); } else { uint32_t carry; uint32_t C[length * 2]; ecc_mult(result, x, y, length / 2); ecc_mult(result + length, x + (length / 2), y + (length / 2), length / 2); ecc_mult(C, x, y + (length / 2), length / 2); ecc_mult(C + length, x + (length / 2), y, length / 2); if(length == 8) { carry = ecc_add(C, C, C + length); } else { asm volatile ( "cmp %[l], #2 \n\t" "beq .add2 \n\t" /* ASM for: ecc_add(C, C, C + 4, 4); */ "mov %[l], %[a] \n\t" "ldm %[a]!, {r3-r6} \n\t" "ldm %[a]!, {r5,r6} \n\t" "sub %[a], %[a], #16 \n\t" "add r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "stm %[l]!, {r3,r4} \n\t" "ldm %[a]!, {r3-r6} \n\t" "ldm %[a]!, {r5,r6} \n\t" "adc r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "stm %[l]!, {r3,r4} \n\t" "b 0f \n\t" ".add2: \n\t" /* ASM for: ecc_add(C, C, C + 2, 2); */ "ldm %[a]!, {r3-r6} \n\t" "sub %[a], %[a], #16 \n\t" "add r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "stm %[a]!, {r3,r4} \n\t" "0: \n\t" "bcc 1f \n\t" "mov %[c], #1 \n\t" "b 2f \n\t" "1: \n\t" "mov %[c], $0 \n\t" "2: \n\t" : /* out */ [c] "=l" (carry) : /* in */ [a] "l" (C), [l] "l" (length) : /* clobber list */ "r3", "r4", "r5", "r6", "memory" ); } C[length] = carry; asm volatile ( "cmp %[l], #2 \n\t" "beq .add3 \n\t" "cmp %[l], #4 \n\t" "beq .add6 \n\t" ".add12: \n\t" /* ASM for: ecc_add(result + 4, result + 4, C, 12); */ /* RRRRRRRRRRRRRRRR */ /* + CCCCCCCCC000 */ /* = RRRRRRRRRRRRRRRR */ "add %[r], %[r], #16 \n\t" "mov %[l], %[r] \n\t" "ldm %[r]!, {r3,r4} \n\t" "ldm %[c]!, {r5,r6} \n\t" "add r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "stm %[l]!, {r3,r4} \n\t" "ldm %[r]!, {r3,r4} \n\t" "ldm %[c]!, {r5,r6} \n\t" "adc r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "stm %[l]!, {r3,r4} \n\t" "ldm %[r]!, {r3,r4} \n\t" "ldm %[c]!, {r5,r6} \n\t" "adc r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "stm %[l]!, {r3,r4} \n\t" "ldm %[r]!, {r3,r4} \n\t" "ldm %[c]!, {r5,r6} \n\t" "adc r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "stm %[l]!, {r3,r4} \n\t" "ldm %[r]!, {r3,r4} \n\t" "ldm %[c]!, {r5} \n\t" "mov r6, $0 
\n\t" "adc r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "stm %[l]!, {r3,r4} \n\t" "ldm %[r]!, {r3,r4} \n\t" "adc r3, r3, r6 \n\t" "adc r4, r4, r6 \n\t" "stm %[l]!, {r3,r4} \n\t" "b 0f \n\t" ".add6: \n\t" /* ASM for: ecc_add(result + 2, result + 2, C, 6); */ /* RRRRRRRR */ /* + CCCCC0 */ /* = RRRRRRRR */ "add %[r], %[r], #8 \n\t" "mov %[l], %[r] \n\t" "ldm %[r]!, {r3,r4} \n\t" "ldm %[c]!, {r5,r6} \n\t" "add r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "stm %[l]!, {r3,r4} \n\t" "ldm %[r]!, {r3,r4} \n\t" "ldm %[c]!, {r5,r6} \n\t" "adc r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "stm %[l]!, {r3,r4} \n\t" "ldm %[r]!, {r3,r4} \n\t" "ldm %[c]!, {r5} \n\t" "mov r6, $0 \n\t" "adc r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "stm %[l]!, {r3,r4} \n\t" "b 0f \n\t" ".add3: \n\t" /* ASM for: ecc_add(result + 1, result + 1, C, 3); */ /* RRRR */ /* + CCC */ /* = RRRR */ "add %[r], %[r], #4 \n\t" "mov %[l], %[r] \n\t" "ldm %[r]!, {r3,r4} \n\t" "ldm %[c]!, {r5,r6} \n\t" "add r3, r3, r5 \n\t" "adc r4, r4, r6 \n\t" "ldr r5, [%[r], $0] \n\t" "ldr r6, [%[c], $0] \n\t" "adc r5, r5, r6 \n\t" "stm %[l]!, {r3-r5} \n\t" "0: \n\t" : /* out */ : /* in */ [r] "l" (result), [c] "l" (C), [l] "l" (length) : /* clobber list */ "r3", "r4", "r5", "r6", "memory" ); } } /*---------------------------------------------------------------------------*/ __attribute__((always_inline)) static void ecc_form_s1(uint32_t *dst, const uint32_t *src) { /* 0, 0, 0, src[11], src[12], src[13], src[14], src[15] */ asm volatile ( "mov r2, $0 \n\t" "mov r3, r2 \n\t" "mov r4, r3 \n\t" "stm %[d]!, {r2-r4} \n\t" "add %[s], #44 \n\t" "ldm %[s]!, {r2-r6} \n\t" "stm %[d]!, {r2-r6} \n\t" : /* out */ [d] "+l" (dst), [s] "+l" (src) : /* in */ : /* clobber list */ "r2", "r3", "r4", "r5", "r6", "memory" ); } __attribute__((always_inline)) static void ecc_form_s2(uint32_t *dst, const uint32_t *src) { /* 0, 0, 0, src[12], src[13], src[14], src[15], 0 */ asm volatile ( "mov r2, $0 \n\t" "mov r3, r2 \n\t" "mov r4, r3 \n\t" "stm %[d]!, {r2-r4} \n\t" "add 
%[s], #48 \n\t" "ldm %[s]!, {r2-r5} \n\t" "stm %[d]!, {r2-r5} \n\t" "mov r2, $0 \n\t" "stm %[d]!, {r2} \n\t" : /* out */ [d] "+l" (dst), [s] "+l" (src) : /* in */ : /* clobber list */ "r2", "r3", "r4", "r5", "memory" ); } __attribute__((always_inline)) static void ecc_form_s3(uint32_t *dst, const uint32_t *src) { /* src[8], src[9], src[10], 0, 0, 0, src[14], src[15] */ asm volatile ( "add %[s], #32 \n\t" "ldm %[s]!, {r2-r4} \n\t" "mov r5, $0 \n\t" "stm %[d]!, {r2-r5} \n\t" "mov r2, r5 \n\t" "mov r3, r2 \n\t" "add %[s], #12 \n\t" "ldm %[s]!, {r4,r5} \n\t" "stm %[d]!, {r2-r5} \n\t" : /* out */ [d] "+l" (dst), [s] "+l" (src) : /* in */ : /* clobber list */ "r2", "r3", "r4", "r5", "memory" ); } __attribute__((always_inline)) static void ecc_form_s4(uint32_t *dst, const uint32_t *src) { /* src[9], src[10], src[11], src[13], src[14], src[15], src[13], src[8] */ asm volatile ( "add %[s], #32 \n\t" "ldm %[s]!, {r2-r5} \n\t" "stm %[d]!, {r3-r5} \n\t" "add %[s], #4 \n\t" "ldm %[s]!, {r3-r5} \n\t" "stm %[d]!, {r3-r5} \n\t" "mov r4, r2 \n\t" "stm %[d]!, {r3,r4} \n\t" : /* out */ [d] "+l" (dst), [s] "+l" (src) : /* in */ : /* clobber list */ "r2", "r3", "r4", "r5", "memory" ); } __attribute__((always_inline)) static void ecc_form_d1(uint32_t *dst, const uint32_t *src) { /* src[11], src[12], src[13], 0, 0, 0, src[8], src[10] */ asm volatile ( "add %[s], #32 \n\t" "ldm %[s]!, {r2-r7} \n\t" "stm %[d]!, {r5-r7} \n\t" "mov r3, $0 \n\t" "mov r5, r3 \n\t" "mov r6, r5 \n\t" "stm %[d]!, {r3,r5,r6} \n\t" "stm %[d]!, {r2,r4} \n\t" : /* out */ [d] "+l" (dst), [s] "+l" (src) : /* in */ : /* clobber list */ "r2", "r3", "r4", "r5", "r6", "r7", "memory" ); } __attribute__((always_inline)) static void ecc_form_d2(uint32_t *dst, const uint32_t *src) { /* src[12], src[13], src[14], src[15], 0, 0, src[9], src[11] */ asm volatile ( "add %[s], #48 \n\t" "ldm %[s]!, {r2-r5} \n\t" "stm %[d]!, {r2-r5} \n\t" "sub %[s], #28 \n\t" "ldm %[s]!, {r4-r6} \n\t" "mov r2, $0 \n\t" "mov r3, r2 \n\t" "stm %[d]!, 
{r2-r4,r6} \n\t" : /* out */ [d] "+l" (dst), [s] "+l" (src) : /* in */ : /* clobber list */ "r2", "r3", "r4", "r5", "r6", "memory" ); } __attribute__((always_inline)) static void ecc_form_d3(uint32_t *dst, const uint32_t *src) { /* src[13], src[14], src[15], src[8], src[9], src[10], 0, src[12] */ asm volatile ( "add %[s], #52 \n\t" "ldm %[s]!, {r2-r4} \n\t" "stm %[d]!, {r2-r4} \n\t" "sub %[s], #32 \n\t" "ldm %[s]!, {r2-r6} \n\t" "mov r5, $0 \n\t" "stm %[d]!, {r2-r6} \n\t" : /* out */ [d] "+l" (dst), [s] "+l" (src) : /* in */ : /* clobber list */ "r2", "r3", "r4", "r5", "r6", "memory" ); } __attribute__((always_inline)) static void ecc_form_d4(uint32_t *dst, const uint32_t *src) { /* src[14], src[15], 0, src[9], src[10], src[11], 0, src[13] */ asm volatile ( "add %[s], #56 \n\t" "ldm %[s]!, {r2,r3} \n\t" "mov r4, $0 \n\t" "stm %[d]!, {r2-r4} \n\t" "sub %[s], #28 \n\t" "ldm %[s]!, {r2-r6} \n\t" "mov r5, $0 \n\t" "stm %[d]!, {r2-r6} \n\t" : /* out */ [d] "+l" (dst), [s] "+l" (src) : /* in */ : /* clobber list */ "r2", "r3", "r4", "r5", "r6", "memory" ); } /*---------------------------------------------------------------------------*/ static void ecc_field_Add(uint32_t *result, const uint32_t *x, const uint32_t *y) { uint32_t temp[8]; uint32_t carry = -ecc_add(result, x, y); ecc_add(temp, result, ecc_prime_r); int i; for(i = 0; i < 8; i++) { result[i] ^= (carry & (result[i] ^ temp[i])); } } static void ecc_field_Sub(uint32_t *result, const uint32_t *x, const uint32_t *y) { uint32_t temp[8]; uint32_t carry = -ecc_sub(result, x, y); ecc_add(temp, result, ecc_prime_m); int i; for(i = 0; i < 8; i++) { result[i] ^= (carry & (result[i] ^ temp[i])); } } static void ecc_field_ModP(uint32_t *result, const uint32_t *T) { uint32_t SX_o_DX[8]; ecc_copy(result, T); /* result = T */ ecc_form_s1(SX_o_DX, T); /* Form S1 */ ecc_field_Add(result, result, SX_o_DX); /* result = T + S1 */ ecc_field_Add(result, result, SX_o_DX); /* result = T + S1 + S1 */ ecc_form_s2(SX_o_DX, T); /* Form S2 
*/ ecc_field_Add(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 */ ecc_field_Add(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 */ ecc_form_s3(SX_o_DX, T); /* Form S3 */ ecc_field_Add(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 + S3 */ ecc_form_s4(SX_o_DX, T); /* Form S4 */ ecc_field_Add(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 + S3 + S4 */ ecc_form_d1(SX_o_DX, T); /* Form D1 */ ecc_field_Sub(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 + S3 + S4 - D1 */ ecc_form_d2(SX_o_DX, T); /* Form D2 */ ecc_field_Sub(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 + S3 + S4 - D1 - D2 */ ecc_form_d3(SX_o_DX, T); /* Form D3 */ ecc_field_Sub(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 + S3 + S4 - D1 - D2 - D3 */ ecc_form_d4(SX_o_DX, T); /* Form D4 */ ecc_field_Sub(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 + S3 + S4 - D1 - D2 - D3 - D4 */ if(ecc_compare(result, ecc_prime_m) >= 0) { ecc_field_Sub(result, result, ecc_prime_m); } } static void ecc_field_Mult(uint32_t *result, const uint32_t *A, const uint32_t *B) { uint32_t product[16]; ecc_mult(product, A, B, 8); ecc_field_ModP(result, product); } static void ecc_field_Inv(uint32_t *result, const uint32_t *A) { PRINTHEX("Input", A, 8); ecc_setZero(result); result[0] = 0x00000001; int i; for(i = 255; i >= 0; --i) { ecc_field_Mult(result, result, result); if(((ecc_prime_m[i / 32] >> (i % 32)) & 0x1) == 1 && i != 1) { ecc_field_Mult(result, result, A); } } PRINTHEX("Result", result, 8); } /*---------------------------------------------------------------------------*/ static void ecc_projective_double(uint32_t *val) { /* Algorithm taken from https://hyperelliptic.org/EFD/g1p/auto-shortw-projective-3.html#doubling-dbl-2007-bl-2 */ /* w = 3*(X1-Z1)*(X1+Z1) */ /* s = 2*Y1*Z1 */ /* ss = s^2 */ /* sss = s*ss */ /* R = Y1*s */ /* RR = R^2 */ /* B = 2*X1*R */ /* h = w^2-2*B */ /* X3 = h*s */ /* Y3 = w*(B-h)-2*RR */ /* Z3 = sss 
*/ uint32_t temp[24]; uint32_t w[8]; uint32_t s[8]; uint32_t B[8]; uint32_t h[8]; uint8_t is_zero = ecc_isX(val + X, 0) & ecc_isX(val + Y, 0) & ecc_isX(val + Z, 1); ecc_field_Sub(temp + X, val + X, val + Z); ecc_field_Add(temp + Y, val + X, val + Z); ecc_field_Mult(temp + Z, temp + X, temp + Y); ecc_field_Add(temp + X, temp + Z, temp + Z); ecc_field_Add(w, temp + Z, temp + X); ecc_field_Mult(temp + X, val + Y, val + Z); ecc_field_Add(s, temp + X, temp + X); ecc_field_Mult(temp + X, s, s); ecc_field_Mult(val + Z, s, temp + X); ecc_field_Mult(temp + X, val + Y, s); /* temp = R */ ecc_field_Mult(temp + Z, temp + X, temp + X); /* temp3 = RR */ ecc_field_Mult(temp + Y, val + X, temp + X); /* temp2 = R*x */ ecc_field_Add(B, temp + Y, temp + Y); /* B = 2*R*x */ ecc_field_Mult(temp + X, w, w); ecc_field_Add(temp + Y, B, B); ecc_field_Sub(h, temp + X, temp + Y); ecc_field_Mult(val + X, h, s); ecc_field_Sub(temp + X, B, h); ecc_field_Mult(temp + Y, w, temp + X); ecc_field_Add(temp + Z, temp + Z, temp + Z); /* temp3 = 2*RR */ ecc_field_Sub(val + Y, temp + Y, temp + Z); /* finished, now swap the result if necessary */ ecc_setZero(temp + X); ecc_setZero(temp + Y); ecc_setZero(temp + Z); (temp + Z)[0] = 0x00000001; ecc_replace(is_zero, val, temp); } static void ecc_projective_add(uint32_t *result, const uint32_t *val_1, const uint32_t *x_2, const uint32_t *y_2, const uint32_t *z_2) { /* algorithm taken from https://hyperelliptic.org/EFD/g1p/auto-shortw-projective-3.html#addition-add-1998-cmo-2 */ /* X Z X Y U Y */ /* 1 1 2 2 U UU V 1V */ /* Z R Z ZVZ R UZZ VR ZY */ /* VX 2RAY 2Z 1V1UA UZV V2A 2Z */ /* Y1Z2 = Y1*Z2 | */ /* X2Z1 = X2*Z1 | | */ /* X1Z2 = X1*Z2 | | | */ /* V = X2Z1-X1Z2 | x x | */ /* VV = V^2 x | | | */ /* R = VV*X1Z2 | x| x | */ /* VVV = V*VV x | x | | */ /* Y2Z1 = Y2*Z1 | | | | | */ /* U = Y2Z1-Y1Z2 | | x| | x */ /* UU = U^2 | | x | | | */ /* Z1Z2 = Z1*Z2 | | | | | | | */ /* UUZZ = UU*Z1Z2 | | x | x| | | */ /* UZV = UUZZ-VVV | | | | x| x | */ /* Z = VVV*Z1Z2 | | 
x| | | x | */ /* VYZ = VVV*Y1Z2 | | | | | x x| */ /* R2 = 2*R | x | | | | | */ /* A = UZV-2R | | | | x x| | */ /* X = V*A x| | | | x | */ /* RA = R-A | x| | | x | */ /* URA = U*RA | x | x| | */ /* Y = URA-VYZ | | | x x */ uint32_t temp[32]; #define X1 val_1 + X #define Y1 val_1 + Y #define Z1 val_1 + Z #define X2 x_2 #define Y2 y_2 #define Z2 z_2 #define V result + X #define X1Z2 result + Y #define R result + Y #define RA result + Y #define Z1Z2 result + Z #define X2Z1 temp + X #define VV temp + X #define Y2Z1 temp + X #define U temp + X #define URA temp + X #define UU temp + Y #define UUZZ temp + Y #define UZV temp + Y #define VVV temp + Z #define R2 temp + Z #define A temp + Z #define Y1Z2 temp + 24 #define VYZ temp + 24 uint8_t is_input1_zero = ecc_isX(val_1 + X, 0) & ecc_isX(val_1 + Y, 0) & ecc_isX(val_1 + Z, 1); uint8_t is_input2_zero = ecc_isX(x_2, 0) & ecc_isX(y_2, 0) & ecc_isX(z_2, 1); ecc_copy(temp + X, x_2); ecc_copy(temp + Y, y_2); ecc_copy(temp + Z, z_2); ecc_replace(is_input1_zero, result, temp); ecc_copy(temp + X, val_1 + X); ecc_copy(temp + Y, val_1 + Y); ecc_copy(temp + Z, val_1 + Z); ecc_replace(is_input2_zero, result, temp); /* invalidate the result pointer */ result = (uint32_t *)((uintptr_t)result ^ (-(is_input2_zero | is_input1_zero) & ((uintptr_t)result ^ (uintptr_t)temp))); ecc_field_Mult(Y1Z2, Y1, Z2); ecc_field_Mult(X2Z1, X2, Z1); ecc_field_Mult(X1Z2, X1, Z2); ecc_field_Sub(V, X2Z1, X1Z2); ecc_field_Mult(VV, V, V); ecc_field_Mult(R, VV, X1Z2); ecc_field_Mult(VVV, V, VV); ecc_field_Mult(Y2Z1, Y2, Z1); ecc_field_Sub(U, Y2Z1, Y1Z2); ecc_field_Mult(UU, U, U); ecc_field_Mult(Z1Z2, Z1, Z2); ecc_field_Mult(UUZZ, UU, Z1Z2); ecc_field_Sub(UZV, UUZZ, VVV); ecc_field_Mult(result + Z, VVV, Z1Z2); ecc_field_Mult(VYZ, VVV, Y1Z2); ecc_field_Add(R2, R, R); ecc_field_Sub(A, UZV, R2); ecc_field_Mult(result + X, V, A); ecc_field_Sub(RA, R, A); ecc_field_Mult(URA, U, RA); ecc_field_Sub(result + Y, URA, VYZ); } 
/*---------------------------------------------------------------------------*/
#if SELF_TEST
/* Report msg on its own line unless cond is non-zero. */
static void
assertTrue(uint32_t cond, const char *text)
{
  if(cond) {
    return;
  }
  printf("%s\n", text);
}
/* Report msg on its own line unless cond is zero. */
static void
assertFalse(uint32_t cond, const char *text)
{
  if(!cond) {
    return;
  }
  printf("%s\n", text);
}
/* Report msg on its own line unless the two 8-word numbers compare equal. */
static void
assertSame(uint32_t *lhs, uint32_t *rhs, const char *text)
{
  if(ecc_compare(lhs, rhs) == 0) {
    return;
  }
  printf("%s\n", text);
}
/* Check that ecc_isX(number, probe) is non-zero exactly when expect_match is. */
static void
expectIsX(const uint32_t *number, uint32_t probe, uint32_t expect_match, const char *text)
{
  uint32_t got = ecc_isX(number, probe);

  if(expect_match) {
    assertTrue(got, text);
  } else {
    assertFalse(got, text);
  }
}
/*
 * Exercise the basic 256-bit helpers with small fixed operands and print one
 * line per failed check, followed by a completion message.
 */
static void
selfTest()
{
  /* Operands, least significant word first; unlisted words are zero. */
  uint32_t num_000[8] = { 0x00000000 };
  uint32_t num_001[8] = { 0x00000001 };
  uint32_t num_002[8] = { 0x00000002 };
  uint32_t num_004[8] = { 0x00000004 };
  uint32_t num_max[8] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
                          0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
  uint32_t primeMinusOne[8] = { 0xfffffffe, 0xffffffff, 0xffffffff, 0x00000000,
                                0x00000000, 0x00000000, 0x00000001, 0xffffffff };
  uint32_t result[8];
  uint32_t carry;

  /* ecc_compare */
  assertFalse(ecc_compare(num_001, num_001), "ecc_compare 1: Wrong result!");
  assertTrue(ecc_compare(num_000, num_001) == -1, "ecc_compare 2: Wrong result!");
  assertTrue(ecc_compare(num_001, num_000) == 1, "ecc_compare 3: Wrong result!");

  /* ecc_isX: matching probes first, then every mismatching combination */
  expectIsX(num_000, 0, 1, "ecc_isX 1: Wrong result!");
  expectIsX(num_001, 1, 1, "ecc_isX 2: Wrong result!");
  expectIsX(num_002, 2, 1, "ecc_isX 3: Wrong result!");
  expectIsX(num_004, 4, 1, "ecc_isX 4: Wrong result!");
  expectIsX(num_000, 1, 0, "ecc_isX 5: Wrong result!");
  expectIsX(num_000, 2, 0, "ecc_isX 6: Wrong result!");
  expectIsX(num_000, 4, 0, "ecc_isX 7: Wrong result!");
  expectIsX(num_001, 0, 0, "ecc_isX 8: Wrong result!");
  expectIsX(num_001, 2, 0, "ecc_isX 9: Wrong result!");
  expectIsX(num_001, 4, 0, "ecc_isX 10: Wrong result!");
  expectIsX(num_002, 0, 0, "ecc_isX 11: Wrong result!");
  expectIsX(num_002, 1, 0, "ecc_isX 12: Wrong result!");
  expectIsX(num_002, 4, 0, "ecc_isX 13: Wrong result!");
  expectIsX(num_004, 0, 0, "ecc_isX 14: Wrong result!");
  expectIsX(num_004, 1, 0, "ecc_isX 15: Wrong result!");
  expectIsX(num_004, 2, 0, "ecc_isX 16: Wrong result!");

  /* ecc_add: 1 + 2 + 1 == 4 without carry, max + 2 wraps to 1 with carry */
  carry = ecc_add(result, num_001, num_002);
  assertFalse(carry, "ecc_add 1: Unexpected carrybit!");
  carry = ecc_add(result, result, num_001);
  assertFalse(carry, "ecc_add 2: Unexpected carrybit!");
  assertSame(result, num_004, "ecc_add 3: Wrong result!");
  carry = ecc_add(result, num_max, num_002);
  assertTrue(carry, "ecc_add 4: Carrybit missing!");
  assertSame(result, num_001, "ecc_add 5: Wrong result!");

  /* ecc_sub: 4 - 2 - 1 - 1 == 0 without borrow, 0 - 1 wraps to max with borrow */
  carry = ecc_sub(result, num_004, num_002);
  assertFalse(carry, "ecc_sub 1: Unexpected carrybit!");
  carry = ecc_sub(result, result, num_001);
  assertFalse(carry, "ecc_sub 2: Unexpected carrybit!");
  carry = ecc_sub(result, result, num_001);
  assertFalse(carry, "ecc_sub 3: Unexpected carrybit!");
  assertSame(result, num_000, "ecc_sub 4: Wrong result!");
  carry = ecc_sub(result, num_000, num_001);
  assertTrue(carry, "ecc_sub 5: Carrybit missing!");
  assertSame(result, num_max, "ecc_sub 6: Wrong result!");

  /* ecc_field_Sub: 0 - 1 must wrap around to prime - 1 */
  ecc_field_Sub(result, num_001, num_000);
  assertSame(num_001, result, "ecc_field_Sub 1: Wrong result!");
  ecc_field_Sub(result, num_001, num_001);
  assertSame(num_000, result, "ecc_field_Sub 2: Wrong result!");
  ecc_field_Sub(result, num_000, num_001);
  assertSame(primeMinusOne, result, "ecc_field_Sub 3: Wrong result!");

  printf("Tests completed!\n");
}
#endif