diff --git a/asm_avr.inc b/asm_avr.inc
index c14bf55..3abac41 100644
--- a/asm_avr.inc
+++ b/asm_avr.inc
@@ -216,48 +216,40 @@ uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
                                 wordcount_t num_words) {
     /* num_words should already be in r18. */
     register wordcount_t r18 __asm__("r18") = num_words;
-
-    __asm__ volatile (
-        "push r18 \n\t"
+    /* L1uTongwei: to fix issue #154 the asm code is split into several statements; it is too long for a single one. */
+    FAST_MULT_ENVIRONMENT("push r18 \n\t");
 #if (uECC_MIN_WORDS == 20)
-        FAST_MULT_ASM_20
-        "pop r18 \n\t"
-    #if (uECC_MAX_WORDS > 20)
-        FAST_MULT_ASM_20_TO_24
-    #endif
-    #if (uECC_MAX_WORDS > 24)
-        FAST_MULT_ASM_24_TO_28
-    #endif
-    #if (uECC_MAX_WORDS > 28)
-        FAST_MULT_ASM_28_TO_32
-    #endif
+    FAST_MULT_ENVIRONMENT(FAST_MULT_ASM_20);
+    FAST_MULT_ENVIRONMENT("pop r18 \n\t");
+#if (uECC_MAX_WORDS > 20)
+    FAST_MULT_ENVIRONMENT(FAST_MULT_ASM_20_TO_24);
+#endif
+#if (uECC_MAX_WORDS > 24)
+    FAST_MULT_ENVIRONMENT(FAST_MULT_ASM_24_TO_28);
+#endif
+#if (uECC_MAX_WORDS > 28)
+    FAST_MULT_ENVIRONMENT(FAST_MULT_ASM_28_TO_32);
+#endif
 #elif (uECC_MIN_WORDS == 24)
-        FAST_MULT_ASM_24
-        "pop r18 \n\t"
-    #if (uECC_MAX_WORDS > 24)
-        FAST_MULT_ASM_24_TO_28
-    #endif
-    #if (uECC_MAX_WORDS > 28)
-        FAST_MULT_ASM_28_TO_32
-    #endif
+    FAST_MULT_ENVIRONMENT(FAST_MULT_ASM_24);
+    FAST_MULT_ENVIRONMENT("pop r18 \n\t");
+#if (uECC_MAX_WORDS > 24)
+    FAST_MULT_ENVIRONMENT(FAST_MULT_ASM_24_TO_28);
+#endif
+#if (uECC_MAX_WORDS > 28)
+    FAST_MULT_ENVIRONMENT(FAST_MULT_ASM_28_TO_32);
+#endif
 #elif (uECC_MIN_WORDS == 28)
-        FAST_MULT_ASM_28
-        "pop r18 \n\t"
-    #if (uECC_MAX_WORDS > 28)
-        FAST_MULT_ASM_28_TO_32
-    #endif
+    FAST_MULT_ENVIRONMENT(FAST_MULT_ASM_28);
+    FAST_MULT_ENVIRONMENT("pop r18 \n\t");
+#if (uECC_MAX_WORDS > 28)
+    FAST_MULT_ENVIRONMENT(FAST_MULT_ASM_28_TO_32);
+#endif
 #elif (uECC_MIN_WORDS == 32)
-        FAST_MULT_ASM_32
-        "pop r18 \n\t"
+    FAST_MULT_ENVIRONMENT(FAST_MULT_ASM_32);
+    FAST_MULT_ENVIRONMENT("pop r18 \n\t");
 #endif
-        "2: \n\t"
-        "eor r1, r1 \n\t"
-        : "+x" (left), "+y" (right), "+z" (result)
-        : "r" (r18)
-        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r19", "r20",
-          "r21", "r22", "r23", "r24", "r25", "cc"
-    );
+    FAST_MULT_ENVIRONMENT("2: \n\t eor r1, r1 \n\t");
 }
 #define asm_mult 1
 
@@ -268,48 +260,40 @@ uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
                                   wordcount_t num_words) {
     /* num_words should already be in r20. */
     register wordcount_t r20 __asm__("r20") = num_words;
-
-    __asm__ volatile (
-        "push r20 \n\t"
+    /* L1uTongwei: split into several asm statements in the same way as uECC_vli_mult. */
+    FAST_SQUARE_ENVIRONMENT("push r20 \n\t");
 #if (uECC_MIN_WORDS == 20)
-        FAST_SQUARE_ASM_20
-        "pop r20 \n\t"
-    #if (uECC_MAX_WORDS > 20)
-        FAST_SQUARE_ASM_20_TO_24
-    #endif
-    #if (uECC_MAX_WORDS > 24)
-        FAST_SQUARE_ASM_24_TO_28
-    #endif
-    #if (uECC_MAX_WORDS > 28)
-        FAST_SQUARE_ASM_28_TO_32
-    #endif
+    FAST_SQUARE_ENVIRONMENT(FAST_SQUARE_ASM_20);
+    FAST_SQUARE_ENVIRONMENT("pop r20 \n\t");
+#if (uECC_MAX_WORDS > 20)
+    FAST_SQUARE_ENVIRONMENT(FAST_SQUARE_ASM_20_TO_24);
+#endif
+#if (uECC_MAX_WORDS > 24)
+    FAST_SQUARE_ENVIRONMENT(FAST_SQUARE_ASM_24_TO_28);
+#endif
+#if (uECC_MAX_WORDS > 28)
+    FAST_SQUARE_ENVIRONMENT(FAST_SQUARE_ASM_28_TO_32);
+#endif
 #elif (uECC_MIN_WORDS == 24)
-        FAST_SQUARE_ASM_24
-        "pop r20 \n\t"
-    #if (uECC_MAX_WORDS > 24)
-        FAST_SQUARE_ASM_24_TO_28
-    #endif
-    #if (uECC_MAX_WORDS > 28)
-        FAST_SQUARE_ASM_28_TO_32
-    #endif
+    FAST_SQUARE_ENVIRONMENT(FAST_SQUARE_ASM_24);
+    FAST_SQUARE_ENVIRONMENT("pop r20 \n\t");
+#if (uECC_MAX_WORDS > 24)
+    FAST_SQUARE_ENVIRONMENT(FAST_SQUARE_ASM_24_TO_28);
+#endif
+#if (uECC_MAX_WORDS > 28)
+    FAST_SQUARE_ENVIRONMENT(FAST_SQUARE_ASM_28_TO_32);
+#endif
 #elif (uECC_MIN_WORDS == 28)
-        FAST_SQUARE_ASM_28
-        "pop r20 \n\t"
-    #if (uECC_MAX_WORDS > 28)
-        FAST_SQUARE_ASM_28_TO_32
-    #endif
+    FAST_SQUARE_ENVIRONMENT(FAST_SQUARE_ASM_28);
+    FAST_SQUARE_ENVIRONMENT("pop r20 \n\t");
+#if (uECC_MAX_WORDS > 28)
+    FAST_SQUARE_ENVIRONMENT(FAST_SQUARE_ASM_28_TO_32);
+#endif
 #elif (uECC_MIN_WORDS == 32)
-        FAST_SQUARE_ASM_32
-        "pop r20 \n\t"
+    FAST_SQUARE_ENVIRONMENT(FAST_SQUARE_ASM_32);
+    FAST_SQUARE_ENVIRONMENT("pop r20 \n\t");
 #endif
-        "2: \n\t"
-        "eor r1, r1 \n\t"
-        : "+x" (left), "+z" (result)
-        : "r" (r20)
-        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19",
-          "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc"
-    );
+    FAST_SQUARE_ENVIRONMENT("2: \n\t eor r1, r1 \n\t");
 }
 #define asm_square 1
 #endif /* uECC_SQUARE_FUNC */
diff --git a/asm_avr_mult_square.inc b/asm_avr_mult_square.inc
index 7ae08bc..4f1a554 100644
--- a/asm_avr_mult_square.inc
+++ b/asm_avr_mult_square.inc
@@ -3,6 +3,40 @@
 #ifndef _UECC_ASM_AVR_MULT_SQUARE_H_
 #define _UECC_ASM_AVR_MULT_SQUARE_H_
 
+/* L1uTongwei: wrappers used by asm_avr.inc to work around issue #154.
+ *
+ * Each macro places the assembly text X in its own __asm__ volatile statement,
+ * together with the operand and clobber lists of the multiply/square routine.
+ * They can only be expanded where left, right (multiply only), result and the
+ * register variable r18 (multiply) or r20 (square) are in scope.
+ * The expansion has no trailing ';' so that each use is written as an ordinary
+ * statement: FAST_MULT_ENVIRONMENT(...);
+ *
+ * NOTE(review): the compiler treats consecutive asm statements as separate
+ * units and may emit its own code between them. The callers push r18/r20 in
+ * one statement and pop it in a later one, define the label 2: in the final
+ * statement (presumably a branch target of the FAST_*_ASM_* fragments) and
+ * clear r1 only at the very end. Confirm this is safe with avr-gcc.
+ */
+#define FAST_MULT_ENVIRONMENT(X) \
+    __asm__ volatile ( \
+        X \
+        : "+x" (left), "+y" (right), "+z" (result) \
+        : "r" (r18) \
+        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
+          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r19", "r20", \
+          "r21", "r22", "r23", "r24", "r25", "cc" \
+    )
+#define FAST_SQUARE_ENVIRONMENT(X) \
+    __asm__ volatile ( \
+        X \
+        : "+x" (left), "+z" (result) \
+        : "r" (r20) \
+        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
+          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", \
+          "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc" \
+    )
+
 #define FAST_MULT_ASM_20 \
     "adiw r30, 10 \n\t" \
     "adiw r28, 10 \n\t" \
diff --git a/examples/ecc_test/ecc_test.ino b/examples/ecc_test/ecc_test.ino
index 64a0be5..020da12 100644
--- a/examples/ecc_test/ecc_test.ino
+++ b/examples/ecc_test/ecc_test.ino
@@ -1,5 +1,8 @@
+// L1uTongwei: uncomment the define below to test the optimized code paths.
+// It has to come before the include so that the header can see it.
+//#define uECC_OPTIMIZATION_LEVEL 4
 #include <uECC.h>
 
 static int RNG(uint8_t *dest, unsigned size) {
   // Use the least-significant bits from the ADC for an unconnected pin (or connected to a source of
   // random noise). This can take a long time to generate random data if the result of analogRead(0)