Benutzer:Rdiez/Adler-32 checksum in AVR Assembler

Aus /dev/tal
< Benutzer:Rdiez
Version vom 10. Mai 2014, 23:38 Uhr von Rdiez (Diskussion | Beiträge) (Die Seite wurde neu angelegt: „{{BenutzerSeitenNichtVeraendernWarnung|rdiez}} = Adler-32 checksum in AVR Assembler = Your AVR bootloader should check the application's integrity before att…“)

(Unterschied) ← Nächstältere Version | Aktuelle Version (Unterschied) | Nächstjüngere Version → (Unterschied)
Wechseln zu: Navigation, Suche
Warning sign
Dies sind die persönlichen Benutzerseiten von rdiez, bitte nicht verändern! Ausnahmen sind nur einfache Sprachkorrekturen wie Tippfehler, falsche Präpositionen oder Ähnliches. Alles andere bitte nur dem Benutzer melden!


Adler-32 checksum in AVR Assembler

Your AVR bootloader should check the application's integrity before attempting to start it. The Adler-32 checksum offers a good trade-off among performance, size and robustness.

This is a straightforward implementation in AVR Assembler 2. It is pretty fast as it checks against overflow on every step so as to avoid calculating any division remainders (modulo), which the AVR does not implement in hardware. Note however that zlib's implementation is still faster, as it performs many steps in a row before reducing (calculating the module of) the accumulator.

#define R22_LOW__MOD_ADLER32 r22
#define R23_HIGH_MOD_ADLER32 r23

#define R18_LOW__ADLER_A r18
#define R19_HIGH_ADLER_A r19
#define R20_LOW__ADLER_B r20
#define R21_HIGH_ADLER_B r21

; ----------------------------
; r24 = new data byte to add to the Adler-32 checksum.
; r23:r22 = must be set to MOD_ADLER upon entry, they are not changed.
; r21:r20:r19:r18 = the previous Adler-32 checksum,
; these registers receive the updated value.
; r1 must be zero upon entry.
;
; Registers r0 and r22 are overwritten.

.equ MOD_ADLER32 = 65521

accumulate_adler32:

   ; This routine processes one byte at a time. If you already
   ; have a bigger data chunk available, there are faster ways,
   ; see the zlib implementation for details, file adler32.c,
   ; function adler32().

   ; Add the new Byte to the A component.
   clr r0
   add R18_LOW__ADLER_A, r24
   adc R19_HIGH_ADLER_A, R1_ZERO
   adc r0, R1_ZERO

   ; Have we reached the MOD_ADLER32 value?
   cp  R18_LOW__ADLER_A, R22_LOW__MOD_ADLER32
   cpc R19_HIGH_ADLER_A, R23_HIGH_MOD_ADLER32
   cpc r0, R1_ZERO

   brlo accumulate_adler32_no_overflow_a

   ; If so, substract MOD_ADLER32.
   sub R18_LOW__ADLER_A, R22_LOW__MOD_ADLER32
   sbc R19_HIGH_ADLER_A, R23_HIGH_MOD_ADLER32
   ; We do not need the following: we know the end
   ; r0 value will always be zero.
   ; sbc r0, R1_ZERO

accumulate_adler32_no_overflow_a:

   ; Update the B component.
   clr r0
   add R20_LOW__ADLER_B, R18_LOW__ADLER_A
   adc R21_HIGH_ADLER_B, R19_HIGH_ADLER_A
   adc r0, R1_ZERO

   ; Have we reached the MOD_ADLER32 value?
   cp  R20_LOW__ADLER_B, R22_LOW__MOD_ADLER32
   cpc R21_HIGH_ADLER_B, R23_HIGH_MOD_ADLER32
   cpc r0, R1_ZERO

   brlo accumulate_adler32_no_overflow_b

   ; If so, substract MOD_ADLER32.
   sub R20_LOW__ADLER_B, R22_LOW__MOD_ADLER32
   sbc R21_HIGH_ADLER_B, R23_HIGH_MOD_ADLER32
   ; We do not need the following: we know the end
   ; r0 value will always be zero.
   ; sbc r0, R1_ZERO

accumulate_adler32_no_overflow_b:
   ret

Caller example:

   ; The Adler-32 initial value is 1 (0x00000001).
   clr r21
   clr r20
   clr r19
   ldi r18, 0x01

   ldi R22_LOW__MOD_ADLER32, low ( MOD_ADLER32 )
   ldi R23_HIGH_MOD_ADLER32, high( MOD_ADLER32 )
   clr r1

   ldi r24, 10  ; First byte
   rcall accumulate_adler32

   ldi r24, 20  ; Second byte
   rcall accumulate_adler32

   ... etc ...

Equivalent in C. Note that this routine does calculate division reminders (modulo), which is very slow on AVR processors:

void adler32_update_c ( uint16_t * __restrict__ a,
                        uint16_t * __restrict__ b,
                        uint8_t data_byte )
{
   const uint16_t MOD_ADLER = 65521;
	
   *a = (((uint32_t)*a) + data_byte) % MOD_ADLER;
   *b = (((uint32_t)*b) + *a) % MOD_ADLER;
}

This is the same assembly implementation at the top ported to the GCC inline assembler, so that the compiler can optimise the CPU register usage based on the code around it:

void adler32_update ( uint16_t * __restrict__ a,
                      uint16_t * __restrict__ b,
                      uint8_t data_byte )
{
   // This routine processes one byte at a time. If you already
   // have a bigger data chunk available, there are faster ways,
   // see the zlib implementation for details, file adler32.c,
   // function adler32().  
   
   const uint16_t MOD_ADLER = 65521;
   
   uint16_t adler_a = *a;
   uint16_t adler_b = *b;

   asm  // No "asm volatile", so that the compiler can optimize
        //  the whole lot away if possible.
   (
       "; Add the new Byte to the A component            \n\t" \
       "clr r0                                           \n\t" \
       "add %A[adler_a], %[data_byte]                    \n\t" \
       "adc %B[adler_a], __zero_reg__                    \n\t" \
       "adc r0, __zero_reg__                             \n\t" \
       "                                                 \n\t" \
       "; Have we reached the MOD_ADLER32 value?         \n\t" \
       "cp  %A[adler_a], %A[mod_adler]                   \n\t" \
       "cpc %B[adler_a], %B[mod_adler]                   \n\t" \
       "cpc r0, __zero_reg__                             \n\t" \
       "brlo accumulate_adler32_no_overflow_a            \n\t" \
       "                                                 \n\t" \
       "; If so, substract MOD_ADLER32.                  \n\t" \
       "sub %A[adler_a], %A[mod_adler]                   \n\t" \
       "sbc %B[adler_a], %B[mod_adler]                   \n\t" \
       "; We do not need the following: we know the end  \n\t" \
       "; r0 value will always be zero.                  \n\t" \
       "; sbc r0, __zero_reg__                           \n\t" \
       "                                                 \n\t" \
       "accumulate_adler32_no_overflow_a:                \n\t" \
       "                                                 \n\t" \
       "; Update the B component.                        \n\t" \
       "clr r0                                           \n\t" \
       "add %A[adler_b], %A[adler_a]                     \n\t" \
       "adc %B[adler_b], %B[adler_a]                     \n\t" \
       "adc r0, __zero_reg__                             \n\t" \
       "                                                 \n\t" \
       "; Have we reached the MOD_ADLER32 value?         \n\t" \
       "cp  %A[adler_b], %A[mod_adler]                   \n\t" \
       "cpc %B[adler_b], %B[mod_adler]                   \n\t" \
       "cpc r0, __zero_reg__                             \n\t" \
       "brlo accumulate_adler32_no_overflow_b            \n\t" \
       "                                                 \n\t" \
       "; If so, substract MOD_ADLER32.                  \n\t" \
       "sub %A[adler_b], %A[mod_adler]                   \n\t" \
       "sbc %B[adler_b], %B[mod_adler]                   \n\t" \
       "; We do not need the following: we know the end  \n\t" \
       "; r0 value will always be zero.                  \n\t" \
       "; sbc r0, __zero_reg__                           \n\t" \
       "                                                 \n\t" \
       "accumulate_adler32_no_overflow_b:                \n\t"
        
       : // output operands
         [adler_a] "+r" (adler_a),
         [adler_b] "+r" (adler_b)
       
       : // input  operands
         [data_byte] "r" (data_byte),
         [mod_adler] "r" (MOD_ADLER)
       
       :  "r0" // clobbered registers
   );

   *a = adler_a;
   *b = adler_b;
}

This article originally appeared in EmbDev.net in September 2010‎.