Untitled
/*
 * mul_8x1 -- multiply one 64-bit limb into a 9-limb accumulator:
 *
 *     C[0..8] = C[0..8] + AI * B[0..7]
 *
 * Target: AArch64 (A64), GNU-style macro syntax. Only C-style and
 * double-slash comments are used. A semicolon is a statement separator
 * in GNU as for AArch64, so it must never be used to "comment out" code
 * in this file: such lines would assemble as live instructions there.
 * (An abandoned carry-register draft that was parked behind semicolons
 * has been removed for exactly that reason.)
 *
 * Operands (all 64-bit X registers, as required by umulh):
 *   AI         multiplier limb                          (read only)
 *   C0..C8     accumulator, C0 = least significant      (read/write)
 *   B0..B7     multiplicand, B0 = least significant     (read only)
 *   T0..T7     scratch                                  (clobbered)
 *   T8, T9,
 *   CARRY_REG  accepted so existing call sites keep working;
 *              not read or written by this macro
 *
 * Clobbers: T0..T7 and the NZCV flags.
 *
 * Constraints visible from the code:
 *   - T0..T7 must not alias AI, any B register or any C register,
 *     because they are overwritten while those are still needed.
 *   - The last addition into C8 uses adc (no flag update), so a carry
 *     out of C8 is discarded. The caller has to guarantee that the
 *     9-limb result fits.
 *
 * Method: each product AI * Bk is 128 bits wide; its low half belongs to
 * limb k and its high half to limb k+1. Taking the even k first yields
 * eight halves that map one-to-one onto limbs 0..7, so a single
 * adds/adcs chain can add them. The odd k then map onto limbs 1..8 and
 * are added by a second chain.
 */
.macro mul_8x1, AI, C0, C1, C2, C3, C4, C5, C6, C7, C8, B0, B1, B2, B3, B4, B5, B6, B7, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, CARRY_REG
        /* Pass 1: products with the even limbs B0, B2, B4, B6. */
        mul     \T0, \AI, \B0           // low half  -> limb 0
        umulh   \T1, \AI, \B0           // high half -> limb 1
        mul     \T2, \AI, \B2           // low half  -> limb 2
        umulh   \T3, \AI, \B2           // high half -> limb 3
        mul     \T4, \AI, \B4           // low half  -> limb 4
        umulh   \T5, \AI, \B4           // high half -> limb 5
        mul     \T6, \AI, \B6           // low half  -> limb 6
        umulh   \T7, \AI, \B6           // high half -> limb 7

        adds    \C0, \C0, \T0           // starts the carry chain
        adcs    \C1, \C1, \T1
        adcs    \C2, \C2, \T2
        adcs    \C3, \C3, \T3
        adcs    \C4, \C4, \T4
        adcs    \C5, \C5, \T5
        adcs    \C6, \C6, \T6
        adcs    \C7, \C7, \T7
        adc     \C8, \C8, xzr           // fold the final carry into the top limb

        /* Pass 2: products with the odd limbs B1, B3, B5, B7. */
        mul     \T0, \AI, \B1           // low half  -> limb 1
        umulh   \T1, \AI, \B1           // high half -> limb 2
        mul     \T2, \AI, \B3           // low half  -> limb 3
        umulh   \T3, \AI, \B3           // high half -> limb 4
        mul     \T4, \AI, \B5           // low half  -> limb 5
        umulh   \T5, \AI, \B5           // high half -> limb 6
        mul     \T6, \AI, \B7           // low half  -> limb 7
        umulh   \T7, \AI, \B7           // high half -> limb 8

        adds    \C1, \C1, \T0           // second chain starts one limb higher
        adcs    \C2, \C2, \T1
        adcs    \C3, \C3, \T2
        adcs    \C4, \C4, \T3
        adcs    \C5, \C5, \T4
        adcs    \C6, \C6, \T5
        adcs    \C7, \C7, \T6
        adc     \C8, \C8, \T7           // top limb: carry out of C8 is dropped
.endm
Leave a Comment