#-----------------------------------------------------------------------
# dotproduct -- product of two 8x8 bit matrices over GF(2)
#
# Syntax: GNU as, AT&T.  Registers follow the System V AMD64 convention;
# callable from C as
#       uint64_t dotproduct(uint64_t a, uint64_t b);
#
# In:    %rdi = A, %rsi = B
# Out:   %rax = A * B
# Clobb: %rcx, %rdx, %xmm0, flags
#        (%rdi and %rsi are rotated while running but hold their
#        original values again on return)
#
# Layout: each matrix is packed into one 64-bit word; byte m is row m
# and bit j of that byte is column j.  For the result R:
#       R[m][j] = XOR over i of (A[m][i] AND B[i][j])
#
# %rax is deliberately not cleared: exactly 64 result bits are shifted
# in, which pushes out every bit it held on entry.
#-----------------------------------------------------------------------
        .text
        .globl  dotproduct
        .type   dotproduct,@function
dotproduct:
        # Write all of %ecx (this also zeroes the upper half of %rcx) so
        # that no stale caller bits are left in the counter register.
        # %cl counts rows, %ch counts columns.
        mov     $8, %ecx                # 8 rows to go

.Ldp_row:
        rol     $8, %rdi                # next row of A (highest byte first) -> %dil
        mov     $8, %ch                 # 8 columns to go

.Ldp_col:
        # Collect the top bit of every byte of B: that is the current
        # column of B, one bit per row.  The SSE2 form is used instead of
        # MMX so the x87 state is untouched and no emms is needed; movq
        # zero-extends, so bits 8..15 of the mask are simply zero.
        movq    %rsi, %xmm0
        pmovmskb %xmm0, %edx            # %dl = current column of B

        # Row . column over GF(2) is the parity of (row AND column).
        # and clears CF and sets PF when the low byte has even parity.
        and     %dil, %dl
        jp      .Ldp_shift              # even parity: product bit is 0, CF already 0
        stc                             # odd parity: product bit is 1
.Ldp_shift:
        adc     %rax, %rax              # rax = 2*rax + CF: append the product bit

        rol     $1, %rsi                # bring the next column of B to the top bits

        dec     %ch
        jnz     .Ldp_col

        ror     $8, %rsi                # undo the eight 1-bit rotations: B restored

        dec     %cl                     # %ch is zero here; only %cl is still counting
        jnz     .Ldp_row

        ret
        .size   dotproduct,.-dotproduct