|
|
- // +build amd64,!appengine,!go1.9
-
- TEXT ·hasAsm(SB),4,$0-1
- MOVQ $1, AX
- CPUID
- SHRQ $23, CX
- ANDQ $1, CX
- MOVB CX, ret+0(FP)
- RET
-
- #define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
-
- TEXT ·popcntSliceAsm(SB),4,$0-32
- XORQ AX, AX
- MOVQ s+0(FP), SI
- MOVQ s_len+8(FP), CX
- TESTQ CX, CX
- JZ popcntSliceEnd
- popcntSliceLoop:
- BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
- ADDQ DX, AX
- ADDQ $8, SI
- LOOP popcntSliceLoop
- popcntSliceEnd:
- MOVQ AX, ret+24(FP)
- RET
-
- TEXT ·popcntMaskSliceAsm(SB),4,$0-56
- XORQ AX, AX
- MOVQ s+0(FP), SI
- MOVQ s_len+8(FP), CX
- TESTQ CX, CX
- JZ popcntMaskSliceEnd
- MOVQ m+24(FP), DI
- popcntMaskSliceLoop:
- MOVQ (DI), DX
- NOTQ DX
- ANDQ (SI), DX
- POPCNTQ_DX_DX
- ADDQ DX, AX
- ADDQ $8, SI
- ADDQ $8, DI
- LOOP popcntMaskSliceLoop
- popcntMaskSliceEnd:
- MOVQ AX, ret+48(FP)
- RET
-
- TEXT ·popcntAndSliceAsm(SB),4,$0-56
- XORQ AX, AX
- MOVQ s+0(FP), SI
- MOVQ s_len+8(FP), CX
- TESTQ CX, CX
- JZ popcntAndSliceEnd
- MOVQ m+24(FP), DI
- popcntAndSliceLoop:
- MOVQ (DI), DX
- ANDQ (SI), DX
- POPCNTQ_DX_DX
- ADDQ DX, AX
- ADDQ $8, SI
- ADDQ $8, DI
- LOOP popcntAndSliceLoop
- popcntAndSliceEnd:
- MOVQ AX, ret+48(FP)
- RET
-
- TEXT ·popcntOrSliceAsm(SB),4,$0-56
- XORQ AX, AX
- MOVQ s+0(FP), SI
- MOVQ s_len+8(FP), CX
- TESTQ CX, CX
- JZ popcntOrSliceEnd
- MOVQ m+24(FP), DI
- popcntOrSliceLoop:
- MOVQ (DI), DX
- ORQ (SI), DX
- POPCNTQ_DX_DX
- ADDQ DX, AX
- ADDQ $8, SI
- ADDQ $8, DI
- LOOP popcntOrSliceLoop
- popcntOrSliceEnd:
- MOVQ AX, ret+48(FP)
- RET
-
- TEXT ·popcntXorSliceAsm(SB),4,$0-56
- XORQ AX, AX
- MOVQ s+0(FP), SI
- MOVQ s_len+8(FP), CX
- TESTQ CX, CX
- JZ popcntXorSliceEnd
- MOVQ m+24(FP), DI
- popcntXorSliceLoop:
- MOVQ (DI), DX
- XORQ (SI), DX
- POPCNTQ_DX_DX
- ADDQ DX, AX
- ADDQ $8, SI
- ADDQ $8, DI
- LOOP popcntXorSliceLoop
- popcntXorSliceEnd:
- MOVQ AX, ret+48(FP)
- RET
|