go-json/internal/encoder/string_avx.s

164 lines
7.0 KiB
ArmAsm
Raw Normal View History

2021-12-27 19:13:18 +03:00
//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
// LCDATA1 is the 128-byte constant table read by ·_findEscapeIndex128.
// It holds eight 16-byte vectors, each written as two identical quadwords,
// and is loaded through BP-relative vmovdqa at byte offsets 0, 16, ..., 112.

// +0: two's-complement negation of 0x2020202020202020; adding it subtracts
// 0x20 from each byte of a 64-bit lane (used to flag bytes below 0x20).
DATA LCDATA1<>+0(SB)/8, $0xdfdfdfdfdfdfdfe0
DATA LCDATA1<>+8(SB)/8, $0xdfdfdfdfdfdfdfe0

// +16: 0x22 ('"') in every byte, XORed with the input.
DATA LCDATA1<>+16(SB)/8, $0x2222222222222222
DATA LCDATA1<>+24(SB)/8, $0x2222222222222222

// +32: two's-complement negation of 0x0101010101010101; adding it subtracts
// 1 from each byte of a 64-bit lane (a byte that XORed to 0 gets its top bit set).
DATA LCDATA1<>+32(SB)/8, $0xfefefefefefefeff
DATA LCDATA1<>+40(SB)/8, $0xfefefefefefefeff

// +48: 0x5c ('\') in every byte.
DATA LCDATA1<>+48(SB)/8, $0x5c5c5c5c5c5c5c5c
DATA LCDATA1<>+56(SB)/8, $0x5c5c5c5c5c5c5c5c

// +64: 0x3c ('<') in every byte.
DATA LCDATA1<>+64(SB)/8, $0x3c3c3c3c3c3c3c3c
DATA LCDATA1<>+72(SB)/8, $0x3c3c3c3c3c3c3c3c

// +80: 0x3e ('>') in every byte.
DATA LCDATA1<>+80(SB)/8, $0x3e3e3e3e3e3e3e3e
DATA LCDATA1<>+88(SB)/8, $0x3e3e3e3e3e3e3e3e

// +96: 0x26 ('&') in every byte.
DATA LCDATA1<>+96(SB)/8, $0x2626262626262626
DATA LCDATA1<>+104(SB)/8, $0x2626262626262626

// +112: 0x80 in every byte; ANDed with the combined result so that only the
// top bit of each byte reaches vpmovmskb.
DATA LCDATA1<>+112(SB)/8, $0x8080808080808080
DATA LCDATA1<>+120(SB)/8, $0x8080808080808080

// Flag value 8 is RODATA in Go's textflag.h; total size is 128 bytes.
GLOBL LCDATA1<>(SB), 8, $128
// ·_findEscapeIndex128 scans a byte buffer 16 bytes at a time with
// VEX-encoded 128-bit instructions and reports where a "flagged" byte first
// shows up.
//
// Arguments and result, as referenced through FP below (24 bytes in total):
//   buf+0(FP)   pointer to the first byte to scan
//   len+8(FP)   number of bytes
//   ret+16(FP)  result
//
// A byte is flagged when its top bit ends up set in the OR of:
//   the byte itself                     (values >= 0x80)
//   byte - 0x20                         (values below 0x20)
//   (byte ^ c) - 1 for c in '"', '\', '<', '>', '&'
// The subtractions are performed on 64-bit lanes (vpaddq), so a borrow can
// carry from one byte into the next byte of the same lane.
//
// Result:
//   * no chunk contained a flagged byte: (len / 16) * 16, i.e. the number of
//     bytes covered by whole 16-byte chunks; the tail is not examined here.
//   * some chunk contained a flagged byte: the index of the lowest flagged
//     byte *within that chunk* (bsf of the vpmovmskb mask). The chunk counter
//     in rcx is not added to it.
//     NOTE(review): confirm the caller expects a chunk-relative value.
//
// NOTE(review): only the low 32 bits of len are used, as a signed value
// (mov eax, esi / sar / cdqe). Lengths that do not fit in an int32 are
// not handled correctly by this routine.
//
// The routine performs no CPU feature check; the caller is presumably
// responsible for only calling it where these instructions are available.
//
// Frame size is 8 (not 0) on purpose: the body overwrites BP with the address
// of the constant table. With a non-zero frame the assembler emits a
// prologue/epilogue that saves and restores the caller's BP; with a $0 frame
// the caller's frame pointer would be destroyed (go vet: "frame pointer is
// clobbered before saving").
TEXT ·_findEscapeIndex128(SB), $8-24
	MOVQ buf+0(FP), DI
	MOVQ len+8(FP), SI
	LEAQ LCDATA1<>(SB), BP

	// eax = (int32)len / 16, rounded toward zero, then sign-extended into rax.
	WORD $0xf089 // mov eax, esi
	WORD $0xf8c1; BYTE $0x1f // sar eax, 31
	WORD $0xe8c1; BYTE $0x1c // shr eax, 28
	WORD $0xf001 // add eax, esi
	WORD $0xf8c1; BYTE $0x04 // sar eax, 4
	WORD $0x9848 // cdqe

	// Unsigned (len + 15) < 31 means the 32-bit len is in [-15, 15]:
	// there is no whole chunk, so skip the loop.
	WORD $0xc683; BYTE $0x0f // add esi, 15
	WORD $0xfe83; BYTE $0x1f // cmp esi, 31
	JB LBB0_5

	// rcx = chunk counter; load the loop-invariant constants from LCDATA1.
	WORD $0xc931 // xor ecx, ecx
	LONG $0x456f79c5; BYTE $0x00 // vmovdqa xmm8, oword 0[rbp] /* [rip + .LCPI0_0] */
	LONG $0x4d6f79c5; BYTE $0x10 // vmovdqa xmm9, oword 16[rbp] /* [rip + .LCPI0_1] */
	LONG $0x556ff9c5; BYTE $0x20 // vmovdqa xmm2, oword 32[rbp] /* [rip + .LCPI0_2] */
	LONG $0x556f79c5; BYTE $0x30 // vmovdqa xmm10, oword 48[rbp] /* [rip + .LCPI0_3] */
	LONG $0x5d6f79c5; BYTE $0x40 // vmovdqa xmm11, oword 64[rbp] /* [rip + .LCPI0_4] */
	LONG $0x656f79c5; BYTE $0x50 // vmovdqa xmm12, oword 80[rbp] /* [rip + .LCPI0_5] */
	LONG $0x6d6f79c5; BYTE $0x60 // vmovdqa xmm13, oword 96[rbp] /* [rip + .LCPI0_6] */
	LONG $0x7d6ff9c5; BYTE $0x70 // vmovdqa xmm7, oword 112[rbp] /* [rip + .LCPI0_7] */

LBB0_2:
	// xmm0 = next 16 input bytes (unaligned load).
	LONG $0x076ffac5 // vmovdqu xmm0, oword [rdi]
	// xmm1 = input - 0x20 per byte.
	LONG $0xd479c1c4; BYTE $0xc8 // vpaddq xmm1, xmm0, xmm8
	// xmm3 = (input ^ '"') - 1.
	LONG $0xef79c1c4; BYTE $0xd9 // vpxor xmm3, xmm0, xmm9
	LONG $0xdad4e1c5 // vpaddq xmm3, xmm3, xmm2
	// xmm4 = ((input ^ '\') - 1) | ((input ^ '<') - 1).
	LONG $0xef79c1c4; BYTE $0xe2 // vpxor xmm4, xmm0, xmm10
	LONG $0xe2d4d9c5 // vpaddq xmm4, xmm4, xmm2
	LONG $0xef79c1c4; BYTE $0xeb // vpxor xmm5, xmm0, xmm11
	LONG $0xead4d1c5 // vpaddq xmm5, xmm5, xmm2
	LONG $0xe5ebd9c5 // vpor xmm4, xmm4, xmm5
	// xmm5 = ((input ^ '>') - 1) | ((input ^ '&') - 1).
	LONG $0xef79c1c4; BYTE $0xec // vpxor xmm5, xmm0, xmm12
	LONG $0xead4d1c5 // vpaddq xmm5, xmm5, xmm2
	LONG $0xef79c1c4; BYTE $0xf5 // vpxor xmm6, xmm0, xmm13
	LONG $0xf2d4c9c5 // vpaddq xmm6, xmm6, xmm2
	LONG $0xeeebd1c5 // vpor xmm5, xmm5, xmm6
	// OR every partial result (and the input itself), keep only the top bit
	// of each byte, and collapse those bits into a 16-bit mask in edx.
	LONG $0xc0ebf1c5 // vpor xmm0, xmm1, xmm0
	LONG $0xc3ebf9c5 // vpor xmm0, xmm0, xmm3
	LONG $0xc4ebf9c5 // vpor xmm0, xmm0, xmm4
	LONG $0xc5ebf9c5 // vpor xmm0, xmm0, xmm5
	LONG $0xc7dbf9c5 // vpand xmm0, xmm0, xmm7
	LONG $0xd0d7f9c5 // vpmovmskb edx, xmm0
	WORD $0xd285 // test edx, edx
	JNE LBB0_3
	// Nothing flagged: advance to the next chunk while rcx < rax (unsigned).
	LONG $0x10c78348 // add rdi, 16
	LONG $0x01c18348 // add rcx, 1
	WORD $0x3948; BYTE $0xc1 // cmp rcx, rax
	JB LBB0_2

LBB0_5:
	// No hit: result = chunk count * 16.
	LONG $0x04e0c148 // shl rax, 4
	JMP LBB0_6

LBB0_3:
	// Hit: result = position of the lowest set mask bit (edx is non-zero here).
	WORD $0xbc0f; BYTE $0xc2 // bsf eax, edx

LBB0_6:
	MOVQ AX, ret+16(FP)
	RET
// LCDATA2 is the 96-byte constant table read by ·_findEscapeIndex256.
// Offsets 0..48 hold one quadword each, which the routine replicates across a
// ymm register with vpbroadcastq; offsets 64..95 hold a full 32-byte vector
// loaded with vmovdqa.

// +0: two's-complement negation of 0x2020202020202020 (subtracts 0x20 per byte).
DATA LCDATA2<>+0(SB)/8, $0xdfdfdfdfdfdfdfe0
// +8: 0x22 ('"') in every byte.
DATA LCDATA2<>+8(SB)/8, $0x2222222222222222
// +16: two's-complement negation of 0x0101010101010101 (subtracts 1 per byte).
DATA LCDATA2<>+16(SB)/8, $0xfefefefefefefeff
// +24: 0x5c ('\') in every byte.
DATA LCDATA2<>+24(SB)/8, $0x5c5c5c5c5c5c5c5c
// +32: 0x3c ('<') in every byte.
DATA LCDATA2<>+32(SB)/8, $0x3c3c3c3c3c3c3c3c
// +40: 0x3e ('>') in every byte.
DATA LCDATA2<>+40(SB)/8, $0x3e3e3e3e3e3e3e3e
// +48: 0x26 ('&') in every byte.
DATA LCDATA2<>+48(SB)/8, $0x2626262626262626
// +56: zero quadword that ·_findEscapeIndex256 never reads; it places the
// vector that follows at offset 64 within the symbol.
DATA LCDATA2<>+56(SB)/8, $0x0000000000000000

// +64..+95: 0x80 in every byte; ANDed with the combined result so that only
// the top bit of each byte reaches vpmovmskb.
DATA LCDATA2<>+64(SB)/8, $0x8080808080808080
DATA LCDATA2<>+72(SB)/8, $0x8080808080808080
DATA LCDATA2<>+80(SB)/8, $0x8080808080808080
DATA LCDATA2<>+88(SB)/8, $0x8080808080808080

// Flag value 8 is RODATA in Go's textflag.h; total size is 96 bytes.
GLOBL LCDATA2<>(SB), 8, $96
// ·_findEscapeIndex256 scans a byte buffer 32 bytes at a time with 256-bit
// integer vector instructions (vpbroadcastq / vpaddq / vpor on ymm registers)
// and reports where a "flagged" byte first shows up.
//
// Arguments and result, as referenced through FP below (24 bytes in total):
//   buf+0(FP)   pointer to the first byte to scan
//   len+8(FP)   number of bytes
//   ret+16(FP)  result
//
// A byte is flagged when its top bit ends up set in the OR of:
//   the byte itself                     (values >= 0x80)
//   byte - 0x20                         (values below 0x20)
//   (byte ^ c) - 1 for c in '"', '\', '<', '>', '&'
// The subtractions are performed on 64-bit lanes (vpaddq), so a borrow can
// carry from one byte into the next byte of the same lane.
//
// Result:
//   * no chunk contained a flagged byte: (len / 32) * 32, i.e. the number of
//     bytes covered by whole 32-byte chunks; the tail is not examined here.
//   * some chunk contained a flagged byte: the index of the lowest flagged
//     byte *within that chunk* (bsf of the vpmovmskb mask). The chunk counter
//     in rcx is not added to it.
//     NOTE(review): confirm the caller expects a chunk-relative value.
//
// NOTE(review): only the low 32 bits of len are used, as a signed value
// (mov eax, esi / sar / cdqe). Lengths that do not fit in an int32 are
// not handled correctly by this routine.
//
// The routine performs no CPU feature check; the caller is presumably
// responsible for only calling it where these instructions are available.
//
// Frame size is 8 (not 0) on purpose: the body overwrites BP with the address
// of the constant table. With a non-zero frame the assembler emits a
// prologue/epilogue that saves and restores the caller's BP; with a $0 frame
// the caller's frame pointer would be destroyed (go vet: "frame pointer is
// clobbered before saving").
TEXT ·_findEscapeIndex256(SB), $8-24
	MOVQ buf+0(FP), DI
	MOVQ len+8(FP), SI
	LEAQ LCDATA2<>(SB), BP

	// eax = (int32)len / 32, rounded toward zero, then sign-extended into rax.
	WORD $0xf089 // mov eax, esi
	WORD $0xf8c1; BYTE $0x1f // sar eax, 31
	WORD $0xe8c1; BYTE $0x1b // shr eax, 27
	WORD $0xf001 // add eax, esi
	WORD $0xf8c1; BYTE $0x05 // sar eax, 5
	WORD $0x9848 // cdqe

	// Unsigned (len + 31) < 63 means the 32-bit len is in [-31, 31]:
	// there is no whole chunk, so skip the loop.
	WORD $0xc683; BYTE $0x1f // add esi, 31
	WORD $0xfe83; BYTE $0x3f // cmp esi, 63
	JB LBB1_5

	// Broadcast the loop-invariant quadword constants from LCDATA2 into
	// ymm0..ymm6, clear the chunk counter (rcx), and load the 0x80 mask.
	LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq ymm0, qword 0[rbp] /* [rip + .LCPI1_0] */
	LONG $0x597de2c4; WORD $0x084d // vpbroadcastq ymm1, qword 8[rbp] /* [rip + .LCPI1_1] */
	LONG $0x597de2c4; WORD $0x1055 // vpbroadcastq ymm2, qword 16[rbp] /* [rip + .LCPI1_2] */
	LONG $0x597de2c4; WORD $0x185d // vpbroadcastq ymm3, qword 24[rbp] /* [rip + .LCPI1_3] */
	LONG $0x597de2c4; WORD $0x2065 // vpbroadcastq ymm4, qword 32[rbp] /* [rip + .LCPI1_4] */
	LONG $0x597de2c4; WORD $0x286d // vpbroadcastq ymm5, qword 40[rbp] /* [rip + .LCPI1_5] */
	WORD $0xc931 // xor ecx, ecx
	LONG $0x597de2c4; WORD $0x3075 // vpbroadcastq ymm6, qword 48[rbp] /* [rip + .LCPI1_6] */
	LONG $0x7d6ffdc5; BYTE $0x40 // vmovdqa ymm7, yword 64[rbp] /* [rip + .LCPI1_7] */

LBB1_2:
	// ymm8 = next 32 input bytes (unaligned load).
	LONG $0x076f7ec5 // vmovdqu ymm8, yword [rdi]
	// ymm9 = input - 0x20 per byte.
	LONG $0xc8d43dc5 // vpaddq ymm9, ymm8, ymm0
	// ymm10 = (input ^ '"') - 1.
	LONG $0xd1ef3dc5 // vpxor ymm10, ymm8, ymm1
	LONG $0xd2d42dc5 // vpaddq ymm10, ymm10, ymm2
	// ymm11 = ((input ^ '\') - 1) | ((input ^ '<') - 1).
	LONG $0xdbef3dc5 // vpxor ymm11, ymm8, ymm3
	LONG $0xdad425c5 // vpaddq ymm11, ymm11, ymm2
	LONG $0xe4ef3dc5 // vpxor ymm12, ymm8, ymm4
	LONG $0xe2d41dc5 // vpaddq ymm12, ymm12, ymm2
	LONG $0xeb2541c4; BYTE $0xdc // vpor ymm11, ymm11, ymm12
	// ymm12 = ((input ^ '>') - 1) | ((input ^ '&') - 1).
	LONG $0xe5ef3dc5 // vpxor ymm12, ymm8, ymm5
	LONG $0xe2d41dc5 // vpaddq ymm12, ymm12, ymm2
	LONG $0xeeef3dc5 // vpxor ymm13, ymm8, ymm6
	LONG $0xead415c5 // vpaddq ymm13, ymm13, ymm2
	LONG $0xeb1d41c4; BYTE $0xe5 // vpor ymm12, ymm12, ymm13
	// OR every partial result (and the input itself), keep only the top bit
	// of each byte, and collapse those bits into a 32-bit mask in edx.
	LONG $0xeb3541c4; BYTE $0xc0 // vpor ymm8, ymm9, ymm8
	LONG $0xeb3d41c4; BYTE $0xc2 // vpor ymm8, ymm8, ymm10
	LONG $0xeb3d41c4; BYTE $0xc3 // vpor ymm8, ymm8, ymm11
	LONG $0xeb3d41c4; BYTE $0xc4 // vpor ymm8, ymm8, ymm12
	LONG $0xc7db3dc5 // vpand ymm8, ymm8, ymm7
	LONG $0xd77dc1c4; BYTE $0xd0 // vpmovmskb edx, ymm8
	WORD $0xd285 // test edx, edx
	JNE LBB1_3
	// Nothing flagged: advance to the next chunk while rcx < rax (unsigned).
	LONG $0x20c78348 // add rdi, 32
	LONG $0x01c18348 // add rcx, 1
	WORD $0x3948; BYTE $0xc1 // cmp rcx, rax
	JB LBB1_2

LBB1_5:
	// No hit: result = chunk count * 32.
	LONG $0x05e0c148 // shl rax, 5
	JMP LBB1_6

LBB1_3:
	// Hit: result = position of the lowest set mask bit (edx is non-zero here).
	WORD $0xbc0f; BYTE $0xc2 // bsf eax, edx

LBB1_6:
	// Clear the upper halves of the ymm registers before returning to Go code.
	VZEROUPPER
	MOVQ AX, ret+16(FP)
	RET