mirror of https://github.com/goccy/go-json.git
Update SIMD codes
This commit is contained in:
parent
4019c11e82
commit
118663d59f
|
@ -3,7 +3,7 @@
|
|||
#include <string.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
uint64_t findEscapeIndex64(char *buf, int len) {
|
||||
uint64_t findHTMLEscapeIndex64(char *buf, int len) {
|
||||
static const uint64_t lsb = 0x0101010101010101;
|
||||
static const uint64_t msb = 0x8080808080808080;
|
||||
|
||||
|
@ -29,7 +29,7 @@ uint64_t findEscapeIndex64(char *buf, int len) {
|
|||
return 8 * chunkLen;
|
||||
}
|
||||
|
||||
uint64_t findEscapeIndex128(char *buf, int len) {
|
||||
uint64_t findHTMLEscapeIndex128(char *buf, int len) {
|
||||
static const uint64_t lsb = 0x0101010101010101;
|
||||
static const uint64_t msb = 0x8080808080808080;
|
||||
|
||||
|
@ -71,12 +71,12 @@ uint64_t findEscapeIndex128(char *buf, int len) {
|
|||
}
|
||||
int idx = 16 * chunkLen;
|
||||
if (len - idx >= 8) {
|
||||
return idx + findEscapeIndex64(sp, len - idx);
|
||||
return idx + findHTMLEscapeIndex64(sp, len - idx);
|
||||
}
|
||||
return idx;
|
||||
}
|
||||
|
||||
uint64_t findEscapeIndex256(char *buf, int len) {
|
||||
uint64_t findHTMLEscapeIndex256(char *buf, int len) {
|
||||
static const uint64_t lsb = 0x0101010101010101;
|
||||
static const uint64_t msb = 0x8080808080808080;
|
||||
|
||||
|
@ -118,6 +118,108 @@ uint64_t findEscapeIndex256(char *buf, int len) {
|
|||
}
|
||||
int idx = 32 * chunkLen;
|
||||
int remainLen = len - idx;
|
||||
if (remainLen >= 16) {
|
||||
return idx + findHTMLEscapeIndex128(sp, remainLen);
|
||||
} else if (remainLen >= 8) {
|
||||
return idx + findHTMLEscapeIndex64(sp, remainLen);
|
||||
}
|
||||
return idx;
|
||||
}
|
||||
|
||||
/*
 * findEscapeIndex64 scans buf eight bytes at a time and returns the index
 * (relative to buf) of the first byte that needs JSON escaping: a control
 * character (< 0x20), a double quote, a backslash, or a byte with the high
 * bit set (non-ASCII).
 *
 * Only whole 8-byte words are inspected. When no such byte is found the
 * function returns 8 * (len / 8), i.e. the offset of the first byte that was
 * NOT examined, so the caller can continue byte-by-byte from there.
 *
 * The per-byte test is a SWAR trick: for every byte lane the expression
 *   n | (n - 0x20) | ((n ^ '"') - 1) | ((n ^ '\\') - 1)
 * has its most significant bit set when the lane matches one of the
 * conditions above. Lanes above the first match may be flagged spuriously
 * because of borrow propagation, which is harmless since only the lowest
 * flagged lane is reported. Byte order is assumed to be little-endian
 * (lane 0 == buf[0]).
 */
uint64_t findEscapeIndex64(char *buf, int len) {
  static const uint64_t lsb = 0x0101010101010101;
  static const uint64_t msb = 0x8080808080808080;

  const uint64_t space  = lsb * 0x20;
  const uint64_t quote  = lsb * '"';
  const uint64_t escape = lsb * '\\';

  char *sp = buf;
  /* A non-positive length has no complete word to scan. */
  size_t chunkLen = len > 0 ? (size_t)len / 8 : 0;
  size_t chunkIdx = 0;
  for (; chunkIdx < chunkLen; chunkIdx++) {
    uint64_t n;
    /* memcpy keeps the unaligned load free of aliasing/alignment UB. */
    memcpy(&n, sp, sizeof(n));
    uint64_t mask = n | (n - space) | ((n ^ quote) - lsb) | ((n ^ escape) - lsb);
    uint64_t masked = mask & msb;
    if (masked != 0) {
      /*
       * masked is 64 bits wide, so the 64-bit count-trailing-zeros builtin
       * is required; the 32-bit variant would ignore bytes 4..7. The chunk
       * offset makes the result relative to buf rather than to this word.
       */
      return 8 * chunkIdx + (uint64_t)__builtin_ctzll(masked) / 8;
    }
    sp += 8;
  }
  return 8 * chunkLen;
}
|
||||
|
||||
/*
 * findEscapeIndex128 scans buf sixteen bytes at a time with SSE2 and returns
 * the index (relative to buf) of the first byte that needs JSON escaping: a
 * control character (< 0x20), a double quote, a backslash, or a byte with
 * the high bit set.
 *
 * After the 16-byte chunks, a remaining tail of at least eight bytes is
 * handed to findEscapeIndex64. When nothing is found the return value is the
 * offset of the first byte that was not examined, so the caller can finish
 * the scan byte-by-byte.
 *
 * The arithmetic is done on 64-bit lanes (not 8-bit lanes) so the flagged
 * bytes are the same ones the scalar SWAR version in findEscapeIndex64
 * flags.
 */
uint64_t findEscapeIndex128(char *buf, int len) {
  static const uint64_t lsb = 0x0101010101010101;
  static const uint64_t msb = 0x8080808080808080;

  /* Broadcast each 64-bit pattern into both halves of a 128-bit register. */
  const __m128i msbV    = _mm_set1_epi64x((long long)msb);
  const __m128i lsbV    = _mm_set1_epi64x((long long)lsb);
  const __m128i spaceV  = _mm_set1_epi64x((long long)(lsb * 0x20));
  const __m128i quoteV  = _mm_set1_epi64x((long long)(lsb * '"'));
  const __m128i escapeV = _mm_set1_epi64x((long long)(lsb * '\\'));

  char *sp = buf;
  /* A non-positive length has no complete chunk to scan. */
  size_t chunkLen = len > 0 ? (size_t)len / 16 : 0;
  size_t chunkIdx = 0;
  for (; chunkIdx < chunkLen; chunkIdx++) {
    __m128i n = _mm_loadu_si128((const __m128i *)sp);
    __m128i spaceN  = _mm_sub_epi64(n, spaceV);
    __m128i quoteN  = _mm_sub_epi64(_mm_xor_si128(n, quoteV), lsbV);
    __m128i escapeN = _mm_sub_epi64(_mm_xor_si128(n, escapeV), lsbV);

    __m128i mask = _mm_or_si128(_mm_or_si128(_mm_or_si128(n, spaceN), quoteN), escapeN);
    /* One bit per byte lane: set when that lane's high bit is set. */
    int movemask = _mm_movemask_epi8(_mm_and_si128(mask, msbV));
    if (movemask != 0) {
      /* Add the chunk offset so the index is relative to buf. */
      return 16 * chunkIdx + (uint64_t)__builtin_ctz(movemask);
    }
    sp += 16;
  }
  int idx = (int)(16 * chunkLen);
  if (len - idx >= 8) {
    /* Let the scalar routine cover one more 8-byte word of the tail. */
    return idx + findEscapeIndex64(sp, len - idx);
  }
  return idx;
}
|
||||
|
||||
uint64_t findEscapeIndex256(char *buf, int len) {
|
||||
static const uint64_t lsb = 0x0101010101010101;
|
||||
static const uint64_t msb = 0x8080808080808080;
|
||||
|
||||
static const __m64 space = (__m64)(lsb * 0x20);
|
||||
static const __m64 quote = (__m64)(lsb * '"');
|
||||
static const __m64 escape = (__m64)(lsb * '\\');
|
||||
|
||||
__m256i zeroV = _mm256_setzero_si256();
|
||||
__m256i msbV = _mm256_set1_epi64x(msb);
|
||||
__m256i lsbV = _mm256_set1_epi64x(lsb);
|
||||
__m256i spaceV = _mm256_set1_epi64x(space);
|
||||
__m256i quoteV = _mm256_set1_epi64x(quote);
|
||||
__m256i escapeV = _mm256_set1_epi64x(escape);
|
||||
|
||||
char *sp = buf;
|
||||
size_t chunkLen = len / 32;
|
||||
int chunkIdx = 0;
|
||||
for (; chunkIdx < chunkLen; chunkIdx++) {
|
||||
__m256i n = _mm256_loadu_si256((const void *)sp);
|
||||
__m256i spaceN = _mm256_sub_epi64(n, spaceV);
|
||||
__m256i quoteN = _mm256_sub_epi64(_mm256_xor_si256(n, quoteV), lsbV);
|
||||
__m256i escapeN = _mm256_sub_epi64(_mm256_xor_si256(n, escapeV), lsbV);
|
||||
|
||||
__m256i mask = _mm256_or_si256(_mm256_or_si256(_mm256_or_si256(n, spaceN), quoteN), escapeN);
|
||||
int movemask = _mm256_movemask_epi8(_mm256_and_si256(mask, msbV));
|
||||
if (movemask != 0) {
|
||||
return __builtin_ctz(movemask);
|
||||
}
|
||||
sp += 32;
|
||||
}
|
||||
int idx = 32 * chunkLen;
|
||||
int remainLen = len - idx;
|
||||
if (remainLen >= 16) {
|
||||
return idx + findEscapeIndex128(sp, remainLen);
|
||||
} else if (remainLen >= 8) {
|
||||
|
|
|
@ -2,6 +2,18 @@ package encoder
|
|||
|
||||
import "unsafe"
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
func _findHTMLEscapeIndex64(buf unsafe.Pointer, len int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
func _findHTMLEscapeIndex128(buf unsafe.Pointer, len int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
func _findHTMLEscapeIndex256(buf unsafe.Pointer, len int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
func _findEscapeIndex64(buf unsafe.Pointer, len int) (ret int)
|
||||
|
|
|
@ -402,12 +402,13 @@ func Run(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]b
|
|||
code = code.End.Next
|
||||
break
|
||||
}
|
||||
b = appendStructHead(ctx, b)
|
||||
mapCtx := encoder.NewMapContext(mlen)
|
||||
unorderedMap := (ctx.Option.Flag & encoder.UnorderedMapOption) != 0
|
||||
mapCtx := encoder.NewMapContext(mlen, unorderedMap)
|
||||
mapiterinit(code.Type, uptr, &mapCtx.Iter)
|
||||
store(ctxptr, code.Idx, uintptr(unsafe.Pointer(mapCtx)))
|
||||
ctx.KeepRefs = append(ctx.KeepRefs, unsafe.Pointer(mapCtx))
|
||||
if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
|
||||
b = appendStructHead(ctx, b)
|
||||
if unorderedMap {
|
||||
b = appendMapKeyIndent(ctx, code.Next, b)
|
||||
} else {
|
||||
mapCtx.Start = len(b)
|
||||
|
|
|
@ -259,12 +259,14 @@ var mapContextPool = sync.Pool{
|
|||
},
|
||||
}
|
||||
|
||||
func NewMapContext(mapLen int) *MapContext {
|
||||
func NewMapContext(mapLen int, unorderedMap bool) *MapContext {
|
||||
ctx := mapContextPool.Get().(*MapContext)
|
||||
if len(ctx.Slice.Items) < mapLen {
|
||||
ctx.Slice.Items = make([]MapItem, mapLen)
|
||||
} else {
|
||||
ctx.Slice.Items = ctx.Slice.Items[:mapLen]
|
||||
if !unorderedMap {
|
||||
if len(ctx.Slice.Items) < mapLen {
|
||||
ctx.Slice.Items = make([]MapItem, mapLen)
|
||||
} else {
|
||||
ctx.Slice.Items = ctx.Slice.Items[:mapLen]
|
||||
}
|
||||
}
|
||||
ctx.Buf = ctx.Buf[:0]
|
||||
ctx.Iter = mapIter{}
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package encoder
|
||||
|
||||
import (
|
||||
"math/bits"
|
||||
"reflect"
|
||||
"unsafe"
|
||||
|
||||
|
@ -374,11 +373,11 @@ func AppendString(ctx *RuntimeContext, buf []byte, s string) []byte {
|
|||
switch valLen {
|
||||
case 1, 2, 3, 4, 5, 6, 7:
|
||||
case 8, 9, 10, 11, 12, 13, 14, 15:
|
||||
j = _findEscapeIndex64((*runtime.SliceHeader)(unsafe.Pointer(&s)).Data, len(s))
|
||||
j = _findHTMLEscapeIndex64((*runtime.SliceHeader)(unsafe.Pointer(&s)).Data, len(s))
|
||||
case 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31:
|
||||
j = _findEscapeIndex128((*runtime.SliceHeader)(unsafe.Pointer(&s)).Data, len(s))
|
||||
j = _findHTMLEscapeIndex128((*runtime.SliceHeader)(unsafe.Pointer(&s)).Data, len(s))
|
||||
default:
|
||||
j = _findEscapeIndex256((*runtime.SliceHeader)(unsafe.Pointer(&s)).Data, len(s))
|
||||
j = _findHTMLEscapeIndex256((*runtime.SliceHeader)(unsafe.Pointer(&s)).Data, len(s))
|
||||
}
|
||||
for j < valLen {
|
||||
c := s[j]
|
||||
|
@ -479,30 +478,15 @@ func appendString(buf []byte, s string) []byte {
|
|||
var (
|
||||
i, j int
|
||||
)
|
||||
if valLen >= 8 {
|
||||
chunks := stringToUint64Slice(s)
|
||||
for _, n := range chunks {
|
||||
// combine masks before checking for the MSB of each byte. We include
|
||||
// `n` in the mask to check whether any of the *input* byte MSBs were
|
||||
// set (i.e. the byte was outside the ASCII range).
|
||||
mask := n | (n - (lsb * 0x20)) |
|
||||
((n ^ (lsb * '"')) - lsb) |
|
||||
((n ^ (lsb * '\\')) - lsb)
|
||||
if (mask & msb) != 0 {
|
||||
j = bits.TrailingZeros64(mask&msb) / 8
|
||||
goto ESCAPE_END
|
||||
}
|
||||
}
|
||||
valLen := len(s)
|
||||
for i := len(chunks) * 8; i < valLen; i++ {
|
||||
if needEscape[s[i]] {
|
||||
j = i
|
||||
goto ESCAPE_END
|
||||
}
|
||||
}
|
||||
return append(append(buf, s...), '"')
|
||||
switch valLen {
|
||||
case 1, 2, 3, 4, 5, 6, 7:
|
||||
case 8, 9, 10, 11, 12, 13, 14, 15:
|
||||
j = _findEscapeIndex64((*runtime.SliceHeader)(unsafe.Pointer(&s)).Data, len(s))
|
||||
case 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31:
|
||||
j = _findEscapeIndex128((*runtime.SliceHeader)(unsafe.Pointer(&s)).Data, len(s))
|
||||
default:
|
||||
j = _findEscapeIndex256((*runtime.SliceHeader)(unsafe.Pointer(&s)).Data, len(s))
|
||||
}
|
||||
ESCAPE_END:
|
||||
for j < valLen {
|
||||
c := s[j]
|
||||
|
||||
|
|
|
@ -2,6 +2,18 @@ package encoder
|
|||
|
||||
import "unsafe"
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
func _findHTMLEscapeIndex64(buf unsafe.Pointer, len int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
func _findHTMLEscapeIndex128(buf unsafe.Pointer, len int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
func _findHTMLEscapeIndex256(buf unsafe.Pointer, len int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
func _findEscapeIndex64(buf unsafe.Pointer, len int) (ret int)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
//+build !noasm !appengine
|
||||
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
|
||||
|
||||
TEXT ·_findEscapeIndex64(SB), $0-24
|
||||
TEXT ·_findHTMLEscapeIndex64(SB), $0-24
|
||||
|
||||
MOVQ buf+0(FP), DI
|
||||
MOVQ len+8(FP), SI
|
||||
|
@ -84,7 +84,7 @@ DATA LCDATA1<>+0x070(SB)/8, $0x8080808080808080
|
|||
DATA LCDATA1<>+0x078(SB)/8, $0x8080808080808080
|
||||
GLOBL LCDATA1<>(SB), 8, $128
|
||||
|
||||
TEXT ·_findEscapeIndex128(SB), $16-24
|
||||
TEXT ·_findHTMLEscapeIndex128(SB), $16-24
|
||||
|
||||
MOVQ buf+0(FP), DI
|
||||
MOVQ len+8(FP), SI
|
||||
|
@ -237,7 +237,7 @@ DATA LCDATA2<>+0x0d0(SB)/8, $0x8080808080808080
|
|||
DATA LCDATA2<>+0x0d8(SB)/8, $0x8080808080808080
|
||||
GLOBL LCDATA2<>(SB), 8, $224
|
||||
|
||||
TEXT ·_findEscapeIndex256(SB), $16-24
|
||||
TEXT ·_findHTMLEscapeIndex256(SB), $16-24
|
||||
|
||||
MOVQ buf+0(FP), DI
|
||||
MOVQ len+8(FP), SI
|
||||
|
@ -470,3 +470,349 @@ LBB2_25:
|
|||
VZEROUPPER
|
||||
MOVQ AX, ret+16(FP)
|
||||
RET
|
||||
|
||||
TEXT ·_findEscapeIndex64(SB), $0-24
|
||||
|
||||
MOVQ buf+0(FP), DI
|
||||
MOVQ len+8(FP), SI
|
||||
|
||||
WORD $0xf089 // mov eax, esi
|
||||
WORD $0xf8c1; BYTE $0x1f // sar eax, 31
|
||||
WORD $0xe8c1; BYTE $0x1d // shr eax, 29
|
||||
WORD $0xf001 // add eax, esi
|
||||
WORD $0xf8c1; BYTE $0x03 // sar eax, 3
|
||||
WORD $0x9848 // cdqe
|
||||
WORD $0xc683; BYTE $0x07 // add esi, 7
|
||||
WORD $0xfe83; BYTE $0x0f // cmp esi, 15
|
||||
JB LBB3_5
|
||||
QUAD $0xfefefefefeffbe49; WORD $0xfefe // mov r14, -72340172838076673
|
||||
WORD $0xd231 // xor edx, edx
|
||||
QUAD $0xdfdfdfdfdfe0b849; WORD $0xdfdf // mov r8, -2314885530818453536
|
||||
QUAD $0x222222222222b949; WORD $0x2222 // mov r9, 2459565876494606882
|
||||
QUAD $0x5c5c5c5c5c5cba49; WORD $0x5c5c // mov r10, 6655295901103053916
|
||||
QUAD $0x808080808080bb49; WORD $0x8080 // mov r11, -9187201950435737472
|
||||
|
||||
LBB3_2:
|
||||
LONG $0xd7348b48 // mov rsi, qword [rdi + 8*rdx]
|
||||
LONG $0x061c8d4a // lea rbx, [rsi + r8]
|
||||
WORD $0x0948; BYTE $0xf3 // or rbx, rsi
|
||||
WORD $0x8948; BYTE $0xf1 // mov rcx, rsi
|
||||
WORD $0x314c; BYTE $0xc9 // xor rcx, r9
|
||||
WORD $0x014c; BYTE $0xf1 // add rcx, r14
|
||||
WORD $0x0948; BYTE $0xd9 // or rcx, rbx
|
||||
WORD $0x314c; BYTE $0xd6 // xor rsi, r10
|
||||
WORD $0x014c; BYTE $0xf6 // add rsi, r14
|
||||
WORD $0x0948; BYTE $0xce // or rsi, rcx
|
||||
WORD $0x214c; BYTE $0xde // and rsi, r11
|
||||
JNE LBB3_3
|
||||
LONG $0x01c28348 // add rdx, 1
|
||||
WORD $0x3948; BYTE $0xc2 // cmp rdx, rax
|
||||
JB LBB3_2
|
||||
|
||||
LBB3_5:
|
||||
LONG $0x03e0c148 // shl rax, 3
|
||||
JMP LBB3_6
|
||||
|
||||
LBB3_3:
|
||||
WORD $0xbc0f; BYTE $0xc6 // bsf eax, esi
|
||||
WORD $0xe8c1; BYTE $0x03 // shr eax, 3
|
||||
|
||||
LBB3_6:
|
||||
MOVQ AX, ret+16(FP)
|
||||
RET
|
||||
|
||||
DATA LCDATA3<>+0x000(SB)/8, $0xdfdfdfdfdfdfdfe0
|
||||
DATA LCDATA3<>+0x008(SB)/8, $0xdfdfdfdfdfdfdfe0
|
||||
DATA LCDATA3<>+0x010(SB)/8, $0x2222222222222222
|
||||
DATA LCDATA3<>+0x018(SB)/8, $0x2222222222222222
|
||||
DATA LCDATA3<>+0x020(SB)/8, $0xfefefefefefefeff
|
||||
DATA LCDATA3<>+0x028(SB)/8, $0xfefefefefefefeff
|
||||
DATA LCDATA3<>+0x030(SB)/8, $0x5c5c5c5c5c5c5c5c
|
||||
DATA LCDATA3<>+0x038(SB)/8, $0x5c5c5c5c5c5c5c5c
|
||||
DATA LCDATA3<>+0x040(SB)/8, $0x8080808080808080
|
||||
DATA LCDATA3<>+0x048(SB)/8, $0x8080808080808080
|
||||
GLOBL LCDATA3<>(SB), 8, $80
|
||||
|
||||
TEXT ·_findEscapeIndex128(SB), $0-24
|
||||
|
||||
MOVQ buf+0(FP), DI
|
||||
MOVQ len+8(FP), SI
|
||||
LEAQ LCDATA3<>(SB), BP
|
||||
|
||||
WORD $0xf089 // mov eax, esi
|
||||
WORD $0xf8c1; BYTE $0x1f // sar eax, 31
|
||||
WORD $0xe8c1; BYTE $0x1c // shr eax, 28
|
||||
WORD $0xf001 // add eax, esi
|
||||
WORD $0xf8c1; BYTE $0x04 // sar eax, 4
|
||||
WORD $0x4e8d; BYTE $0x0f // lea ecx, [rsi + 15]
|
||||
WORD $0xf983; BYTE $0x1f // cmp ecx, 31
|
||||
JB LBB4_5
|
||||
WORD $0x6348; BYTE $0xc8 // movsxd rcx, eax
|
||||
WORD $0xd231 // xor edx, edx
|
||||
LONG $0x456f79c5; BYTE $0x00 // vmovdqa xmm8, oword 0[rbp] /* [rip + .LCPI4_0] */
|
||||
LONG $0x4d6ff9c5; BYTE $0x10 // vmovdqa xmm1, oword 16[rbp] /* [rip + .LCPI4_1] */
|
||||
LONG $0x556ff9c5; BYTE $0x20 // vmovdqa xmm2, oword 32[rbp] /* [rip + .LCPI4_2] */
|
||||
LONG $0x5d6ff9c5; BYTE $0x30 // vmovdqa xmm3, oword 48[rbp] /* [rip + .LCPI4_3] */
|
||||
LONG $0x656ff9c5; BYTE $0x40 // vmovdqa xmm4, oword 64[rbp] /* [rip + .LCPI4_4] */
|
||||
|
||||
LBB4_2:
|
||||
LONG $0x2f6ffac5 // vmovdqu xmm5, oword [rdi]
|
||||
LONG $0xd451c1c4; BYTE $0xf0 // vpaddq xmm6, xmm5, xmm8
|
||||
LONG $0xf9efd1c5 // vpxor xmm7, xmm5, xmm1
|
||||
LONG $0xfad4c1c5 // vpaddq xmm7, xmm7, xmm2
|
||||
LONG $0xc3efd1c5 // vpxor xmm0, xmm5, xmm3
|
||||
LONG $0xc2d4f9c5 // vpaddq xmm0, xmm0, xmm2
|
||||
LONG $0xedebc9c5 // vpor xmm5, xmm6, xmm5
|
||||
LONG $0xefebd1c5 // vpor xmm5, xmm5, xmm7
|
||||
LONG $0xc0ebd1c5 // vpor xmm0, xmm5, xmm0
|
||||
LONG $0xc4dbf9c5 // vpand xmm0, xmm0, xmm4
|
||||
LONG $0xd8d7f9c5 // vpmovmskb ebx, xmm0
|
||||
WORD $0xdb85 // test ebx, ebx
|
||||
JNE LBB4_3
|
||||
LONG $0x10c78348 // add rdi, 16
|
||||
LONG $0x01c28348 // add rdx, 1
|
||||
WORD $0x3948; BYTE $0xca // cmp rdx, rcx
|
||||
JB LBB4_2
|
||||
|
||||
LBB4_5:
|
||||
WORD $0xe0c1; BYTE $0x04 // shl eax, 4
|
||||
WORD $0xc629 // sub esi, eax
|
||||
WORD $0x9848 // cdqe
|
||||
WORD $0xfe83; BYTE $0x08 // cmp esi, 8
|
||||
JL LBB4_12
|
||||
QUAD $0xfefefefefeffb849; WORD $0xfefe // mov r8, -72340172838076673
|
||||
WORD $0x8941; BYTE $0xf7 // mov r15d, esi
|
||||
LONG $0x03efc141 // shr r15d, 3
|
||||
WORD $0xc931 // xor ecx, ecx
|
||||
QUAD $0xdfdfdfdfdfe0b949; WORD $0xdfdf // mov r9, -2314885530818453536
|
||||
QUAD $0x222222222222ba49; WORD $0x2222 // mov r10, 2459565876494606882
|
||||
QUAD $0x5c5c5c5c5c5cbb49; WORD $0x5c5c // mov r11, 6655295901103053916
|
||||
QUAD $0x808080808080be49; WORD $0x8080 // mov r14, -9187201950435737472
|
||||
|
||||
LBB4_7:
|
||||
LONG $0xcf1c8b48 // mov rbx, qword [rdi + 8*rcx]
|
||||
LONG $0x0b248d4e // lea r12, [rbx + r9]
|
||||
WORD $0x0949; BYTE $0xdc // or r12, rbx
|
||||
WORD $0x8948; BYTE $0xda // mov rdx, rbx
|
||||
WORD $0x314c; BYTE $0xd2 // xor rdx, r10
|
||||
WORD $0x014c; BYTE $0xc2 // add rdx, r8
|
||||
WORD $0x094c; BYTE $0xe2 // or rdx, r12
|
||||
WORD $0x314c; BYTE $0xdb // xor rbx, r11
|
||||
WORD $0x014c; BYTE $0xc3 // add rbx, r8
|
||||
WORD $0x0948; BYTE $0xd3 // or rbx, rdx
|
||||
WORD $0x214c; BYTE $0xf3 // and rbx, r14
|
||||
JNE LBB4_8
|
||||
LONG $0x01c18348 // add rcx, 1
|
||||
WORD $0x394c; BYTE $0xf9 // cmp rcx, r15
|
||||
JB LBB4_7
|
||||
WORD $0xe683; BYTE $0xf8 // and esi, -8
|
||||
JMP LBB4_11
|
||||
|
||||
LBB4_3:
|
||||
WORD $0xbc0f; BYTE $0xc3 // bsf eax, ebx
|
||||
JMP LBB4_12
|
||||
|
||||
LBB4_8:
|
||||
WORD $0xbc0f; BYTE $0xf3 // bsf esi, ebx
|
||||
WORD $0xeec1; BYTE $0x03 // shr esi, 3
|
||||
|
||||
LBB4_11:
|
||||
WORD $0xf189 // mov ecx, esi
|
||||
WORD $0x0148; BYTE $0xc8 // add rax, rcx
|
||||
|
||||
LBB4_12:
|
||||
MOVQ AX, ret+16(FP)
|
||||
RET
|
||||
|
||||
DATA LCDATA4<>+0x000(SB)/8, $0xdfdfdfdfdfdfdfe0
|
||||
DATA LCDATA4<>+0x008(SB)/8, $0x2222222222222222
|
||||
DATA LCDATA4<>+0x010(SB)/8, $0xfefefefefefefeff
|
||||
DATA LCDATA4<>+0x018(SB)/8, $0x5c5c5c5c5c5c5c5c
|
||||
DATA LCDATA4<>+0x020(SB)/8, $0x8080808080808080
|
||||
DATA LCDATA4<>+0x028(SB)/8, $0x8080808080808080
|
||||
DATA LCDATA4<>+0x030(SB)/8, $0x8080808080808080
|
||||
DATA LCDATA4<>+0x038(SB)/8, $0x8080808080808080
|
||||
DATA LCDATA4<>+0x040(SB)/8, $0xdfdfdfdfdfdfdfe0
|
||||
DATA LCDATA4<>+0x048(SB)/8, $0xdfdfdfdfdfdfdfe0
|
||||
DATA LCDATA4<>+0x050(SB)/8, $0x2222222222222222
|
||||
DATA LCDATA4<>+0x058(SB)/8, $0x2222222222222222
|
||||
DATA LCDATA4<>+0x060(SB)/8, $0xfefefefefefefeff
|
||||
DATA LCDATA4<>+0x068(SB)/8, $0xfefefefefefefeff
|
||||
DATA LCDATA4<>+0x070(SB)/8, $0x5c5c5c5c5c5c5c5c
|
||||
DATA LCDATA4<>+0x078(SB)/8, $0x5c5c5c5c5c5c5c5c
|
||||
DATA LCDATA4<>+0x080(SB)/8, $0x8080808080808080
|
||||
DATA LCDATA4<>+0x088(SB)/8, $0x8080808080808080
|
||||
GLOBL LCDATA4<>(SB), 8, $144
|
||||
|
||||
TEXT ·_findEscapeIndex256(SB), $0-24
|
||||
|
||||
MOVQ buf+0(FP), DI
|
||||
MOVQ len+8(FP), SI
|
||||
LEAQ LCDATA4<>(SB), BP
|
||||
|
||||
WORD $0xf089 // mov eax, esi
|
||||
WORD $0xf8c1; BYTE $0x1f // sar eax, 31
|
||||
WORD $0xe8c1; BYTE $0x1b // shr eax, 27
|
||||
WORD $0xf001 // add eax, esi
|
||||
WORD $0xf8c1; BYTE $0x05 // sar eax, 5
|
||||
WORD $0x4e8d; BYTE $0x1f // lea ecx, [rsi + 31]
|
||||
WORD $0xf983; BYTE $0x3f // cmp ecx, 63
|
||||
JB LBB5_4
|
||||
WORD $0x6348; BYTE $0xc8 // movsxd rcx, eax
|
||||
LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq ymm0, qword 0[rbp] /* [rip + .LCPI5_0] */
|
||||
LONG $0x597de2c4; WORD $0x084d // vpbroadcastq ymm1, qword 8[rbp] /* [rip + .LCPI5_1] */
|
||||
LONG $0x597de2c4; WORD $0x1055 // vpbroadcastq ymm2, qword 16[rbp] /* [rip + .LCPI5_2] */
|
||||
WORD $0xd231 // xor edx, edx
|
||||
LONG $0x597de2c4; WORD $0x185d // vpbroadcastq ymm3, qword 24[rbp] /* [rip + .LCPI5_3] */
|
||||
LONG $0x656ffdc5; BYTE $0x20 // vmovdqa ymm4, yword 32[rbp] /* [rip + .LCPI5_4] */
|
||||
|
||||
LBB5_2:
|
||||
LONG $0x2f6ffec5 // vmovdqu ymm5, yword [rdi]
|
||||
LONG $0xf0d4d5c5 // vpaddq ymm6, ymm5, ymm0
|
||||
LONG $0xf9efd5c5 // vpxor ymm7, ymm5, ymm1
|
||||
LONG $0xfad4c5c5 // vpaddq ymm7, ymm7, ymm2
|
||||
LONG $0xc3ef55c5 // vpxor ymm8, ymm5, ymm3
|
||||
LONG $0xc2d43dc5 // vpaddq ymm8, ymm8, ymm2
|
||||
LONG $0xedebcdc5 // vpor ymm5, ymm6, ymm5
|
||||
LONG $0xefebd5c5 // vpor ymm5, ymm5, ymm7
|
||||
LONG $0xeb55c1c4; BYTE $0xe8 // vpor ymm5, ymm5, ymm8
|
||||
LONG $0xecdbd5c5 // vpand ymm5, ymm5, ymm4
|
||||
LONG $0xddd7fdc5 // vpmovmskb ebx, ymm5
|
||||
WORD $0xdb85 // test ebx, ebx
|
||||
JNE LBB5_18
|
||||
LONG $0x20c78348 // add rdi, 32
|
||||
LONG $0x01c28348 // add rdx, 1
|
||||
WORD $0x3948; BYTE $0xca // cmp rdx, rcx
|
||||
JB LBB5_2
|
||||
|
||||
LBB5_4:
|
||||
WORD $0xe0c1; BYTE $0x05 // shl eax, 5
|
||||
WORD $0xc629 // sub esi, eax
|
||||
WORD $0xfe83; BYTE $0x10 // cmp esi, 16
|
||||
JL LBB5_13
|
||||
WORD $0x634c; BYTE $0xc0 // movsxd r8, eax
|
||||
WORD $0xf089 // mov eax, esi
|
||||
WORD $0xe8c1; BYTE $0x04 // shr eax, 4
|
||||
WORD $0xc931 // xor ecx, ecx
|
||||
LONG $0x456f79c5; BYTE $0x40 // vmovdqa xmm8, oword 64[rbp] /* [rip + .LCPI5_5] */
|
||||
LONG $0x4d6ff9c5; BYTE $0x50 // vmovdqa xmm1, oword 80[rbp] /* [rip + .LCPI5_6] */
|
||||
LONG $0x556ff9c5; BYTE $0x60 // vmovdqa xmm2, oword 96[rbp] /* [rip + .LCPI5_7] */
|
||||
LONG $0x5d6ff9c5; BYTE $0x70 // vmovdqa xmm3, oword 112[rbp] /* [rip + .LCPI5_8] */
|
||||
QUAD $0x00000080a56ff9c5 // vmovdqa xmm4, oword 128[rbp] /* [rip + .LCPI5_9] */
|
||||
|
||||
LBB5_6:
|
||||
LONG $0x2f6ffac5 // vmovdqu xmm5, oword [rdi]
|
||||
LONG $0xd451c1c4; BYTE $0xf0 // vpaddq xmm6, xmm5, xmm8
|
||||
LONG $0xf9efd1c5 // vpxor xmm7, xmm5, xmm1
|
||||
LONG $0xfad4c1c5 // vpaddq xmm7, xmm7, xmm2
|
||||
LONG $0xc3efd1c5 // vpxor xmm0, xmm5, xmm3
|
||||
LONG $0xc2d4f9c5 // vpaddq xmm0, xmm0, xmm2
|
||||
LONG $0xedebc9c5 // vpor xmm5, xmm6, xmm5
|
||||
LONG $0xefebd1c5 // vpor xmm5, xmm5, xmm7
|
||||
LONG $0xc0ebd1c5 // vpor xmm0, xmm5, xmm0
|
||||
LONG $0xc4dbf9c5 // vpand xmm0, xmm0, xmm4
|
||||
LONG $0xd0d7f9c5 // vpmovmskb edx, xmm0
|
||||
WORD $0xd285 // test edx, edx
|
||||
JNE LBB5_19
|
||||
LONG $0x10c78348 // add rdi, 16
|
||||
LONG $0x01c18348 // add rcx, 1
|
||||
WORD $0x3948; BYTE $0xc1 // cmp rcx, rax
|
||||
JB LBB5_6
|
||||
WORD $0xf089 // mov eax, esi
|
||||
WORD $0xe083; BYTE $0xf0 // and eax, -16
|
||||
WORD $0xc629 // sub esi, eax
|
||||
WORD $0x9848 // cdqe
|
||||
WORD $0xfe83; BYTE $0x08 // cmp esi, 8
|
||||
JL LBB5_24
|
||||
QUAD $0xfefefefefeffbf49; WORD $0xfefe // mov r15, -72340172838076673
|
||||
QUAD $0xdfdfdfdfdfe0b949; WORD $0xdfdf // mov r9, -2314885530818453536
|
||||
QUAD $0x808080808080ba49; WORD $0x8080 // mov r10, -9187201950435737472
|
||||
QUAD $0x5c5c5c5c5c5cbb49; WORD $0x5c5c // mov r11, 6655295901103053916
|
||||
QUAD $0x222222222222be49; WORD $0x2222 // mov r14, 2459565876494606882
|
||||
WORD $0x8941; BYTE $0xf4 // mov r12d, esi
|
||||
LONG $0x03ecc141 // shr r12d, 3
|
||||
WORD $0xc931 // xor ecx, ecx
|
||||
|
||||
LBB5_10:
|
||||
LONG $0xcf148b48 // mov rdx, qword [rdi + 8*rcx]
|
||||
LONG $0x0a2c8d4e // lea r13, [rdx + r9]
|
||||
WORD $0x0949; BYTE $0xd5 // or r13, rdx
|
||||
WORD $0x8948; BYTE $0xd3 // mov rbx, rdx
|
||||
WORD $0x314c; BYTE $0xf3 // xor rbx, r14
|
||||
WORD $0x014c; BYTE $0xfb // add rbx, r15
|
||||
WORD $0x094c; BYTE $0xeb // or rbx, r13
|
||||
WORD $0x314c; BYTE $0xda // xor rdx, r11
|
||||
WORD $0x014c; BYTE $0xfa // add rdx, r15
|
||||
WORD $0x0948; BYTE $0xda // or rdx, rbx
|
||||
WORD $0x214c; BYTE $0xd2 // and rdx, r10
|
||||
JNE LBB5_22
|
||||
LONG $0x01c18348 // add rcx, 1
|
||||
WORD $0x394c; BYTE $0xe1 // cmp rcx, r12
|
||||
JB LBB5_10
|
||||
WORD $0xe683; BYTE $0xf8 // and esi, -8
|
||||
JMP LBB5_23
|
||||
|
||||
LBB5_13:
|
||||
WORD $0x9848 // cdqe
|
||||
WORD $0xfe83; BYTE $0x08 // cmp esi, 8
|
||||
JL LBB5_25
|
||||
QUAD $0xfefefefefeffbe49; WORD $0xfefe // mov r14, -72340172838076673
|
||||
QUAD $0xdfdfdfdfdfe0b849; WORD $0xdfdf // mov r8, -2314885530818453536
|
||||
QUAD $0x808080808080b949; WORD $0x8080 // mov r9, -9187201950435737472
|
||||
QUAD $0x5c5c5c5c5c5cba49; WORD $0x5c5c // mov r10, 6655295901103053916
|
||||
QUAD $0x222222222222bb49; WORD $0x2222 // mov r11, 2459565876494606882
|
||||
WORD $0xeec1; BYTE $0x03 // shr esi, 3
|
||||
WORD $0xe683; BYTE $0x1f // and esi, 31
|
||||
WORD $0xdb31 // xor ebx, ebx
|
||||
|
||||
LBB5_15:
|
||||
LONG $0xdf148b48 // mov rdx, qword [rdi + 8*rbx]
|
||||
LONG $0x023c8d4e // lea r15, [rdx + r8]
|
||||
WORD $0x0949; BYTE $0xd7 // or r15, rdx
|
||||
WORD $0x8948; BYTE $0xd1 // mov rcx, rdx
|
||||
WORD $0x314c; BYTE $0xd9 // xor rcx, r11
|
||||
WORD $0x014c; BYTE $0xf1 // add rcx, r14
|
||||
WORD $0x094c; BYTE $0xf9 // or rcx, r15
|
||||
WORD $0x314c; BYTE $0xd2 // xor rdx, r10
|
||||
WORD $0x014c; BYTE $0xf2 // add rdx, r14
|
||||
WORD $0x0948; BYTE $0xca // or rdx, rcx
|
||||
WORD $0x214c; BYTE $0xca // and rdx, r9
|
||||
JNE LBB5_20
|
||||
LONG $0x01c38348 // add rbx, 1
|
||||
WORD $0x3948; BYTE $0xf3 // cmp rbx, rsi
|
||||
JB LBB5_15
|
||||
WORD $0xe6c1; BYTE $0x03 // shl esi, 3
|
||||
JMP LBB5_21
|
||||
|
||||
LBB5_18:
|
||||
WORD $0xbc0f; BYTE $0xc3 // bsf eax, ebx
|
||||
JMP LBB5_25
|
||||
|
||||
LBB5_19:
|
||||
WORD $0xbc0f; BYTE $0xc2 // bsf eax, edx
|
||||
JMP LBB5_24
|
||||
|
||||
LBB5_20:
|
||||
WORD $0xbc0f; BYTE $0xf2 // bsf esi, edx
|
||||
WORD $0xeec1; BYTE $0x03 // shr esi, 3
|
||||
|
||||
LBB5_21:
|
||||
WORD $0xf189 // mov ecx, esi
|
||||
WORD $0x0148; BYTE $0xc8 // add rax, rcx
|
||||
JMP LBB5_25
|
||||
|
||||
LBB5_22:
|
||||
WORD $0xbc0f; BYTE $0xf2 // bsf esi, edx
|
||||
WORD $0xeec1; BYTE $0x03 // shr esi, 3
|
||||
|
||||
LBB5_23:
|
||||
WORD $0xf189 // mov ecx, esi
|
||||
WORD $0x0148; BYTE $0xc8 // add rax, rcx
|
||||
|
||||
LBB5_24:
|
||||
WORD $0x014c; BYTE $0xc0 // add rax, r8
|
||||
|
||||
LBB5_25:
|
||||
VZEROUPPER
|
||||
MOVQ AX, ret+16(FP)
|
||||
RET
|
||||
|
|
|
@ -402,12 +402,13 @@ func Run(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]b
|
|||
code = code.End.Next
|
||||
break
|
||||
}
|
||||
b = appendStructHead(ctx, b)
|
||||
mapCtx := encoder.NewMapContext(mlen)
|
||||
unorderedMap := (ctx.Option.Flag & encoder.UnorderedMapOption) != 0
|
||||
mapCtx := encoder.NewMapContext(mlen, unorderedMap)
|
||||
mapiterinit(code.Type, uptr, &mapCtx.Iter)
|
||||
store(ctxptr, code.Idx, uintptr(unsafe.Pointer(mapCtx)))
|
||||
ctx.KeepRefs = append(ctx.KeepRefs, unsafe.Pointer(mapCtx))
|
||||
if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
|
||||
b = appendStructHead(ctx, b)
|
||||
if unorderedMap {
|
||||
b = appendMapKeyIndent(ctx, code.Next, b)
|
||||
} else {
|
||||
mapCtx.Start = len(b)
|
||||
|
|
|
@ -402,12 +402,13 @@ func Run(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]b
|
|||
code = code.End.Next
|
||||
break
|
||||
}
|
||||
b = appendStructHead(ctx, b)
|
||||
mapCtx := encoder.NewMapContext(mlen)
|
||||
unorderedMap := (ctx.Option.Flag & encoder.UnorderedMapOption) != 0
|
||||
mapCtx := encoder.NewMapContext(mlen, unorderedMap)
|
||||
mapiterinit(code.Type, uptr, &mapCtx.Iter)
|
||||
store(ctxptr, code.Idx, uintptr(unsafe.Pointer(mapCtx)))
|
||||
ctx.KeepRefs = append(ctx.KeepRefs, unsafe.Pointer(mapCtx))
|
||||
if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
|
||||
b = appendStructHead(ctx, b)
|
||||
if unorderedMap {
|
||||
b = appendMapKeyIndent(ctx, code.Next, b)
|
||||
} else {
|
||||
mapCtx.Start = len(b)
|
||||
|
|
|
@ -402,12 +402,13 @@ func Run(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]b
|
|||
code = code.End.Next
|
||||
break
|
||||
}
|
||||
b = appendStructHead(ctx, b)
|
||||
mapCtx := encoder.NewMapContext(mlen)
|
||||
unorderedMap := (ctx.Option.Flag & encoder.UnorderedMapOption) != 0
|
||||
mapCtx := encoder.NewMapContext(mlen, unorderedMap)
|
||||
mapiterinit(code.Type, uptr, &mapCtx.Iter)
|
||||
store(ctxptr, code.Idx, uintptr(unsafe.Pointer(mapCtx)))
|
||||
ctx.KeepRefs = append(ctx.KeepRefs, unsafe.Pointer(mapCtx))
|
||||
if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
|
||||
b = appendStructHead(ctx, b)
|
||||
if unorderedMap {
|
||||
b = appendMapKeyIndent(ctx, code.Next, b)
|
||||
} else {
|
||||
mapCtx.Start = len(b)
|
||||
|
|
|
@ -402,12 +402,13 @@ func Run(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]b
|
|||
code = code.End.Next
|
||||
break
|
||||
}
|
||||
b = appendStructHead(ctx, b)
|
||||
mapCtx := encoder.NewMapContext(mlen)
|
||||
unorderedMap := (ctx.Option.Flag & encoder.UnorderedMapOption) != 0
|
||||
mapCtx := encoder.NewMapContext(mlen, unorderedMap)
|
||||
mapiterinit(code.Type, uptr, &mapCtx.Iter)
|
||||
store(ctxptr, code.Idx, uintptr(unsafe.Pointer(mapCtx)))
|
||||
ctx.KeepRefs = append(ctx.KeepRefs, unsafe.Pointer(mapCtx))
|
||||
if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
|
||||
b = appendStructHead(ctx, b)
|
||||
if unorderedMap {
|
||||
b = appendMapKeyIndent(ctx, code.Next, b)
|
||||
} else {
|
||||
mapCtx.Start = len(b)
|
||||
|
|
Loading…
Reference in New Issue