forked from mirror/go-json
Fix string.c
This commit is contained in:
parent
e736de7070
commit
68c500590e
|
@ -1,6 +1,6 @@
|
||||||
.PHONY: asm
|
.PHONY: asm
|
||||||
asm:
|
asm:
|
||||||
clang -S -O2 -mavx2 -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -c ./simd/string.c
|
clang -Wall -S -O2 -mavx2 -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -c ./simd/string.c
|
||||||
|
|
||||||
.PHONY: generate
|
.PHONY: generate
|
||||||
generate:
|
generate:
|
||||||
|
|
|
@ -1,8 +1,29 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <stdbool.h>
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
// needEscape is a 256-entry lookup table indexed by byte value. An entry of 1
// flags the byte: 0x00-0x1F, '"' (0x22), '\\' (0x5C) and every byte 0x80-0xFF.
// All other bytes, including 0x7F, are 0.
static const bool needEscape[256] = {
|
||||||
|
// Column header: low nibble of the byte value (0 1 2 3 4 5 6 7 8 9 A B C D E F)
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00-0x0F
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x1F
|
||||||
|
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20-0x2F (0x22 '"' is flagged)
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30-0x3F
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40-0x4F
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x50-0x5F (0x5C '\\' is flagged)
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60-0x6F
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70-0x7F
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80-0x8F
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90-0x9F
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xA0-0xAF
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xB0-0xBF
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xC0-0xCF
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xD0-0xDF
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xE0-0xEF
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xF0-0xFF
|
||||||
|
};
|
||||||
|
|
||||||
uint64_t findHTMLEscapeIndex64(char *buf, int len) {
|
uint64_t findHTMLEscapeIndex64(char *buf, int len) {
|
||||||
static const uint64_t lsb = 0x0101010101010101;
|
static const uint64_t lsb = 0x0101010101010101;
|
||||||
static const uint64_t msb = 0x8080808080808080;
|
static const uint64_t msb = 0x8080808080808080;
|
||||||
|
@ -26,7 +47,7 @@ uint64_t findHTMLEscapeIndex64(char *buf, int len) {
|
||||||
}
|
}
|
||||||
sp += 8;
|
sp += 8;
|
||||||
}
|
}
|
||||||
return 8 * chunkLen;
|
return chunkIdx * 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t findHTMLEscapeIndex128(char *buf, int len) {
|
uint64_t findHTMLEscapeIndex128(char *buf, int len) {
|
||||||
|
@ -40,7 +61,6 @@ uint64_t findHTMLEscapeIndex128(char *buf, int len) {
|
||||||
static const __m64 gt = (__m64)(lsb * '>');
|
static const __m64 gt = (__m64)(lsb * '>');
|
||||||
static const __m64 amp = (__m64)(lsb * '&');
|
static const __m64 amp = (__m64)(lsb * '&');
|
||||||
|
|
||||||
__m128i zeroV = _mm_setzero_si128();
|
|
||||||
__m128i msbV = _mm_set_epi64((__m64)(msb), (__m64)(msb));
|
__m128i msbV = _mm_set_epi64((__m64)(msb), (__m64)(msb));
|
||||||
__m128i lsbV = _mm_set_epi64((__m64)(lsb), (__m64)(lsb));
|
__m128i lsbV = _mm_set_epi64((__m64)(lsb), (__m64)(lsb));
|
||||||
__m128i spaceV = _mm_set_epi64(space, space);
|
__m128i spaceV = _mm_set_epi64(space, space);
|
||||||
|
@ -87,7 +107,6 @@ uint64_t findHTMLEscapeIndex256(char *buf, int len) {
|
||||||
static const __m64 gt = (__m64)(lsb * '>');
|
static const __m64 gt = (__m64)(lsb * '>');
|
||||||
static const __m64 amp = (__m64)(lsb * '&');
|
static const __m64 amp = (__m64)(lsb * '&');
|
||||||
|
|
||||||
__m256i zeroV = _mm256_setzero_si256();
|
|
||||||
__m256i msbV = _mm256_set1_epi64x(msb);
|
__m256i msbV = _mm256_set1_epi64x(msb);
|
||||||
__m256i lsbV = _mm256_set1_epi64x(lsb);
|
__m256i lsbV = _mm256_set1_epi64x(lsb);
|
||||||
__m256i spaceV = _mm256_set1_epi64x(space);
|
__m256i spaceV = _mm256_set1_epi64x(space);
|
||||||
|
@ -146,7 +165,14 @@ uint64_t findEscapeIndex64(char *buf, int len) {
|
||||||
}
|
}
|
||||||
sp += 8;
|
sp += 8;
|
||||||
}
|
}
|
||||||
return 8 * chunkLen;
|
int idx = 8 * chunkLen;
|
||||||
|
bool *needEscape = needEscape;
|
||||||
|
for ( ;idx < len; idx++) {
|
||||||
|
if (needEscape[buf[idx]] != 0) {
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t findEscapeIndex128(char *buf, int len) {
|
uint64_t findEscapeIndex128(char *buf, int len) {
|
||||||
|
@ -157,7 +183,6 @@ uint64_t findEscapeIndex128(char *buf, int len) {
|
||||||
static const __m64 quote = (__m64)(lsb * '"');
|
static const __m64 quote = (__m64)(lsb * '"');
|
||||||
static const __m64 escape = (__m64)(lsb * '\\');
|
static const __m64 escape = (__m64)(lsb * '\\');
|
||||||
|
|
||||||
__m128i zeroV = _mm_setzero_si128();
|
|
||||||
__m128i msbV = _mm_set_epi64((__m64)(msb), (__m64)(msb));
|
__m128i msbV = _mm_set_epi64((__m64)(msb), (__m64)(msb));
|
||||||
__m128i lsbV = _mm_set_epi64((__m64)(lsb), (__m64)(lsb));
|
__m128i lsbV = _mm_set_epi64((__m64)(lsb), (__m64)(lsb));
|
||||||
__m128i spaceV = _mm_set_epi64(space, space);
|
__m128i spaceV = _mm_set_epi64(space, space);
|
||||||
|
@ -181,10 +206,17 @@ uint64_t findEscapeIndex128(char *buf, int len) {
|
||||||
sp += 16;
|
sp += 16;
|
||||||
}
|
}
|
||||||
int idx = 16 * chunkLen;
|
int idx = 16 * chunkLen;
|
||||||
if (len - idx >= 8) {
|
int remainLen = len - idx;
|
||||||
return idx + findEscapeIndex64(sp, len - idx);
|
if (remainLen >= 8) {
|
||||||
|
return idx + findEscapeIndex64(sp, remainLen);
|
||||||
}
|
}
|
||||||
return idx;
|
bool *needEscape = needEscape;
|
||||||
|
for (; idx < len; idx++) {
|
||||||
|
if (needEscape[buf[idx]] != 0) {
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t findEscapeIndex256(char *buf, int len) {
|
uint64_t findEscapeIndex256(char *buf, int len) {
|
||||||
|
@ -195,7 +227,6 @@ uint64_t findEscapeIndex256(char *buf, int len) {
|
||||||
static const __m64 quote = (__m64)(lsb * '"');
|
static const __m64 quote = (__m64)(lsb * '"');
|
||||||
static const __m64 escape = (__m64)(lsb * '\\');
|
static const __m64 escape = (__m64)(lsb * '\\');
|
||||||
|
|
||||||
__m256i zeroV = _mm256_setzero_si256();
|
|
||||||
__m256i msbV = _mm256_set1_epi64x(msb);
|
__m256i msbV = _mm256_set1_epi64x(msb);
|
||||||
__m256i lsbV = _mm256_set1_epi64x(lsb);
|
__m256i lsbV = _mm256_set1_epi64x(lsb);
|
||||||
__m256i spaceV = _mm256_set1_epi64x(space);
|
__m256i spaceV = _mm256_set1_epi64x(space);
|
||||||
|
@ -214,7 +245,7 @@ uint64_t findEscapeIndex256(char *buf, int len) {
|
||||||
__m256i mask = _mm256_or_si256(_mm256_or_si256(_mm256_or_si256(n, spaceN), quoteN), escapeN);
|
__m256i mask = _mm256_or_si256(_mm256_or_si256(_mm256_or_si256(n, spaceN), quoteN), escapeN);
|
||||||
int movemask = _mm256_movemask_epi8(_mm256_and_si256(mask, msbV));
|
int movemask = _mm256_movemask_epi8(_mm256_and_si256(mask, msbV));
|
||||||
if (movemask != 0) {
|
if (movemask != 0) {
|
||||||
return __builtin_ctz(movemask);
|
return __builtin_ctz(movemask) + chunkIdx * 32;
|
||||||
}
|
}
|
||||||
sp += 32;
|
sp += 32;
|
||||||
}
|
}
|
||||||
|
@ -225,5 +256,11 @@ uint64_t findEscapeIndex256(char *buf, int len) {
|
||||||
} else if (remainLen >= 8) {
|
} else if (remainLen >= 8) {
|
||||||
return idx + findEscapeIndex64(sp, remainLen);
|
return idx + findEscapeIndex64(sp, remainLen);
|
||||||
}
|
}
|
||||||
return idx;
|
bool *needEscape = needEscape;
|
||||||
|
for (; idx < len; idx++) {
|
||||||
|
if (needEscape[buf[idx]] != 0) {
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len;
|
||||||
}
|
}
|
||||||
|
|
|
@ -758,10 +758,6 @@ func appendString(buf []byte, s string) []byte {
|
||||||
}
|
}
|
||||||
ESCAPE:
|
ESCAPE:
|
||||||
c := s[j]
|
c := s[j]
|
||||||
if !needEscape[c] {
|
|
||||||
j++
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
switch c {
|
switch c {
|
||||||
case '\\', '"':
|
case '\\', '"':
|
||||||
buf = append(buf, s[i:j]...)
|
buf = append(buf, s[i:j]...)
|
||||||
|
|
Loading…
Reference in New Issue