diff --git a/internal/cmd/generator/Makefile b/internal/cmd/generator/Makefile index f1e0c86..a634370 100644 --- a/internal/cmd/generator/Makefile +++ b/internal/cmd/generator/Makefile @@ -1,6 +1,6 @@ .PHONY: asm asm: - clang -S -O2 -mavx2 -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -c ./simd/string.c + clang -Wall -S -O2 -mavx2 -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -c ./simd/string.c .PHONY: generate generate: diff --git a/internal/cmd/generator/simd/string.c b/internal/cmd/generator/simd/string.c index ebbb10b..0a6314e 100644 --- a/internal/cmd/generator/simd/string.c +++ b/internal/cmd/generator/simd/string.c @@ -1,8 +1,29 @@ #include #include #include +#include #include +static const bool needEscape[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00-0x0F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x1F + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20-0x2F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30-0x3F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40-0x4F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x50-0x5F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60-0x6F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70-0x7F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80-0x8F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90-0x9F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xA0-0xAF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xB0-0xBF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xC0-0xCF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xD0-0xDF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xE0-0xEF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xF0-0xFF + }; + uint64_t findHTMLEscapeIndex64(char *buf, int len) { static const uint64_t lsb = 0x0101010101010101; static const uint64_t msb = 0x8080808080808080; @@ -26,7 +47,7 @@ uint64_t findHTMLEscapeIndex64(char *buf, int len) { } sp += 8; } - return 8 * chunkLen; + return chunkIdx * 8; } uint64_t findHTMLEscapeIndex128(char *buf, int len) { @@ -40,7 +61,6 @@ uint64_t findHTMLEscapeIndex128(char *buf, int len) { static const __m64 gt = (__m64)(lsb * '>'); static const __m64 amp = (__m64)(lsb * '&'); - __m128i zeroV = _mm_setzero_si128(); __m128i msbV = _mm_set_epi64((__m64)(msb), (__m64)(msb)); __m128i lsbV = _mm_set_epi64((__m64)(lsb), (__m64)(lsb)); __m128i spaceV = _mm_set_epi64(space, space); @@ -87,7 +107,6 @@ uint64_t findHTMLEscapeIndex256(char *buf, int len) { static const __m64 gt = (__m64)(lsb * '>'); static const __m64 amp = (__m64)(lsb * '&'); - __m256i zeroV = _mm256_setzero_si256(); __m256i msbV = _mm256_set1_epi64x(msb); __m256i lsbV = _mm256_set1_epi64x(lsb); __m256i spaceV = _mm256_set1_epi64x(space); @@ -146,7 +165,14 @@ uint64_t findEscapeIndex64(char *buf, int len) { } sp += 8; } - return 8 * chunkLen; + int idx = 8 * chunkLen; + bool *needEscape = needEscape; + for ( ;idx < len; idx++) { + if (needEscape[buf[idx]] != 0) { + return idx; + } + } + return len; } uint64_t findEscapeIndex128(char *buf, int len) { @@ -157,7 +183,6 @@ uint64_t findEscapeIndex128(char *buf, int len) { static const __m64 quote = (__m64)(lsb * '"'); static const __m64 escape = (__m64)(lsb * '\\'); - __m128i zeroV = _mm_setzero_si128(); __m128i msbV = _mm_set_epi64((__m64)(msb), (__m64)(msb)); __m128i lsbV = _mm_set_epi64((__m64)(lsb), (__m64)(lsb)); __m128i spaceV = _mm_set_epi64(space, space); @@ -181,10 +206,17 @@ uint64_t findEscapeIndex128(char *buf, int len) { sp += 16; } int idx = 16 * chunkLen; - if (len - idx >= 8) { - return idx + findEscapeIndex64(sp, len - idx); + int remainLen = len - idx; + if (remainLen >= 8) { + return idx + findEscapeIndex64(sp, remainLen); } - return idx; + bool *needEscape = needEscape; + for (; idx < len; idx++) { + if (needEscape[buf[idx]] != 0) { + return idx; + } + } + return len; } uint64_t findEscapeIndex256(char *buf, int len) { @@ -195,7 +227,6 @@ uint64_t findEscapeIndex256(char *buf, int len) { static const __m64 quote = (__m64)(lsb * '"'); static const __m64 escape = (__m64)(lsb * '\\'); - __m256i zeroV = _mm256_setzero_si256(); __m256i msbV = _mm256_set1_epi64x(msb); __m256i lsbV = _mm256_set1_epi64x(lsb); __m256i spaceV = _mm256_set1_epi64x(space); @@ -214,7 +245,7 @@ uint64_t findEscapeIndex256(char *buf, int len) { __m256i mask = _mm256_or_si256(_mm256_or_si256(_mm256_or_si256(n, spaceN), quoteN), escapeN); int movemask = _mm256_movemask_epi8(_mm256_and_si256(mask, msbV)); if (movemask != 0) { - return __builtin_ctz(movemask); + return __builtin_ctz(movemask) + chunkIdx * 32; } sp += 32; } @@ -225,5 +256,11 @@ uint64_t findEscapeIndex256(char *buf, int len) { } else if (remainLen >= 8) { return idx + findEscapeIndex64(sp, remainLen); } - return idx; + bool *needEscape = needEscape; + for (; idx < len; idx++) { + if (needEscape[buf[idx]] != 0) { + return idx; + } + } + return len; } diff --git a/internal/encoder/string.go b/internal/encoder/string.go index 888d09b..8072a43 100644 --- a/internal/encoder/string.go +++ b/internal/encoder/string.go @@ -758,10 +758,6 @@ func appendString(buf []byte, s string) []byte { } ESCAPE: c := s[j] - if !needEscape[c] { - j++ - continue - } switch c { case '\\', '"': buf = append(buf, s[i:j]...)