Fix string.c

This commit is contained in:
Masaaki Goshima 2021-12-29 21:23:36 +09:00
parent e736de7070
commit 68c500590e
No known key found for this signature in database
GPG Key ID: 6A53785055537153
3 changed files with 49 additions and 16 deletions

View File

@ -1,6 +1,6 @@
.PHONY: asm .PHONY: asm
asm: asm:
clang -S -O2 -mavx2 -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -c ./simd/string.c clang -Wall -S -O2 -mavx2 -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -c ./simd/string.c
.PHONY: generate .PHONY: generate
generate: generate:

View File

@ -1,8 +1,29 @@
#include <stdio.h> #include <stdio.h>
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
#include <stdbool.h>
#include <immintrin.h> #include <immintrin.h>
/*
 * Per-byte lookup table: needEscape[b] is true when byte value b is flagged
 * as requiring an escape, false otherwise.
 *
 * Flagged values:
 *   - 0x00-0x1F
 *   - 0x22 (double quote) and 0x5C (backslash)
 *   - 0x80-0xFF
 *
 * Only the flagged rows are written out; every element without an explicit
 * initializer is zero-initialized, i.e. false.
 *
 * NOTE(review): index with an unsigned byte value (e.g. cast a `char` to
 * `unsigned char` first) -- a negative index is out of bounds.
 */
static const bool needEscape[256] = {
    /* 0x00-0x1F */
    [0x00] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    [0x10] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

    /* The two flagged bytes inside 0x20-0x7F */
    [0x22] = 1, /* '"'  */
    [0x5C] = 1, /* '\\' */

    /* 0x80-0xFF */
    [0x80] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    [0x90] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    [0xA0] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    [0xB0] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    [0xC0] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    [0xD0] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    [0xE0] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    [0xF0] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
uint64_t findHTMLEscapeIndex64(char *buf, int len) { uint64_t findHTMLEscapeIndex64(char *buf, int len) {
static const uint64_t lsb = 0x0101010101010101; static const uint64_t lsb = 0x0101010101010101;
static const uint64_t msb = 0x8080808080808080; static const uint64_t msb = 0x8080808080808080;
@ -26,7 +47,7 @@ uint64_t findHTMLEscapeIndex64(char *buf, int len) {
} }
sp += 8; sp += 8;
} }
return 8 * chunkLen; return chunkIdx * 8;
} }
uint64_t findHTMLEscapeIndex128(char *buf, int len) { uint64_t findHTMLEscapeIndex128(char *buf, int len) {
@ -40,7 +61,6 @@ uint64_t findHTMLEscapeIndex128(char *buf, int len) {
static const __m64 gt = (__m64)(lsb * '>'); static const __m64 gt = (__m64)(lsb * '>');
static const __m64 amp = (__m64)(lsb * '&'); static const __m64 amp = (__m64)(lsb * '&');
__m128i zeroV = _mm_setzero_si128();
__m128i msbV = _mm_set_epi64((__m64)(msb), (__m64)(msb)); __m128i msbV = _mm_set_epi64((__m64)(msb), (__m64)(msb));
__m128i lsbV = _mm_set_epi64((__m64)(lsb), (__m64)(lsb)); __m128i lsbV = _mm_set_epi64((__m64)(lsb), (__m64)(lsb));
__m128i spaceV = _mm_set_epi64(space, space); __m128i spaceV = _mm_set_epi64(space, space);
@ -87,7 +107,6 @@ uint64_t findHTMLEscapeIndex256(char *buf, int len) {
static const __m64 gt = (__m64)(lsb * '>'); static const __m64 gt = (__m64)(lsb * '>');
static const __m64 amp = (__m64)(lsb * '&'); static const __m64 amp = (__m64)(lsb * '&');
__m256i zeroV = _mm256_setzero_si256();
__m256i msbV = _mm256_set1_epi64x(msb); __m256i msbV = _mm256_set1_epi64x(msb);
__m256i lsbV = _mm256_set1_epi64x(lsb); __m256i lsbV = _mm256_set1_epi64x(lsb);
__m256i spaceV = _mm256_set1_epi64x(space); __m256i spaceV = _mm256_set1_epi64x(space);
@ -146,7 +165,14 @@ uint64_t findEscapeIndex64(char *buf, int len) {
} }
sp += 8; sp += 8;
} }
return 8 * chunkLen; int idx = 8 * chunkLen;
bool *needEscape = needEscape;
for ( ;idx < len; idx++) {
if (needEscape[buf[idx]] != 0) {
return idx;
}
}
return len;
} }
uint64_t findEscapeIndex128(char *buf, int len) { uint64_t findEscapeIndex128(char *buf, int len) {
@ -157,7 +183,6 @@ uint64_t findEscapeIndex128(char *buf, int len) {
static const __m64 quote = (__m64)(lsb * '"'); static const __m64 quote = (__m64)(lsb * '"');
static const __m64 escape = (__m64)(lsb * '\\'); static const __m64 escape = (__m64)(lsb * '\\');
__m128i zeroV = _mm_setzero_si128();
__m128i msbV = _mm_set_epi64((__m64)(msb), (__m64)(msb)); __m128i msbV = _mm_set_epi64((__m64)(msb), (__m64)(msb));
__m128i lsbV = _mm_set_epi64((__m64)(lsb), (__m64)(lsb)); __m128i lsbV = _mm_set_epi64((__m64)(lsb), (__m64)(lsb));
__m128i spaceV = _mm_set_epi64(space, space); __m128i spaceV = _mm_set_epi64(space, space);
@ -181,10 +206,17 @@ uint64_t findEscapeIndex128(char *buf, int len) {
sp += 16; sp += 16;
} }
int idx = 16 * chunkLen; int idx = 16 * chunkLen;
if (len - idx >= 8) { int remainLen = len - idx;
return idx + findEscapeIndex64(sp, len - idx); if (remainLen >= 8) {
return idx + findEscapeIndex64(sp, remainLen);
} }
bool *needEscape = needEscape;
for (; idx < len; idx++) {
if (needEscape[buf[idx]] != 0) {
return idx; return idx;
}
}
return len;
} }
uint64_t findEscapeIndex256(char *buf, int len) { uint64_t findEscapeIndex256(char *buf, int len) {
@ -195,7 +227,6 @@ uint64_t findEscapeIndex256(char *buf, int len) {
static const __m64 quote = (__m64)(lsb * '"'); static const __m64 quote = (__m64)(lsb * '"');
static const __m64 escape = (__m64)(lsb * '\\'); static const __m64 escape = (__m64)(lsb * '\\');
__m256i zeroV = _mm256_setzero_si256();
__m256i msbV = _mm256_set1_epi64x(msb); __m256i msbV = _mm256_set1_epi64x(msb);
__m256i lsbV = _mm256_set1_epi64x(lsb); __m256i lsbV = _mm256_set1_epi64x(lsb);
__m256i spaceV = _mm256_set1_epi64x(space); __m256i spaceV = _mm256_set1_epi64x(space);
@ -214,7 +245,7 @@ uint64_t findEscapeIndex256(char *buf, int len) {
__m256i mask = _mm256_or_si256(_mm256_or_si256(_mm256_or_si256(n, spaceN), quoteN), escapeN); __m256i mask = _mm256_or_si256(_mm256_or_si256(_mm256_or_si256(n, spaceN), quoteN), escapeN);
int movemask = _mm256_movemask_epi8(_mm256_and_si256(mask, msbV)); int movemask = _mm256_movemask_epi8(_mm256_and_si256(mask, msbV));
if (movemask != 0) { if (movemask != 0) {
return __builtin_ctz(movemask); return __builtin_ctz(movemask) + chunkIdx * 32;
} }
sp += 32; sp += 32;
} }
@ -225,5 +256,11 @@ uint64_t findEscapeIndex256(char *buf, int len) {
} else if (remainLen >= 8) { } else if (remainLen >= 8) {
return idx + findEscapeIndex64(sp, remainLen); return idx + findEscapeIndex64(sp, remainLen);
} }
bool *needEscape = needEscape;
for (; idx < len; idx++) {
if (needEscape[buf[idx]] != 0) {
return idx; return idx;
}
}
return len;
} }

View File

@ -758,10 +758,6 @@ func appendString(buf []byte, s string) []byte {
} }
ESCAPE: ESCAPE:
c := s[j] c := s[j]
if !needEscape[c] {
j++
continue
}
switch c { switch c {
case '\\', '"': case '\\', '"':
buf = append(buf, s[i:j]...) buf = append(buf, s[i:j]...)