From dbda43cc7c86906f01e6569a78b9adf3e8254cf5 Mon Sep 17 00:00:00 2001 From: gobwas Date: Sat, 26 Dec 2015 12:14:30 +0300 Subject: [PATCH] utf-8 support --- lexer.go | 27 +++++++++++++-------------- lexer_test.go | 20 ++++++++++---------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/lexer.go b/lexer.go index e7095a4..76685c9 100644 --- a/lexer.go +++ b/lexer.go @@ -122,9 +122,8 @@ func (l *lexer) unread() { l.runes-- } -func (l *lexer) shift(i int) { - l.pos += i - l.start = l.pos +func (l *lexer) reset() { + l.pos = l.start l.runes = 0 } @@ -160,7 +159,7 @@ func (l *lexer) emit(t itemType) { l.width = 0 } -func (l *lexer) flush(t itemType) { +func (l *lexer) emitMaybe(t itemType) { if l.pos > l.start { l.emit(t) } @@ -202,15 +201,15 @@ func lexText(l *lexer) stateFn { } case single: l.unread() - l.flush(item_text) + l.emitMaybe(item_text) return lexSingle case any: l.unread() - l.flush(item_text) + l.emitMaybe(item_text) return lexAny case range_open: l.unread() - l.flush(item_text) + l.emitMaybe(item_text) return lexRangeOpen } @@ -236,22 +235,22 @@ func lexInsideRange(l *lexer) stateFn { switch c { case inside_range_not: // only first char makes sense - if l.pos-1 == l.start { + if l.pos-l.width == l.start { l.emit(item_range_not) } case inside_range_minus: - if l.pos-l.start != 2 { + if l.runes != 2 { l.errorf("unexpected length of lo char inside range") return nil } - l.shift(-2) + l.reset() return lexRangeHiLo case range_close: l.unread() - l.flush(item_range_chars) + l.emitMaybe(item_range_chars) return lexRangeClose } } @@ -275,7 +274,7 @@ func lexRangeHiLo(l *lexer) stateFn { switch c { case inside_range_minus: - if l.pos-l.start != 1 { + if l.runes != 1 { l.errorf("unexpected length of range: single character expected before minus") return nil } @@ -285,7 +284,7 @@ func lexRangeHiLo(l *lexer) stateFn { case range_close: l.unread() - if l.pos-l.start != 1 { + if l.runes != 1 { l.errorf("unexpected length of range: single character expected before close") return nil } @@ -298,7 +297,7 @@ func lexRangeHiLo(l *lexer) stateFn { continue } - if l.pos-l.start != 1 { + if l.runes != 1 { l.errorf("unexpected length of range: single character expected at the begining") return nil } diff --git a/lexer_test.go b/lexer_test.go index 69f3703..63fc5b1 100644 --- a/lexer_test.go +++ b/lexer_test.go @@ -42,43 +42,43 @@ func TestLexGood(t *testing.T) { }, }, { - pattern: "[a-b]", + pattern: "[日-語]", items: []item{ item{item_range_open, "["}, - item{item_range_lo, "a"}, + item{item_range_lo, "日"}, item{item_range_minus, "-"}, - item{item_range_hi, "b"}, + item{item_range_hi, "語"}, item{item_range_close, "]"}, item{item_eof, ""}, }, }, { - pattern: "[!a-b]", + pattern: "[!日-語]", items: []item{ item{item_range_open, "["}, item{item_range_not, "!"}, - item{item_range_lo, "a"}, + item{item_range_lo, "日"}, item{item_range_minus, "-"}, - item{item_range_hi, "b"}, + item{item_range_hi, "語"}, item{item_range_close, "]"}, item{item_eof, ""}, }, }, { - pattern: "[abc]", + pattern: "[日本語]", items: []item{ item{item_range_open, "["}, - item{item_range_chars, "abc"}, + item{item_range_chars, "日本語"}, item{item_range_close, "]"}, item{item_eof, ""}, }, }, { - pattern: "[!abc]", + pattern: "[!日本語]", items: []item{ item{item_range_open, "["}, item{item_range_not, "!"}, - item{item_range_chars, "abc"}, + item{item_range_chars, "日本語"}, item{item_range_close, "]"}, item{item_eof, ""}, },