From a51a35ae3232254685f26a0b6d995ca0e81e2248 Mon Sep 17 00:00:00 2001 From: Steven Scott <42449819+stevenscott89@users.noreply.github.com> Date: Sat, 6 Oct 2018 11:35:33 -0400 Subject: [PATCH] Improve header parsing code Because the net/http server removes \r\n from multi-line header values, there's no need to check for \r or \n when skipping whitespace in headers (see https://godoc.org/net/textproto#Reader.ReadMIMEHeader). Given this fact, the whitespace test can be simplified to b == ' ' || b == '\t'. There's no need for the isSpaceOctet bit field in octetTypes. The isTokenOctet bit field is the only bit field remaining after the removal of isSpaceOctet. Simplify the code by replacing the isTokenOctet bit test in octetTypes with an array of booleans called isTokenOctet. Declare isTokenOctet as a composite literal instead of constructing it at runtime. Add documentation to core functions for parsing HTTP headers. --- util.go | 132 ++++++++++++++++++++++++++++++++++----------------- util_test.go | 1 + 2 files changed, 90 insertions(+), 43 deletions(-) diff --git a/util.go b/util.go index 354001e..7bf2f66 100644 --- a/util.go +++ b/util.go @@ -31,68 +31,113 @@ func generateChallengeKey() (string, error) { return base64.StdEncoding.EncodeToString(p), nil } -// Octet types from RFC 2616. -var octetTypes [256]byte - -const ( - isTokenOctet = 1 << iota - isSpaceOctet -) - -func init() { - // From RFC 2616 - // - // OCTET = <any 8-bit sequence of data> - // CHAR = <any US-ASCII character (octets 0 - 127)> - // CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)> - // CR = <US-ASCII CR, carriage return (13)> - // LF = <US-ASCII LF, linefeed (10)> - // SP = <US-ASCII SP, space (32)> - // HT = <US-ASCII HT, horizontal-tab (9)> - // <"> = <US-ASCII double-quote mark (34)> - // CRLF = CR LF - // LWS = [CRLF] 1*( SP | HT ) - // TEXT = <any OCTET except CTLs, but including LWS> - // separators = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\" | <"> - // | "/" | "[" | "]" | "?" 
| "=" | "{" | "}" | SP | HT - // token = 1*<any CHAR except CTLs or separators> - // qdtext = <any TEXT except <">> - - for c := 0; c < 256; c++ { - var t byte - isCtl := c <= 31 || c == 127 - isChar := 0 <= c && c <= 127 - isSeparator := strings.IndexRune(" \t\"(),/:;<=>?@[]\\{}", rune(c)) >= 0 - if strings.IndexRune(" \t\r\n", rune(c)) >= 0 { - t |= isSpaceOctet - } - if isChar && !isCtl && !isSeparator { - t |= isTokenOctet - } - octetTypes[c] = t - } +// Token octets per RFC 2616. +var isTokenOctet = [256]bool{ + '!': true, + '#': true, + '$': true, + '%': true, + '&': true, + '\'': true, + '*': true, + '+': true, + '-': true, + '.': true, + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + 'A': true, + 'B': true, + 'C': true, + 'D': true, + 'E': true, + 'F': true, + 'G': true, + 'H': true, + 'I': true, + 'J': true, + 'K': true, + 'L': true, + 'M': true, + 'N': true, + 'O': true, + 'P': true, + 'Q': true, + 'R': true, + 'S': true, + 'T': true, + 'U': true, + 'W': true, + 'V': true, + 'X': true, + 'Y': true, + 'Z': true, + '^': true, + '_': true, + '`': true, + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + '|': true, + '~': true, } +// skipSpace returns a slice of the string s with all leading RFC 2616 linear +// whitespace removed. func skipSpace(s string) (rest string) { i := 0 for ; i < len(s); i++ { - if octetTypes[s[i]]&isSpaceOctet == 0 { + if b := s[i]; b != ' ' && b != '\t' { break } } return s[i:] } +// nextToken returns the leading RFC 2616 token of s and the string following +// the token. 
func nextToken(s string) (token, rest string) { i := 0 for ; i < len(s); i++ { - if octetTypes[s[i]]&isTokenOctet == 0 { + if !isTokenOctet[s[i]] { break } } return s[:i], s[i:] } +// nextTokenOrQuoted returns the leading token or quoted string per RFC 2616 +// and the string following the token or quoted string. func nextTokenOrQuoted(s string) (value string, rest string) { if !strings.HasPrefix(s, "\"") { return nextToken(s) @@ -128,7 +173,8 @@ func nextTokenOrQuoted(s string) (value string, rest string) { return "", "" } -// equalASCIIFold returns true if s is equal to t with ASCII case folding. +// equalASCIIFold returns true if s is equal to t with ASCII case folding as +// defined in RFC 4790. func equalASCIIFold(s, t string) bool { for s != "" && t != "" { sr, size := utf8.DecodeRuneInString(s) diff --git a/util_test.go b/util_test.go index 6e15965..af710ba 100644 --- a/util_test.go +++ b/util_test.go @@ -17,6 +17,7 @@ var equalASCIIFoldTests = []struct { {"WebSocket", "websocket", true}, {"websocket", "WebSocket", true}, {"Öyster", "öyster", false}, + {"WebSocket", "WetSocket", false}, } func TestEqualASCIIFold(t *testing.T) {