Skip to content

Commit f62a9f6

Browse files
authored
Speed up fastscan package (#249)
I looked a little bit into why the `fastscan` package was slower than I expected. It's because I accidentally left stuff in the lexer that is totally unused but is the primary source of allocations and memory usage (and thus also bad for latency, since allocations aren't free). Once upon a time, in an older version of the lexer in protoparse (on which this fastscan lexer is based), this stuff was used to capture the actual raw text for a token. The new lexer in protocompile uses a completely different approach to reduce memory usage. But it's completely unused here. Removing it basically doubles the throughput of `fastscan` and causes it to use one third as many allocations and one quarter as much memory.

```
-- before --
BenchmarkGoogleapisFastScan-10    5   210397558 ns/op   468123099 B/op   11217259 allocs/op
BenchmarkGoogleapisFastScan-10    5   202723933 ns/op   468156548 B/op   11217275 allocs/op

-- after --
BenchmarkGoogleapisFastScan-10   12    93760795 ns/op   111257610 B/op    3710000 allocs/op
BenchmarkGoogleapisFastScan-10   12    94548743 ns/op   111259758 B/op    3710010 allocs/op
```
1 parent f4c4a6f commit f62a9f6

File tree

1 file changed

+0
-25
lines changed

1 file changed

+0
-25
lines changed

parser/fastscan/lexer.go

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ func (t tokenType) describe() string {
7878

7979
type runeReader struct {
8080
rr *bufio.Reader
81-
marked []rune
8281
unread []rune
8382
err error
8483
}
@@ -90,40 +89,19 @@ func (rr *runeReader) readRune() (r rune, err error) {
9089
if len(rr.unread) > 0 {
9190
r := rr.unread[len(rr.unread)-1]
9291
rr.unread = rr.unread[:len(rr.unread)-1]
93-
if rr.marked != nil {
94-
rr.marked = append(rr.marked, r)
95-
}
9692
return r, nil
9793
}
9894
r, _, err = rr.rr.ReadRune()
9995
if err != nil {
10096
rr.err = err
101-
} else if rr.marked != nil {
102-
rr.marked = append(rr.marked, r)
10397
}
10498
return r, err
10599
}
106100

107101
func (rr *runeReader) unreadRune(r rune) {
108-
if rr.marked != nil {
109-
if rr.marked[len(rr.marked)-1] != r {
110-
panic("unread rune is not the same as last marked rune!")
111-
}
112-
rr.marked = rr.marked[:len(rr.marked)-1]
113-
}
114102
rr.unread = append(rr.unread, r)
115103
}
116104

117-
func (rr *runeReader) startMark(initial rune) {
118-
rr.marked = []rune{initial}
119-
}
120-
121-
func (rr *runeReader) endMark() string {
122-
m := string(rr.marked)
123-
rr.marked = rr.marked[:0]
124-
return m
125-
}
126-
127105
type lexer struct {
128106
input *runeReader
129107
// start of the next rune in the input
@@ -161,8 +139,6 @@ func (l *lexer) adjustPos(c rune) {
161139
}
162140

163141
func (l *lexer) Lex() (tokenType, any, error) {
164-
l.input.endMark() // reset, just in case
165-
166142
for {
167143
c, err := l.input.readRune()
168144
if err == io.EOF {
@@ -181,7 +157,6 @@ func (l *lexer) Lex() (tokenType, any, error) {
181157
continue
182158
}
183159

184-
l.input.startMark(c)
185160
l.prevTokenLine, l.prevTokenCol = l.curLine, l.curCol
186161
l.adjustPos(c)
187162
if c == '.' {

0 commit comments

Comments (0)