wip: trying bufio reader for extracting tokens

This commit is contained in:
2026-04-17 18:50:02 +01:00
parent 738743e4ec
commit 3af11b0a2a
2 changed files with 65 additions and 16 deletions
+57 -12
View File
@@ -1,8 +1,9 @@
package tokenizer package tokenizer
import ( import (
"fmt" "bufio"
"regexp" "regexp"
"slices"
) )
type TokenType int type TokenType int
@@ -15,7 +16,7 @@ const (
IDENTIFIER IDENTIFIER
) )
var KEYWORDS = [...]string{ var KEYWORDS = []string{
"class", "class",
"constructor", "constructor",
"function", "function",
@@ -24,7 +25,7 @@ var KEYWORDS = [...]string{
"static", "static",
"var", "var",
"int", "int",
"char", "read",
"boolean", "boolean",
"void", "void",
"true", "true",
@@ -39,7 +40,7 @@ var KEYWORDS = [...]string{
"return", "return",
} }
var SYMBOLS = [...]string{ var SYMBOLS = []string{
"{", "{",
"}", "}",
"(", "(",
@@ -66,17 +67,61 @@ type Token struct {
Type TokenType Type TokenType
} }
func ExtractTokens(tokens *[]Token, source []byte) error { func ExtractTokens(tokens *[]Token, reader *bufio.Reader) error {
i := 0 read := ""
buf := []byte{}
for i < len(source) { for {
t := string(source[i]) _, err := reader.Read(buf)
read += string(buf)
if match, _ := regexp.MatchString("^[[:space:]]$", t); match { if err != nil {
i++ return err
}
if match, _ := regexp.MatchString("^[[:space:]]$", read); match {
continue
}
if read == "/" {
next, err := reader.ReadByte()
if err != nil {
return err
}
if string(next) == "/" || string(next) == "*" {
_, err := reader.ReadBytes('/')
if err != nil {
return err
}
} else { } else {
fmt.Println(t) *tokens = append(*tokens, Token{Value: read, Type: SYMBOL})
i++ read = ""
err := reader.UnreadByte()
if err != nil {
return err
}
continue
}
}
if slices.Contains(SYMBOLS, read) {
*tokens = append(*tokens, Token{Value: read, Type: SYMBOL})
read = ""
continue
}
if slices.Contains(KEYWORDS, read) {
*tokens = append(*tokens, Token{Value: read, Type: KEYWORD})
read = ""
continue
} }
} }
+7 -3
View File
@@ -1,17 +1,20 @@
package main package main
import ( import (
"bufio"
"io/fs" "io/fs"
"os" "os"
"path/filepath" "path/filepath"
"regexp" "regexp"
"strings" "strings"
"github.com/hazemKrimi/jack-compiler/internal/parser"
"github.com/hazemKrimi/jack-compiler/internal/tokenizer" "github.com/hazemKrimi/jack-compiler/internal/tokenizer"
) )
func process(inputPath string) error { func process(inputPath string) error {
source, err := os.ReadFile(inputPath) file, err := os.Open(inputPath)
reader := bufio.NewReader(file)
if err != nil { if err != nil {
return err return err
@@ -19,13 +22,14 @@ func process(inputPath string) error {
tokens := make([]tokenizer.Token, 0, 1000) tokens := make([]tokenizer.Token, 0, 1000)
if err := tokenizer.ExtractTokens(&tokens, source); err != nil { if err := tokenizer.ExtractTokens(&tokens, reader); err != nil {
return err return err
} }
parsed := parser.ParseTokens(tokens)
outputPath := strings.Replace(inputPath, ".jack", ".xml", 1) outputPath := strings.Replace(inputPath, ".jack", ".xml", 1)
if err := os.WriteFile(outputPath, source, 0644); err != nil { if err := os.WriteFile(outputPath, []byte(parsed), 0644); err != nil {
return err return err
} }