From 3af11b0a2a2e94a40b7bac95ade1ebe0475c0185 Mon Sep 17 00:00:00 2001
From: Hazem Krimi
Date: Fri, 17 Apr 2026 18:50:02 +0100
Subject: [PATCH] wip: trying bufio reader for extracting tokens

---
 internal/tokenizer/tokenizer.go | 71 +++++++++++++++++++++++++++------
 main.go                         | 10 +++--
 2 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/internal/tokenizer/tokenizer.go b/internal/tokenizer/tokenizer.go
index 49c3f4e..f6818d4 100644
--- a/internal/tokenizer/tokenizer.go
+++ b/internal/tokenizer/tokenizer.go
@@ -1,8 +1,9 @@
 package tokenizer
 
 import (
-	"fmt"
+	"bufio"
 	"regexp"
+	"slices"
 )
 
 type TokenType int
@@ -15,7 +16,7 @@ const (
 	IDENTIFIER
 )
 
-var KEYWORDS = [...]string{
+var KEYWORDS = []string{
 	"class",
 	"constructor",
 	"function",
@@ -24,7 +25,7 @@ var KEYWORDS = [...]string{
 	"static",
 	"var",
 	"int",
-	"char",
+	"read",
 	"boolean",
 	"void",
 	"true",
@@ -39,7 +40,7 @@ var KEYWORDS = [...]string{
 	"return",
 }
 
-var SYMBOLS = [...]string{
+var SYMBOLS = []string{
 	"{",
 	"}",
 	"(",
@@ -66,17 +67,61 @@ type Token struct {
 	Type TokenType
 }
 
-func ExtractTokens(tokens *[]Token, source []byte) error {
-	i := 0
+func ExtractTokens(tokens *[]Token, reader *bufio.Reader) error {
+	read := ""
+	buf := []byte{}
 
-	for i < len(source) {
-		t := string(source[i])
+	for {
+		_, err := reader.Read(buf)
+		read += string(buf)
 
-		if match, _ := regexp.MatchString("^[[:space:]]$", t); match {
-			i++
-		} else {
-			fmt.Println(t)
-			i++
+		if err != nil {
+			return err
+		}
+
+		if match, _ := regexp.MatchString("^[[:space:]]$", read); match {
+			continue
+		}
+
+		if read == "/" {
+			next, err := reader.ReadByte()
+
+			if err != nil {
+				return err
+			}
+
+			if string(next) == "/" || string(next) == "*" {
+				_, err := reader.ReadBytes('/')
+
+				if err != nil {
+					return err
+				}
+			} else {
+				*tokens = append(*tokens, Token{Value: read, Type: SYMBOL})
+				read = ""
+
+				err := reader.UnreadByte()
+
+				if err != nil {
+					return err
+				}
+
+				continue
+			}
+		}
+
+		if slices.Contains(SYMBOLS, read) {
+			*tokens = append(*tokens, Token{Value: read, Type: SYMBOL})
+			read = ""
+
+			continue
+		}
+
+		if slices.Contains(KEYWORDS, read) {
+			*tokens = append(*tokens, Token{Value: read, Type: KEYWORD})
+			read = ""
+
+			continue
 		}
 	}
 
diff --git a/main.go b/main.go
index 246e870..849c342 100644
--- a/main.go
+++ b/main.go
@@ -1,17 +1,20 @@
 package main
 
 import (
+	"bufio"
 	"io/fs"
 	"os"
 	"path/filepath"
 	"regexp"
 	"strings"
 
+	"github.com/hazemKrimi/jack-compiler/internal/parser"
 	"github.com/hazemKrimi/jack-compiler/internal/tokenizer"
 )
 
 func process(inputPath string) error {
-	source, err := os.ReadFile(inputPath)
+	file, err := os.Open(inputPath)
+	reader := bufio.NewReader(file)
 
 	if err != nil {
 		return err
@@ -19,13 +22,14 @@ func process(inputPath string) error {
 
 	tokens := make([]tokenizer.Token, 0, 1000)
 
-	if err := tokenizer.ExtractTokens(&tokens, source); err != nil {
+	if err := tokenizer.ExtractTokens(&tokens, reader); err != nil {
 		return err
 	}
 
+	parsed := parser.ParseTokens(tokens)
 	outputPath := strings.Replace(inputPath, ".jack", ".xml", 1)
 
-	if err := os.WriteFile(outputPath, source, 0644); err != nil {
+	if err := os.WriteFile(outputPath, []byte(parsed), 0644); err != nil {
 		return err
 	}
 