From 37e60d41d6644ad8c42ad049ee443161b23c98e4 Mon Sep 17 00:00:00 2001 From: Hazem Krimi Date: Wed, 22 Apr 2026 11:27:24 +0100 Subject: [PATCH] chore: functions for creating and writing tokens --- .../compilation-engine/compilation-engine.go | 227 +++++------------- internal/tokenizer/tokenizer.go | 45 +++- 2 files changed, 103 insertions(+), 169 deletions(-) diff --git a/internal/compilation-engine/compilation-engine.go b/internal/compilation-engine/compilation-engine.go index 438ac74..64c289d 100644 --- a/internal/compilation-engine/compilation-engine.go +++ b/internal/compilation-engine/compilation-engine.go @@ -8,53 +8,51 @@ import ( "github.com/hazemKrimi/jack-compiler/internal/tokenizer" ) +func WriteToken(output *strings.Builder, token tokenizer.Token, index *int) error { + if _, err := output.WriteString("<" + token.XML + "> " + token.Value + " \n"); err != nil { + return err + } + + (*index)++ + + return nil +} + func compileClassVarDec(output *strings.Builder, tokens []tokenizer.Token, index *int) error { if tokens[*index].Type != tokenizer.KEYWORD || !slices.Contains([]string{"static", "field"}, tokens[*index].Value) { return nil } output.WriteString("\n") - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if !slices.Contains([]tokenizer.TokenType{tokenizer.KEYWORD, tokenizer.IDENTIFIER}, tokens[*index].Type) && !slices.Contains([]string{"int", "char", "boolean"}, tokens[*index].Value) { return errors.New("Invalid variable type name!") } - if tokens[*index].Type == tokenizer.KEYWORD { - output.WriteString(" " + tokens[*index].Value + " \n") - } else { - output.WriteString(" " + tokens[*index].Value + " \n") - } - - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.IDENTIFIER { return errors.New("Invalid variable name!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) for tokens[*index].Type == tokenizer.SYMBOL && tokens[*index].Value == "," { - output.WriteString(" " + tokens[*index].Value + " \n") - (*index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.IDENTIFIER { return errors.New("Invalid variable name!") } - output.WriteString(" " + tokens[*index].Value + " \n") - (*index)++ + WriteToken(output, tokens[*index], index) } if tokens[*index].Type != tokenizer.SYMBOL || tokens[*index].Value != ";" { return errors.New("Missing semicolon!") } - output.WriteString(" " + tokens[*index].Value + " \n") - (*index)++ - + WriteToken(output, tokens[*index], index) output.WriteString("\n") return compileClassVarDec(output, tokens, index) @@ -65,24 +63,16 @@ func compileParameterList(output *strings.Builder, tokens []tokenizer.Token, ind return nil } - if tokens[*index].Type == tokenizer.KEYWORD { - output.WriteString(" " + tokens[*index].Value + " \n") - } else { - output.WriteString(" " + tokens[*index].Value + " \n") - } - - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.IDENTIFIER { return errors.New("Invalid variable name!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type == tokenizer.SYMBOL && tokens[*index].Value == "," { - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) return compileParameterList(output, tokens, index) } @@ -97,47 +87,35 @@ func compileVariableDeclaration(output *strings.Builder, tokens []tokenizer.Toke output.WriteString("\n") - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if !slices.Contains([]tokenizer.TokenType{tokenizer.KEYWORD, tokenizer.IDENTIFIER}, tokens[*index].Type) && !slices.Contains([]string{"int", "char", "boolean"}, tokens[*index].Value) { return errors.New("Invalid variable type name!") } - if tokens[*index].Type == tokenizer.KEYWORD { - output.WriteString(" " + tokens[*index].Value + " \n") - } else { - output.WriteString(" " + tokens[*index].Value + " \n") - } - - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.IDENTIFIER { return errors.New("Invalid variable name!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) for tokens[*index].Type == tokenizer.SYMBOL && tokens[*index].Value == "," { - output.WriteString(" " + tokens[*index].Value + " \n") - (*index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.IDENTIFIER { return errors.New("Invalid variable name!") } - output.WriteString(" " + tokens[*index].Value + " \n") - (*index)++ + WriteToken(output, tokens[*index], index) } if tokens[*index].Type != tokenizer.SYMBOL || tokens[*index].Value != ";" { return errors.New("Missing semicolon!") } - output.WriteString(" " + tokens[*index].Value + " \n") - (*index)++ - + WriteToken(output, tokens[*index], index) output.WriteString("\n") return compileVariableDeclaration(output, tokens, index) @@ -150,36 +128,9 @@ func compileExpression(output *strings.Builder, tokens []tokenizer.Token, index output.WriteString("\n") output.WriteString("\n") - - switch tokens[*index].Type { - // case tokenizer.SYMBOL: - // var value string - // - // switch tokens[*index].Value { - // case "<": - // value = "<" - // case ">": - // value = ">" - // case "&": - // value = "&" - // default: - // value = tokens[*index].Value - // } - // - // output.WriteString(" " + value + " \n") - case tokenizer.KEYWORD: - output.WriteString(" " + tokens[*index].Value + " \n") - case tokenizer.IDENTIFIER: - output.WriteString(" " + tokens[*index].Value + " \n") - case tokenizer.INT_CONST: - output.WriteString(" " + tokens[*index].Value + " \n") - case tokenizer.STR_CONST: - output.WriteString(" " + tokens[*index].Value + " \n") - } - + WriteToken(output, tokens[*index], index) output.WriteString("\n") output.WriteString("\n") - *(index)++ return nil } @@ -191,8 +142,7 @@ func compileExpressionList(output *strings.Builder, tokens []tokenizer.Token, in } if tokens[*index].Type == tokenizer.SYMBOL && tokens[*index].Value == "," { - output.WriteString(" " + tokens[*index].Value + " \n") - (*index)++ + WriteToken(output, tokens[*index], index) return compileExpressionList(output, tokens, index) } @@ -208,22 +158,19 @@ func compileLetStatement(output *strings.Builder, tokens []tokenizer.Token, inde output.WriteString("\n") - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.IDENTIFIER { return errors.New("Invalid variable name!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.SYMBOL || tokens[*index].Value != "=" { return errors.New("Missing assignment!") } - output.WriteString(" " + tokens[*index].Value + " \n") - (*index)++ + WriteToken(output, tokens[*index], index) if err := compileExpression(output, tokens, index); err != nil { return err @@ -233,9 +180,7 @@ func compileLetStatement(output *strings.Builder, tokens []tokenizer.Token, inde return errors.New("Missing semicolon!") } - output.WriteString(" " + tokens[*index].Value + " \n") - (*index)++ - + WriteToken(output, tokens[*index], index) output.WriteString("\n") return nil @@ -248,15 +193,13 @@ func compileIfStatement(output *strings.Builder, tokens []tokenizer.Token, index output.WriteString("\n") - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.SYMBOL || tokens[*index].Value != "(" { return errors.New("Missing if statement opening parenthese!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if err := compileExpression(output, tokens, index); err != nil { return err @@ -266,15 +209,13 @@ func compileIfStatement(output *strings.Builder, tokens []tokenizer.Token, index return errors.New("Missing if statement closing parenthese!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.SYMBOL || tokens[*index].Value != "{" { return errors.New("Missing if statement opening curly brace!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) output.WriteString("\n") @@ -288,20 +229,16 @@ func compileIfStatement(output *strings.Builder, tokens []tokenizer.Token, index return errors.New("Missing if statement closing curly brace!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type == tokenizer.KEYWORD && tokens[*index].Value == "else" { - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.SYMBOL || tokens[*index].Value != "{" { return errors.New("Missing if statement opening curly brace!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ - + WriteToken(output, tokens[*index], index) output.WriteString("\n") if err := compileStatements(output, tokens, index); err != nil { @@ -314,8 +251,7 @@ func compileIfStatement(output *strings.Builder, tokens []tokenizer.Token, index return errors.New("Missing if statement closing curly brace!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) } output.WriteString("\n") @@ -330,15 +266,13 @@ func compileWhileStatement(output *strings.Builder, tokens []tokenizer.Token, in output.WriteString("\n") - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.SYMBOL || tokens[*index].Value != "(" { return errors.New("Missing while statement opening parenthese!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if err := compileExpression(output, tokens, index); err != nil { return err @@ -348,15 +282,13 @@ func compileWhileStatement(output *strings.Builder, tokens []tokenizer.Token, in return errors.New("Missing while statement closing parenthese!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.SYMBOL || tokens[*index].Value != "{" { return errors.New("Missing while statement opening curly brace!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) output.WriteString("\n") @@ -370,8 +302,7 @@ func compileWhileStatement(output *strings.Builder, tokens []tokenizer.Token, in return errors.New("Missing while statement closing curly brace!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) output.WriteString("\n") @@ -385,34 +316,29 @@ func compileDoStatement(output *strings.Builder, tokens []tokenizer.Token, index output.WriteString("\n") - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.IDENTIFIER { return errors.New("Invalid variable name!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type == tokenizer.SYMBOL && tokens[*index].Value == "." { - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.IDENTIFIER { return errors.New("Invalid variable name!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) } if tokens[*index].Type != tokenizer.SYMBOL || tokens[*index].Value != "(" { return errors.New("Missing subroutine call opening parenthese!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) output.WriteString("\n") @@ -426,16 +352,13 @@ func compileDoStatement(output *strings.Builder, tokens []tokenizer.Token, index return errors.New("Missing subroutine call closing parenthese!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.SYMBOL || tokens[*index].Value != ";" { return errors.New("Missing semicolon!") } - output.WriteString(" " + tokens[*index].Value + " \n") - (*index)++ - + WriteToken(output, tokens[*index], index) output.WriteString("\n") return nil @@ -448,8 +371,7 @@ func compileReturnStatement(output *strings.Builder, tokens []tokenizer.Token, i output.WriteString("\n") - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if slices.Contains([]tokenizer.TokenType{tokenizer.KEYWORD, tokenizer.IDENTIFIER, tokenizer.INT_CONST, tokenizer.STR_CONST}, tokens[*index].Type) { if err := compileExpression(output, tokens, index); err != nil { @@ -461,9 +383,7 @@ func compileReturnStatement(output *strings.Builder, tokens []tokenizer.Token, i return errors.New("Missing semicolon!") } - output.WriteString(" " + tokens[*index].Value + " \n") - (*index)++ - + WriteToken(output, tokens[*index], index) output.WriteString("\n") return nil @@ -527,35 +447,25 @@ func compileSubroutineDeclaration(output *strings.Builder, tokens []tokenizer.To output.WriteString("\n") - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if !slices.Contains([]tokenizer.TokenType{tokenizer.KEYWORD, tokenizer.IDENTIFIER}, tokens[*index].Type) && !slices.Contains([]string{"void", "int", "char", "boolean"}, tokens[*index].Value) { return errors.New("Invalid subroutine return type!") } - if tokens[*index].Type == tokenizer.KEYWORD { - output.WriteString(" " + tokens[*index].Value + " \n") - } else { - output.WriteString(" " + tokens[*index].Value + " \n") - } - - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.IDENTIFIER { return errors.New("Invalid subroutine name!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if tokens[*index].Type != tokenizer.SYMBOL || tokens[*index].Value != "(" { return errors.New("Missing subroutine opening parenthese!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ - + WriteToken(output, tokens[*index], index) output.WriteString("\n") if err := compileParameterList(output, tokens, index); err != nil { @@ -568,17 +478,15 @@ func compileSubroutineDeclaration(output *strings.Builder, tokens []tokenizer.To return errors.New("Missing subroutine closing parenthese!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ - + + WriteToken(output, tokens[*index], index) output.WriteString("\n") if tokens[*index].Type != tokenizer.SYMBOL || tokens[*index].Value != "{" { return errors.New("Missing subroutine opening curly brace!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ + WriteToken(output, tokens[*index], index) if err := compileSubroutineBody(output, tokens, index); err != nil { return err @@ -588,9 +496,7 @@ func compileSubroutineDeclaration(output *strings.Builder, tokens []tokenizer.To return errors.New("Missing subroutine closing curly brace!") } - output.WriteString(" " + tokens[*index].Value + " \n") - *(index)++ - + WriteToken(output, tokens[*index], index) output.WriteString("\n") output.WriteString("\n") @@ -606,22 +512,19 @@ func compileClass(output *strings.Builder, tokens []tokenizer.Token) error { return errors.New("Jack file must contain one class!") } - output.WriteString(" " + tokens[index].Value + " \n") - index++ + WriteToken(output, tokens[index], &index) if tokens[index].Type != tokenizer.IDENTIFIER { return errors.New("Invalid class name!") } - output.WriteString(" " + tokens[index].Value + " \n") - index++ + WriteToken(output, tokens[index], &index) if tokens[index].Type != tokenizer.SYMBOL || tokens[index].Value != "{" { return errors.New("Missing class opening curly brace!") } - output.WriteString(" " + tokens[index].Value + " \n") - index++ + WriteToken(output, tokens[index], &index) if err := compileClassVarDec(output, tokens, &index); err != nil { return err @@ -635,7 +538,7 @@ func compileClass(output *strings.Builder, tokens []tokenizer.Token) error { return errors.New("Missing class closing curly brace!") } - output.WriteString(" " + tokens[index].Value + " \n") + WriteToken(output, tokens[index], &index) output.WriteString("\n") return nil diff --git a/internal/tokenizer/tokenizer.go b/internal/tokenizer/tokenizer.go index d6f2961..dcad85a 100644 --- a/internal/tokenizer/tokenizer.go +++ b/internal/tokenizer/tokenizer.go @@ -69,6 +69,37 @@ var SYMBOLS = []string{ type Token struct { Value string Type TokenType + XML string +} + +func NewToken(value string, tokenType TokenType) Token { + switch tokenType { + case SYMBOL: + var symbol string + + switch value { + case "<": + symbol = "<" + case ">": + symbol = ">" + case "&": + symbol = "&" + default: + symbol = value + } + + return Token{Value: symbol, Type: tokenType, XML: "symbol"} + case KEYWORD: + return Token{Value: value, Type: tokenType, XML: "keyword"} + case IDENTIFIER: + return Token{Value: value, Type: tokenType, XML: "identifier"} + case INT_CONST: + return Token{Value: value, Type: tokenType, XML: "integerConstant"} + case STR_CONST: + return Token{Value: value, Type: tokenType, XML: "stringConstant"} + default: + return Token{Value: value, Type: tokenType, XML: "token"} + } } func isDigit(text string) (bool, error) { @@ -188,9 +219,9 @@ func ExtractTokens(tokens *[]Token, reader *bufio.Reader) error { read := string(buf) if slices.Contains(KEYWORDS, read) { - *tokens = append(*tokens, Token{Value: read, Type: KEYWORD}) + *tokens = append(*tokens, NewToken(read, KEYWORD)) } else { - *tokens = append(*tokens, Token{Value: read, Type: IDENTIFIER}) + *tokens = append(*tokens, NewToken(read, IDENTIFIER)) } buf = nil @@ -243,7 +274,7 @@ func ExtractTokens(tokens *[]Token, reader *bufio.Reader) error { } } - *tokens = append(*tokens, Token{Value: integerConstant.String(), Type: INT_CONST}) + *tokens = append(*tokens, NewToken(integerConstant.String(), INT_CONST)) continue } } @@ -257,7 +288,7 @@ func ExtractTokens(tokens *[]Token, reader *bufio.Reader) error { return err } - *tokens = append(*tokens, Token{Value: string(b[:len(b)-1]), Type: STR_CONST}) + *tokens = append(*tokens, NewToken(string(b[:len(b)-1]), STR_CONST)) continue } @@ -266,15 +297,15 @@ func ExtractTokens(tokens *[]Token, reader *bufio.Reader) error { read := string(buf) if slices.Contains(KEYWORDS, read) { - *tokens = append(*tokens, Token{Value: read, Type: KEYWORD}) + *tokens = append(*tokens, NewToken(read, KEYWORD)) } else { - *tokens = append(*tokens, Token{Value: read, Type: IDENTIFIER}) + *tokens = append(*tokens, NewToken(read, IDENTIFIER)) } buf = nil } - *tokens = append(*tokens, Token{Value: text, Type: SYMBOL}) + *tokens = append(*tokens, NewToken(text, SYMBOL)) continue }