diff --git a/internal/tokenizer/tokenizer.go b/internal/tokenizer/tokenizer.go index f6818d4..c08e8dd 100644 --- a/internal/tokenizer/tokenizer.go +++ b/internal/tokenizer/tokenizer.go @@ -4,6 +4,9 @@ import ( "bufio" "regexp" "slices" + "strings" + + "github.com/hazemKrimi/jack-compiler/internal/utils" ) type TokenType int @@ -25,6 +28,7 @@ var KEYWORDS = []string{ "static", "var", "int", + "char", "read", "boolean", "void", @@ -67,62 +71,215 @@ type Token struct { Type TokenType } -func ExtractTokens(tokens *[]Token, reader *bufio.Reader) error { - read := "" - buf := []byte{} +func isDigit(text string) (bool, error) { + if match, err := regexp.MatchString("^[[:digit:]]$", text); err != nil { + return false, err + } else if match { + return true, nil + } - for { - _, err := reader.Read(buf) - read += string(buf) + return false, nil +} + +func isWhiteSpace(text string) (bool, error) { + if match, err := regexp.MatchString("^[[:space:]]$", text); err != nil { + return false, err + } else if match { + return true, nil + } + + return false, nil +} + +func isComment(text string, reader *bufio.Reader) (bool, error) { + if text == "/" { + n, err := reader.ReadByte() + next := string(n) if err != nil { + return false, err + } + + switch next { + case "/": + _, err := reader.ReadBytes('\n') + + if err != nil { + return false, err + } + + return true, nil + case "*": + _, err := reader.ReadBytes('*') + + if err != nil { + return false, err + } + + p, err := reader.Peek(1) + + if err != nil { + return false, err + } + + for string(p) != "/" { + _, err := reader.ReadBytes('*') + + if err != nil { + return false, err + } + + p, err = reader.Peek(1) + } + + _, err = reader.ReadBytes('/') + + if err != nil { + return false, err + } + + return true, nil + default: + err = reader.UnreadByte() + + if err != nil { + return false, err + } + + return false, nil + } + } + + return false, nil +} + +func ExtractTokens(tokens *[]Token, reader *bufio.Reader) error { + buf := make([]byte, 0) + + for { + b, err := reader.ReadByte() + + if isErr, isEOF := utils.CheckReaderError(err); isEOF { + break + } else if isErr { return err } - if match, _ := regexp.MatchString("^[[:space:]]$", read); match { + text := string(b) + comment, err := isComment(text, reader) + + if isErr, isEOF := utils.CheckReaderError(err); isEOF { + break + } else if isErr { + return err + } else if comment { continue } - if read == "/" { - next, err := reader.ReadByte() + whitespace, err := isWhiteSpace(text) - if err != nil { - return err + if isErr, isEOF := utils.CheckReaderError(err); isEOF { + break + } else if isErr { + return err + } else if whitespace { + if len(buf) > 0 { + + read := string(buf) + + if slices.Contains(KEYWORDS, read) { + *tokens = append(*tokens, Token{Value: read, Type: KEYWORD}) + } else { + *tokens = append(*tokens, Token{Value: read, Type: IDENTIFIER}) + } + + buf = nil } - if string(next) == "/" || string(next) == "*" { - _, err := reader.ReadBytes('/') + continue + } - if err != nil { - return err - } - } else { - *tokens = append(*tokens, Token{Value: read, Type: SYMBOL}) - read = "" + digit, err := isDigit(text) - err := reader.UnreadByte() + if isErr, isEOF := utils.CheckReaderError(err); isEOF { + break + } else if isErr { + return err + } else if digit { + if len(buf) == 0 { + var integerConstant strings.Builder - if err != nil { - return err + integerConstant.WriteString(text) + + for { + b, err := reader.ReadByte() + + if isErr, isEOF := utils.CheckReaderError(err); isEOF { + break + } else if isErr { + return err + } + + anotherDigit, err := isDigit(string(b)) + + if isErr, isEOF := utils.CheckReaderError(err); isEOF { + break + } else if isErr { + return err + } + + if !anotherDigit { + err := reader.UnreadByte() + + if isErr, isEOF := utils.CheckReaderError(err); isEOF { + break + } else if isErr { + return err + } + + break + } else { + integerConstant.WriteString(string(b)) + } } + *tokens = append(*tokens, Token{Value: integerConstant.String(), Type: INT_CONST}) continue } } - if slices.Contains(SYMBOLS, read) { - *tokens = append(*tokens, Token{Value: read, Type: SYMBOL}) - read = "" + if text == "\"" { + b, err := reader.ReadBytes('"') + + if isErr, isEOF := utils.CheckReaderError(err); isEOF { + break + } else if isErr { + return err + } + + *tokens = append(*tokens, Token{Value: string(b[:len(b)-1]), Type: STR_CONST}) + continue + } + + if slices.Contains(SYMBOLS, text) { + if len(buf) > 0 { + read := string(buf) + + if slices.Contains(KEYWORDS, read) { + *tokens = append(*tokens, Token{Value: read, Type: KEYWORD}) + } else { + *tokens = append(*tokens, Token{Value: read, Type: IDENTIFIER}) + } + + buf = nil + } + + *tokens = append(*tokens, Token{Value: text, Type: SYMBOL}) continue } - if slices.Contains(KEYWORDS, read) { - *tokens = append(*tokens, Token{Value: read, Type: KEYWORD}) - read = "" - - continue - } + buf = append(buf, b) } return nil