diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..85758f7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,24 @@
+CC = gcc
+# strdup/strndup are POSIX rather than ISO C11, so ask for their declarations.
+CFLAGS = -Wall -Wextra -std=c11 -D_POSIX_C_SOURCE=200809L
+
+SRC = src/main.c
+OBJ = $(SRC:.c=.o)
+TARGET = assembler
+
+.PHONY: all clean
+
+all: $(TARGET)
+
+$(TARGET): $(OBJ)
+	$(CC) $(CFLAGS) -o $(TARGET) $(OBJ)
+
+# main.c textually includes parser.c and code.c, so they are prerequisites
+# of main.o rather than separate translation units.
+src/main.o: src/main.c src/parser.c src/code.c src/types.h
+
+%.o: %.c
+	$(CC) $(CFLAGS) -c $< -o $@
+
+clean:
+	rm -f $(OBJ) $(TARGET)
diff --git a/src/code.c b/src/code.c
new file mode 100644
index 0000000..e4d4c8b
--- /dev/null
+++ b/src/code.c
@@ -0,0 +1,109 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* One row of a mnemonic -> bit-string translation table. */
+typedef struct {
+    const char* mnemonic;
+    const char* bits;
+} CodeEntry;
+
+/*
+ * Searches the `count` rows of `table` for `mnemonic` and returns a fresh
+ * copy of the matching bit string, or of `fallback` when nothing matches.
+ * The caller owns the result; NULL means the allocation failed.
+ */
+static char* lookup_code(const CodeEntry* table, size_t count,
+                         const char* mnemonic, const char* fallback) {
+    for (size_t i = 0; i < count; i++) {
+        if (strcmp(table[i].mnemonic, mnemonic) == 0) {
+            return strdup(table[i].bits);
+        }
+    }
+    return strdup(fallback);
+}
+
+/*
+ * Renders the low 15 bits of `decimal` as a string of '0'/'1' characters,
+ * most significant bit first.
+ * Returns a malloc'd, NUL-terminated string the caller must free, or NULL
+ * if the allocation fails.
+ */
+char* decimal_to_fifteen_bits_binary(int decimal) {
+    enum { BIT_COUNT = 15 };
+    /* Mask the unsigned representation so negative inputs are well defined. */
+    unsigned int value = (unsigned int)decimal;
+    char* bin = malloc(BIT_COUNT + 1);
+    if (bin == NULL) return NULL;
+
+    for (int i = 0; i < BIT_COUNT; i++) {
+        bin[i] = (value & (1u << (BIT_COUNT - 1 - i))) ? '1' : '0';
+    }
+    bin[BIT_COUNT] = '\0';
+    return bin;
+}
+
+/*
+ * Translates a dest mnemonic into its 3-bit field (A, D, M from left to
+ * right). The letters may appear in any order, so "MD" and "DM" are
+ * equivalent. NULL, an empty string, a repeated letter or any other
+ * character yields "000".
+ * Returns a malloc'd string the caller must free, or NULL on allocation
+ * failure.
+ */
+char* translate_dest(const char* dest) {
+    char bits[] = "000";
+    if (dest == NULL) return strdup(bits);
+
+    for (const char* p = dest; *p != '\0'; p++) {
+        char* bit;
+        switch (*p) {
+        case 'A': bit = &bits[0]; break;
+        case 'D': bit = &bits[1]; break;
+        case 'M': bit = &bits[2]; break;
+        default:  return strdup("000");
+        }
+        if (*bit == '1') return strdup("000");
+        *bit = '1';
+    }
+    return strdup(bits);
+}
+
+/*
+ * Translates a jump mnemonic into its 3-bit field. NULL or an unrecognized
+ * mnemonic yields "000" (no jump).
+ * Returns a malloc'd string the caller must free, or NULL on allocation
+ * failure.
+ */
+char* translate_jump(const char* jump) {
+    static const CodeEntry jumps[] = {
+        {"JGT", "001"}, {"JEQ", "010"}, {"JGE", "011"}, {"JLT", "100"},
+        {"JNE", "101"}, {"JLE", "110"}, {"JMP", "111"},
+    };
+    if (jump == NULL) return strdup("000");
+    return lookup_code(jumps, sizeof jumps / sizeof jumps[0], jump, "000");
+}
+
+/*
+ * Translates a comp mnemonic into its 7-bit field (the `a` bit followed by
+ * c1..c6). NULL or an unrecognized mnemonic yields "0000000".
+ * Returns a malloc'd string the caller must free, or NULL on allocation
+ * failure.
+ */
+char* translate_comp(const char* comp) {
+    static const CodeEntry comps[] = {
+        {"0",   "0101010"}, {"1",   "0111111"}, {"-1",  "0111010"},
+        {"D",   "0001100"}, {"A",   "0110000"}, {"M",   "1110000"},
+        {"!D",  "0001101"}, {"!A",  "0110001"}, {"!M",  "1110001"},
+        {"-D",  "0001111"}, {"-A",  "0110011"}, {"-M",  "1110011"},
+        {"D+1", "0011111"}, {"A+1", "0110111"}, {"M+1", "1110111"},
+        {"D-1", "0001110"}, {"A-1", "0110010"}, {"M-1", "1110010"},
+        {"D+A", "0000010"}, {"D+M", "1000010"},
+        {"D-A", "0010011"}, {"D-M", "1010011"},
+        {"A-D", "0000111"}, {"M-D", "1000111"},
+        {"D&A", "0000000"}, {"D&M", "1000000"},
+        {"D|A", "0010101"}, {"D|M", "1010101"},
+    };
+    if (comp == NULL) return strdup("0000000");
+    return lookup_code(comps, sizeof comps / sizeof comps[0], comp, "0000000");
+}
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..d0a893d
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,295 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "parser.c"
+#include "code.c"
+
+/* Each output line is 16 binary digits followed by a newline. */
+enum { HACK_LINE_LENGTH = 17 };
+
+/*
+ * Returns a copy of `content` with comments ("//" to end of line) and all
+ * spaces, tabs and carriage returns removed, and with empty lines dropped.
+ * Every remaining line ends in '\n'. `content` is not modified.
+ * Returns a malloc'd string the caller must free, or NULL on allocation
+ * failure.
+ */
+char* remove_whitespace_and_comments(char* content) {
+    /* Worst case: nothing is removed and a final '\n' is appended. */
+    char* cleaned = malloc(strlen(content) + 2);
+    if (cleaned == NULL) return NULL;
+
+    size_t out = 0;
+    const char* p = content;
+    while (*p != '\0') {
+        size_t line_start = out;
+        while (*p != '\0' && *p != '\n') {
+            if (p[0] == '/' && p[1] == '/') {
+                p += strcspn(p, "\n");  /* skip the rest of the line */
+                break;
+            }
+            if (*p != ' ' && *p != '\t' && *p != '\r') cleaned[out++] = *p;
+            p++;
+        }
+        if (*p == '\n') p++;
+        if (out > line_start) cleaned[out++] = '\n';
+    }
+    cleaned[out] = '\0';
+    return cleaned;
+}
+
+/*
+ * Records every label declaration "(LABEL)" in `table` and returns the
+ * program without those lines. A label's value is the ROM address of the
+ * next instruction, i.e. the number of instructions that precede the
+ * declaration. `content` must already be cleaned; it is not modified.
+ * Returns a malloc'd string the caller must free, or NULL on allocation
+ * failure.
+ */
+char* first_pass(char* content, HashMap* table) {
+    char* stripped = malloc(strlen(content) + 2);
+    if (stripped == NULL) return NULL;
+
+    size_t out = 0;
+    int rom_address = 0;
+    const char* p = content;
+    while (*p != '\0') {
+        size_t len = strcspn(p, "\n");
+        if (len >= 2 && p[0] == '(' && p[len - 1] == ')') {
+            char* symbol = strndup(p + 1, len - 2);
+            if (symbol == NULL) {
+                free(stripped);
+                return NULL;
+            }
+            insert(table, symbol, rom_address, ADDRESS_ROM);
+            free(symbol);
+        } else if (len > 0) {
+            memcpy(stripped + out, p, len);
+            out += len;
+            stripped[out++] = '\n';
+            rom_address++;
+        }
+        p += len;
+        if (*p == '\n') p++;
+    }
+    stripped[out] = '\0';
+    return stripped;
+}
+
+/*
+ * Writes the 16-bit encoding of `instr`, followed by '\n', into `dst`,
+ * which has room for `size` bytes including the terminator.
+ * Returns the number of characters written, or -1 if the instruction is
+ * incomplete, an allocation fails or `dst` is too small.
+ */
+static int encode_instruction(const Instruction* instr, char* dst, size_t size) {
+    int written = -1;
+
+    if (instr->type == INSTRUCTION_A) {
+        if (instr->data.a.decimal == NULL) return -1;
+        int value = (int)strtol(instr->data.a.decimal, NULL, 10);
+        char* bin = decimal_to_fifteen_bits_binary(value);
+        if (bin != NULL) written = snprintf(dst, size, "0%s\n", bin);
+        free(bin);
+    } else {
+        if (instr->data.c.comp == NULL) return -1;
+        char* comp = translate_comp(instr->data.c.comp);
+        char* dest = translate_dest(instr->data.c.dest);
+        char* jump = translate_jump(instr->data.c.jump);
+        if (comp != NULL && dest != NULL && jump != NULL) {
+            written = snprintf(dst, size, "111%s%s%s\n", comp, dest, jump);
+        }
+        free(comp);
+        free(dest);
+        free(jump);
+    }
+
+    if (written < 0 || (size_t)written >= size) return -1;
+    return written;
+}
+
+/*
+ * Translates a label-free program into machine code, one 16-character
+ * binary line per instruction. Symbols are resolved, and new variables
+ * allocated, through `table`. `content` is not modified.
+ * Returns a malloc'd string the caller must free, or NULL on failure.
+ */
+char* second_pass(char* content, HashMap* table) {
+    /* One output line per input line; the last line may lack its '\n'. */
+    size_t line_count = 1;
+    for (const char* p = content; *p != '\0'; p++) {
+        if (*p == '\n') line_count++;
+    }
+    size_t capacity = line_count * HACK_LINE_LENGTH + 1;
+    char* output = malloc(capacity);
+    if (output == NULL) return NULL;
+    output[0] = '\0';
+
+    size_t out = 0;
+    const char* p = content;
+    while (*p != '\0') {
+        size_t len = strcspn(p, "\n");
+        if (len > 0) {
+            char* line = strndup(p, len);
+            if (line == NULL) goto fail;
+            Instruction instr = parse(line, table);
+            free(line);
+
+            int written = encode_instruction(&instr, output + out, capacity - out);
+            free_instruction(&instr);
+            if (written < 0) goto fail;
+            out += (size_t)written;
+        }
+        p += len;
+        if (*p == '\n') p++;
+    }
+    return output;
+
+fail:
+    free(output);
+    return NULL;
+}
+
+/*
+ * Builds the output path for `path`: a trailing ".asm" is replaced by
+ * ".hack"; any other name just has ".hack" appended.
+ * Returns a malloc'd string the caller must free, or NULL on allocation
+ * failure.
+ */
+static char* make_output_path(const char* path) {
+    static const char in_ext[] = ".asm";
+    static const char out_ext[] = ".hack";
+    const size_t in_ext_len = sizeof in_ext - 1;
+
+    size_t stem_len = strlen(path);
+    if (stem_len >= in_ext_len &&
+        strcmp(path + stem_len - in_ext_len, in_ext) == 0) {
+        stem_len -= in_ext_len;
+    }
+
+    char* result = malloc(stem_len + sizeof out_ext);
+    if (result == NULL) return NULL;
+    memcpy(result, path, stem_len);
+    memcpy(result + stem_len, out_ext, sizeof out_ext);  /* copies the NUL */
+    return result;
+}
+
+/*
+ * Reads the whole file at `path` into a NUL-terminated buffer.
+ * Returns a malloc'd string the caller must free, or NULL after printing
+ * a diagnostic.
+ */
+static char* read_file(const char* path) {
+    FILE* f = fopen(path, "r");
+    if (f == NULL) {
+        perror("Error opening input file");
+        return NULL;
+    }
+
+    char* buffer = NULL;
+    long length = -1;
+    if (fseek(f, 0, SEEK_END) == 0) length = ftell(f);
+    if (length >= 0 && fseek(f, 0, SEEK_SET) == 0) {
+        buffer = malloc((size_t)length + 1);
+    }
+    if (buffer != NULL) {
+        /* Terminate at the byte count actually read, which in text mode
+         * can be smaller than the size reported by ftell. */
+        size_t got = fread(buffer, 1, (size_t)length, f);
+        if (ferror(f)) {
+            free(buffer);
+            buffer = NULL;
+        } else {
+            buffer[got] = '\0';
+        }
+    }
+    if (buffer == NULL) perror("Error reading input file");
+
+    fclose(f);
+    return buffer;
+}
+
+/*
+ * Assembles the file at `path` and writes the machine code to the path
+ * produced by make_output_path().
+ * Returns 0 on success, or -1 after printing a diagnostic.
+ */
+int process(const char* path) {
+    int rc = -1;
+    char* filepath = NULL;
+    char* cleaned = NULL;
+    char* first = NULL;
+    char* second = NULL;
+    HashMap* table = NULL;
+    FILE* out_file = NULL;
+
+    char* buffer = read_file(path);
+    if (buffer == NULL) return -1;
+
+    filepath = make_output_path(path);
+    table = create_map(100);
+    if (filepath == NULL || table == NULL) goto assemble_error;
+
+    // Initialize predefined symbols
+    insert(table, "SP", 0, ADDRESS_RAM);
+    insert(table, "LCL", 1, ADDRESS_RAM);
+    insert(table, "ARG", 2, ADDRESS_RAM);
+    insert(table, "THIS", 3, ADDRESS_RAM);
+    insert(table, "THAT", 4, ADDRESS_RAM);
+    insert(table, "SCREEN", 16384, ADDRESS_RAM);
+    insert(table, "KBD", 24576, ADDRESS_RAM);
+    // R0-R15 name RAM addresses 0-15
+    for (int i = 0; i <= 15; i++) {
+        char r[16];
+        snprintf(r, sizeof r, "R%d", i);
+        insert(table, r, i, ADDRESS_RAM);
+    }
+
+    cleaned = remove_whitespace_and_comments(buffer);
+    if (cleaned == NULL) goto assemble_error;
+    first = first_pass(cleaned, table);
+    if (first == NULL) goto assemble_error;
+    second = second_pass(first, table);
+    if (second == NULL) goto assemble_error;
+
+    out_file = fopen(filepath, "w");
+    if (out_file == NULL) {
+        perror("Error opening output file");
+        goto cleanup;
+    }
+    if (fputs(second, out_file) == EOF) {
+        perror("Error writing output file");
+        fclose(out_file);
+        goto cleanup;
+    }
+    /* fclose flushes buffered data, so it can fail like any other write. */
+    if (fclose(out_file) != 0) {
+        perror("Error writing output file");
+        goto cleanup;
+    }
+
+    printf("Assembly complete: %s\n", filepath);
+    rc = 0;
+    goto cleanup;
+
+assemble_error:
+    fprintf(stderr, "Error: could not assemble %s\n", path);
+cleanup:
+    free(buffer);
+    free(cleaned);
+    free(first);
+    free(second);
+    free(filepath);
+    free_map(table);
+    return rc;
+}
+
+/* Assembles the file named by argv[1]; exits 0 on success, 1 on any error. */
+int main(int argc, char** argv) {
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <file.asm>\n", argv[0]);
+        return 1;
+    }
+    return process(argv[1]) == 0 ? 0 : 1;
+}
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..b134159
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,233 @@
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+
+/*
+ * RAM layout used when allocating variables: R0-R15 name addresses 0-15,
+ * so variables start at 16; SCREEN (16384) is the first address past the
+ * variable area.
+ */
+enum {
+    FIRST_VARIABLE_ADDRESS = 16,
+    SCREEN_BASE_ADDRESS = 16384
+};
+
+// Simple hash map implementation for C
+typedef struct Node {
+    char* key;
+    int value;
+    Address address;
+    struct Node* next;
+} Node;
+
+typedef struct {
+    Node** table;
+    int size;
+} HashMap;
+
+/* Hashes `key` to a bucket index in [0, size). `size` must be positive. */
+unsigned int hash(const char* key, int size) {
+    unsigned int h = 0;
+    /* Read bytes as unsigned char so the result does not depend on whether
+     * plain char is signed. */
+    for (const unsigned char* p = (const unsigned char*)key; *p != '\0'; p++) {
+        h = h * 31 + *p;
+    }
+    return h % (unsigned int)size;
+}
+
+/*
+ * Creates an empty map with `size` buckets.
+ * Returns NULL if `size` is not positive or an allocation fails.
+ * Release the map with free_map().
+ */
+HashMap* create_map(int size) {
+    if (size <= 0) return NULL;
+
+    HashMap* map = malloc(sizeof *map);
+    if (map == NULL) return NULL;
+
+    map->table = calloc((size_t)size, sizeof *map->table);
+    if (map->table == NULL) {
+        free(map);
+        return NULL;
+    }
+    map->size = size;
+    return map;
+}
+
+/* Looks up `key`. Returns its node (owned by the map), or NULL if absent. */
+Node* get(HashMap* map, const char* key) {
+    Node* curr = map->table[hash(key, map->size)];
+    while (curr != NULL && strcmp(curr->key, key) != 0) {
+        curr = curr->next;
+    }
+    return curr;
+}
+
+/*
+ * Associates `key` with `value` and `address`. An existing entry for `key`
+ * is overwritten; otherwise a copy of `key` is stored in a new entry.
+ * If an allocation fails the map is left unchanged.
+ */
+void insert(HashMap* map, const char* key, int value, Address address) {
+    Node* existing = get(map, key);
+    if (existing != NULL) {
+        existing->value = value;
+        existing->address = address;
+        return;
+    }
+
+    Node* node = malloc(sizeof *node);
+    if (node == NULL) return;
+    node->key = strdup(key);
+    if (node->key == NULL) {
+        free(node);
+        return;
+    }
+    node->value = value;
+    node->address = address;
+
+    unsigned int h = hash(key, map->size);
+    node->next = map->table[h];
+    map->table[h] = node;
+}
+
+/* Removes `key` and frees its entry; does nothing if `key` is absent. */
+void remove_key(HashMap* map, const char* key) {
+    Node** link = &map->table[hash(key, map->size)];
+    while (*link != NULL) {
+        Node* curr = *link;
+        if (strcmp(curr->key, key) == 0) {
+            *link = curr->next;
+            free(curr->key);
+            free(curr);
+            return;
+        }
+        link = &curr->next;
+    }
+}
+
+/* Frees the map and every entry in it. Accepts NULL. */
+void free_map(HashMap* map) {
+    if (map == NULL) return;
+    for (int i = 0; i < map->size; i++) {
+        Node* curr = map->table[i];
+        while (curr != NULL) {
+            Node* next = curr->next;
+            free(curr->key);
+            free(curr);
+            curr = next;
+        }
+    }
+    free(map->table);
+    free(map);
+}
+
+// Parser implementation
+
+/*
+ * Returns the RAM address for the next new variable: one past the highest
+ * RAM symbol already inside [FIRST_VARIABLE_ADDRESS, SCREEN_BASE_ADDRESS),
+ * or FIRST_VARIABLE_ADDRESS if there is none. RAM symbols outside that
+ * range (such as SCREEN and KBD) do not affect the result.
+ */
+static int next_variable_address(const HashMap* table) {
+    int next = FIRST_VARIABLE_ADDRESS;
+    for (int i = 0; i < table->size; i++) {
+        for (const Node* curr = table->table[i]; curr; curr = curr->next) {
+            if (curr->address == ADDRESS_RAM && curr->value >= next &&
+                curr->value < SCREEN_BASE_ADDRESS) {
+                next = curr->value + 1;
+            }
+        }
+    }
+    return next;
+}
+
+/*
+ * Frees the strings owned by an instruction returned from parse() and
+ * resets them to NULL. Accepts NULL.
+ */
+void free_instruction(Instruction* instr) {
+    if (instr == NULL) return;
+    if (instr->type == INSTRUCTION_A) {
+        free(instr->data.a.decimal);
+        instr->data.a.decimal = NULL;
+    } else {
+        free(instr->data.c.dest);
+        free(instr->data.c.comp);
+        free(instr->data.c.jump);
+        instr->data.c.dest = NULL;
+        instr->data.c.comp = NULL;
+        instr->data.c.jump = NULL;
+    }
+}
+
+/*
+ * Parses one instruction that has already been stripped of whitespace and
+ * comments.
+ *
+ * "@operand" yields an A-instruction whose `decimal` holds the address as
+ * text: a numeric operand is copied as written, a known symbol is replaced
+ * by its value from `table`, and an unknown symbol is added to `table` as
+ * a new RAM variable. Anything else is split as "dest=comp;jump", with
+ * absent parts left NULL.
+ *
+ * The strings in the result are heap-allocated (NULL if an allocation
+ * failed); release them with free_instruction().
+ */
+Instruction parse(const char* instruction, HashMap* table) {
+    Instruction result;
+
+    if (instruction[0] == '@') {
+        const char* operand = instruction + 1;
+        result.type = INSTRUCTION_A;
+
+        /* strtol only tests whether the whole operand is a number. */
+        char* end;
+        (void)strtol(operand, &end, 10);
+        if (*end == '\0') {
+            result.data.a.decimal = strdup(operand);
+            return result;
+        }
+
+        int address;
+        Node* node = get(table, operand);
+        if (node != NULL) {
+            address = node->value;
+        } else {
+            address = next_variable_address(table);
+            insert(table, operand, address, ADDRESS_RAM);
+        }
+
+        char buf[32];
+        snprintf(buf, sizeof buf, "%d", address);
+        result.data.a.decimal = strdup(buf);
+        return result;
+    }
+
+    result.type = INSTRUCTION_C;
+    result.data.c.dest = NULL;
+    result.data.c.comp = NULL;
+    result.data.c.jump = NULL;
+
+    const char* eq = strchr(instruction, '=');
+    const char* comp_start = instruction;
+    if (eq != NULL) {
+        result.data.c.dest = strndup(instruction, (size_t)(eq - instruction));
+        comp_start = eq + 1;
+    }
+
+    /* Only a ';' after the comp field introduces a jump. */
+    const char* semi = strchr(comp_start, ';');
+    if (semi != NULL) {
+        result.data.c.comp = strndup(comp_start, (size_t)(semi - comp_start));
+        result.data.c.jump = strdup(semi + 1);
+    } else {
+        result.data.c.comp = strdup(comp_start);
+    }
+    return result;
+}
diff --git a/src/types.h b/src/types.h
new file mode 100644
index 0000000..1f5b070
--- /dev/null
+++ b/src/types.h
@@ -0,0 +1,39 @@
+#ifndef TYPES_H
+#define TYPES_H
+
+#include <stddef.h>
+
+/* A-instruction ("@value"): the operand as a heap-allocated decimal string. */
+typedef struct {
+    char* decimal;
+} AInstruction;
+
+/* C-instruction ("dest=comp;jump"): heap-allocated parts, NULL when absent. */
+typedef struct {
+    char* dest;
+    char* comp;
+    char* jump;
+} CInstruction;
+
+/* Address space a symbol's value refers to. */
+typedef enum {
+    ADDRESS_RAM,
+    ADDRESS_ROM
+} Address;
+
+/* Selects which member of Instruction.data is in use. */
+typedef enum {
+    INSTRUCTION_A,
+    INSTRUCTION_C
+} InstructionType;
+
+/* A parsed instruction; `type` says whether `data.a` or `data.c` is valid. */
+typedef struct {
+    InstructionType type;
+    union {
+        AInstruction a;
+        CInstruction c;
+    } data;
+} Instruction;
+
+#endif // TYPES_H