mirror of
https://github.com/hazemKrimi/hack-assembler.git
synced 2026-07-01 09:11:24 +00:00
wip: rewrite in C
This commit is contained in:
@@ -0,0 +1,17 @@
|
||||
# Build rules for the assembler executable.
CC = gcc
# strdup()/strndup() are POSIX functions, not ISO C11. Under strict -std=c11
# the C library hides their prototypes unless a feature-test macro requests
# them, so ask for POSIX.1-2008 explicitly.
CFLAGS = -Wall -Wextra -std=c11 -D_POSIX_C_SOURCE=200809L

SRC = src/main.c
OBJ = $(SRC:.c=.o)
TARGET = assembler

# These targets do not name files; keep make from being confused by a file
# called "all" or "clean".
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJ)
	$(CC) $(CFLAGS) -o $(TARGET) $(OBJ)

%.o: %.c
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -f $(OBJ) $(TARGET)
|
||||
+70
@@ -0,0 +1,70 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Render the low 15 bits of `decimal` as a string of '0'/'1' characters,
// most significant bit first.
//
// Returns a newly allocated, NUL-terminated 15-character string that the
// caller must free(), or NULL if the allocation fails. Bits above bit 14 are
// ignored.
char* decimal_to_fifteen_bits_binary(int decimal) {
    enum { BIT_COUNT = 15 };

    char* bin = malloc(BIT_COUNT + 1);
    if (bin == NULL) {
        return NULL;
    }

    // Work on an unsigned copy so the masking is well defined for negative input.
    unsigned int value = (unsigned int)decimal;
    for (int pos = 0; pos < BIT_COUNT; pos++) {
        unsigned int mask = 1u << (BIT_COUNT - 1 - pos);
        bin[pos] = (value & mask) ? '1' : '0';
    }
    bin[BIT_COUNT] = '\0';
    return bin;
}
|
||||
|
||||
// Translate a destination mnemonic into its 3-character bit string.
//
// A NULL or unrecognised mnemonic yields "000". The result is a fresh
// strdup() copy that the caller must free().
char* translate_dest(const char* dest) {
    static const struct {
        const char* mnemonic;
        const char* bits;
    } dest_codes[] = {
        { "M",   "001" },
        { "D",   "010" },
        { "MD",  "011" },
        { "A",   "100" },
        { "AM",  "101" },
        { "AD",  "110" },
        { "AMD", "111" },
    };
    const char* bits = "000";

    if (dest != NULL) {
        for (size_t k = 0; k < sizeof dest_codes / sizeof dest_codes[0]; k++) {
            if (strcmp(dest, dest_codes[k].mnemonic) == 0) {
                bits = dest_codes[k].bits;
                break;
            }
        }
    }
    return strdup(bits);
}
|
||||
|
||||
// Translate a jump mnemonic into its 3-character bit string.
//
// A NULL or unrecognised mnemonic yields "000". The result is a fresh
// strdup() copy that the caller must free().
char* translate_jump(const char* jump) {
    static const struct {
        const char* mnemonic;
        const char* bits;
    } jump_codes[] = {
        { "JGT", "001" },
        { "JEQ", "010" },
        { "JGE", "011" },
        { "JLT", "100" },
        { "JNE", "101" },
        { "JLE", "110" },
        { "JMP", "111" },
    };
    const char* bits = "000";

    if (jump != NULL) {
        for (size_t k = 0; k < sizeof jump_codes / sizeof jump_codes[0]; k++) {
            if (strcmp(jump, jump_codes[k].mnemonic) == 0) {
                bits = jump_codes[k].bits;
                break;
            }
        }
    }
    return strdup(bits);
}
|
||||
|
||||
// Translate a computation mnemonic into its 7-character bit string.
//
// A NULL or unrecognised mnemonic yields "0000000". The result is a fresh
// strdup() copy that the caller must free().
char* translate_comp(const char* comp) {
    static const struct {
        const char* mnemonic;
        const char* bits;
    } comp_codes[] = {
        { "0",   "0101010" },
        { "1",   "0111111" },
        { "-1",  "0111010" },
        { "D",   "0001100" },
        { "A",   "0110000" },
        { "!D",  "0001101" },
        { "!A",  "0110001" },
        { "-D",  "0001111" },
        { "-A",  "0110011" },
        { "D+1", "0011111" },
        { "A+1", "0110111" },
        { "D-1", "0001110" },
        { "A-1", "0110010" },
        { "D+A", "0000010" },
        { "D-A", "0010011" },
        { "A-D", "0000111" },
        { "D&A", "0000000" },
        { "D|A", "0010101" },
        { "M",   "1110000" },
        { "!M",  "1110001" },
        { "-M",  "1110011" },
        { "M+1", "1110111" },
        { "M-1", "1110010" },
        { "D+M", "1000010" },
        { "D-M", "1010011" },
        { "M-D", "1000111" },
        { "D&M", "1000000" },
        { "D|M", "1010101" },
    };
    const char* bits = "0000000";

    if (comp != NULL) {
        for (size_t k = 0; k < sizeof comp_codes / sizeof comp_codes[0]; k++) {
            if (strcmp(comp, comp_codes[k].mnemonic) == 0) {
                bits = comp_codes[k].bits;
                break;
            }
        }
    }
    return strdup(bits);
}
|
||||
+158
@@ -0,0 +1,158 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "types.h"
|
||||
#include "parser.c"
|
||||
#include "code.c"
|
||||
|
||||
// Build a cleaned copy of `content`: for every line, drop everything from the
// first "//" onwards and remove all spaces, tabs and carriage returns. Lines
// that end up empty are omitted; every kept line is terminated by '\n'.
//
// `content` is only read. Returns a newly allocated string that the caller
// must free(), or NULL if the allocation fails.
char* remove_whitespace_and_comments(char* content) {
    // The output never exceeds the input by more than one character: the
    // newline appended to a final line that had none. Add one more for NUL.
    size_t total = strlen(content);
    char* new_content = malloc(total + 2);
    if (new_content == NULL) {
        return NULL;
    }

    char* out = new_content;
    const char* cursor = content;
    while (*cursor != '\0') {
        const char* line_end = cursor + strcspn(cursor, "\n");
        char* line_start = out;

        for (const char* p = cursor; p < line_end; p++) {
            // p[1] is always readable: at worst it is the '\n' or NUL at line_end.
            if (p[0] == '/' && p[1] == '/') {
                break;
            }
            if (*p == ' ' || *p == '\t' || *p == '\r') {
                continue;
            }
            *out++ = *p;
        }
        if (out != line_start) {
            *out++ = '\n';
        }

        cursor = (*line_end == '\n') ? line_end + 1 : line_end;
    }
    *out = '\0';
    return new_content;
}
|
||||
|
||||
// First assembler pass: record every "(LABEL)" line in `table` and return a
// copy of `content` with those label lines removed.
//
// A label is stored with ADDRESS_ROM and the number of non-label lines that
// precede it, i.e. the index of the instruction that follows the label once
// label lines are gone.
//
// `content` is modified in place (the closing ')' of each label line is
// overwritten). Returns a newly allocated string that the caller must
// free(), or NULL if the allocation fails.
char* first_pass(char* content, HashMap* table) {
    // Measure before touching the buffer; +2 covers a '\n' appended to a
    // final line that had none, plus the terminating NUL.
    size_t total = strlen(content);
    char* new_content = malloc(total + 2);
    if (new_content == NULL) {
        return NULL;
    }

    char* out = new_content;
    int rom_address = 0;
    char* cursor = content;
    while (*cursor != '\0') {
        size_t len = strcspn(cursor, "\n");
        char* next = cursor + len;
        if (*next == '\n') {
            next++;
        }

        if (len >= 2 && cursor[0] == '(' && cursor[len - 1] == ')') {
            // Terminate the symbol in place so no fixed-size scratch buffer is needed.
            cursor[len - 1] = '\0';
            insert(table, cursor + 1, rom_address, ADDRESS_ROM);
        } else if (len > 0) {
            memcpy(out, cursor, len);
            out += len;
            *out++ = '\n';
            rom_address++;
        }
        cursor = next;
    }
    *out = '\0';
    return new_content;
}
|
||||
|
||||
// Second assembler pass: translate every non-empty line of `content` into one
// 16-character line of '0'/'1' text followed by '\n'.
//
// Each line is handed to parse(). An A-instruction becomes "0" plus the 15-bit
// value; anything else becomes "111" plus the comp, dest and jump bit strings.
//
// `content` is modified in place (newlines are overwritten with NUL so each
// line can be passed to parse()). Returns a newly allocated string that the
// caller must free(), or NULL if any allocation fails.
char* second_pass(char* content, HashMap* table) {
    enum { WORD_CHARS = 16, LINE_CHARS = WORD_CHARS + 1 };

    // Size the output from the number of instructions, not the input length.
    size_t instruction_count = 0;
    for (const char* scan = content; *scan != '\0'; ) {
        size_t len = strcspn(scan, "\n");
        if (len > 0) {
            instruction_count++;
        }
        scan += len;
        if (*scan == '\n') {
            scan++;
        }
    }

    char* output = malloc(instruction_count * LINE_CHARS + 1);
    if (output == NULL) {
        return NULL;
    }
    char* out = output;

    char* cursor = content;
    while (*cursor != '\0') {
        size_t len = strcspn(cursor, "\n");
        char* next = cursor + len;
        if (*next == '\n') {
            *next++ = '\0';
        }

        if (len > 0) {
            Instruction instr = parse(cursor, table);
            int written = -1;

            if (instr.type == INSTRUCTION_A) {
                long val = instr.data.a.decimal
                    ? strtol(instr.data.a.decimal, NULL, 10)
                    : 0;
                // Only the low 15 bits are encoded; mask before narrowing to int.
                char* bin = decimal_to_fifteen_bits_binary((int)(val & 0x7FFF));
                if (bin != NULL) {
                    written = snprintf(out, LINE_CHARS + 1, "0%s\n", bin);
                }
                free(bin);
                free(instr.data.a.decimal);
            } else {
                // The translate_* helpers map a NULL field to all zeros.
                char* comp = translate_comp(instr.data.c.comp);
                char* dest = translate_dest(instr.data.c.dest);
                char* jump = translate_jump(instr.data.c.jump);
                if (comp != NULL && dest != NULL && jump != NULL) {
                    written = snprintf(out, LINE_CHARS + 1, "111%s%s%s\n",
                                       comp, dest, jump);
                }
                free(comp);
                free(dest);
                free(jump);
                free(instr.data.c.dest);
                free(instr.data.c.comp);
                free(instr.data.c.jump);
            }

            if (written < 0) {
                free(output);
                return NULL;
            }
            // snprintf reports the untruncated length; never advance past
            // the space reserved for this line.
            out += (written > LINE_CHARS) ? LINE_CHARS : written;
        }
        cursor = next;
    }
    *out = '\0';
    return output;
}
|
||||
|
||||
void process(const char* path) {
|
||||
char filepath[256];
|
||||
strncpy(filepath, path, 255);
|
||||
char* hack_path = strstr(filepath, ".asm");
|
||||
if (hack_path) *hack_path = '\0';
|
||||
strcat(filepath, ".hack");
|
||||
|
||||
FILE* f = fopen(path, "r");
|
||||
if (!f) { perror("Error opening input file"); return; }
|
||||
|
||||
fseek(f, 0, SEEK_END);
|
||||
long length = ftell(f);
|
||||
fseek(f, 0, SEEK_SET);
|
||||
char* buffer = malloc(length + 1);
|
||||
fread(buffer, 1, length, f);
|
||||
buffer[length] = '\0';
|
||||
fclose(f);
|
||||
|
||||
HashMap* table = create_map(100);
|
||||
// Initialize predefined symbols
|
||||
insert(table, "SP", 0, ADDRESS_RAM);
|
||||
insert(table, "LCL", 1, ADDRESS_RAM);
|
||||
insert(table, "ARG", 2, ADDRESS_RAM);
|
||||
insert(table, "THIS", 3, ADDRESS_RAM);
|
||||
insert(table, "THAT", 4, ADDRESS_RAM);
|
||||
insert(table, "SCREEN", 16384, ADDRESS_RAM);
|
||||
insert(table, "KBD", 24576, ADDRESS_RAM);
|
||||
// ... (R0-R15 abbreviated)
|
||||
for(int i=0; i<=15; i++) {
|
||||
char r[5]; sprintf(r, "R%d", i);
|
||||
insert(table, r, i, ADDRESS_RAM);
|
||||
}
|
||||
|
||||
char* cleaned = remove_whitespace_and_comments(buffer);
|
||||
char* first = first_pass(cleaned, table);
|
||||
char* second = second_pass(first, table);
|
||||
|
||||
FILE* out = fopen(filepath, "w");
|
||||
fprintf(out, "%s", second);
|
||||
fclose(out);
|
||||
|
||||
printf("Assembly complete: %s\n", filepath);
|
||||
|
||||
free(buffer); free(cleaned); free(first); free(second);
|
||||
free_map(table);
|
||||
}
|
||||
|
||||
// Program entry point: assemble the file named by the first argument.
// Prints a usage line and returns 1 when no argument is given; otherwise
// calls process() and returns 0.
int main(int argc, char** argv) {
    if (argc >= 2) {
        process(argv[1]);
        return 0;
    }

    printf("Usage: %s <file.asm>\n", argv[0]);
    return 1;
}
|
||||
+158
@@ -0,0 +1,158 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include "types.h"
|
||||
|
||||
// Simple hash map implementation for C
|
||||
typedef struct Node {
|
||||
char* key;
|
||||
int value;
|
||||
Address address;
|
||||
struct Node* next;
|
||||
} Node;
|
||||
|
||||
typedef struct {
|
||||
Node** table;
|
||||
int size;
|
||||
} HashMap;
|
||||
|
||||
// Compute the bucket index for `key`: a base-31 polynomial over its
// characters (unsigned, wrapping), reduced modulo `size`.
unsigned int hash(const char* key, int size) {
    unsigned int acc = 0;

    for (const char* p = key; *p != '\0'; p++) {
        acc = acc * 31 + *p;
    }
    return acc % size;
}
|
||||
|
||||
HashMap* create_map(int size) {
|
||||
HashMap* map = malloc(sizeof(HashMap));
|
||||
map->size = size;
|
||||
map->table = calloc(size, sizeof(Node*));
|
||||
return map;
|
||||
}
|
||||
|
||||
// Add a (key, value, address) entry to the map.
//
// The key is copied with strdup(). The new entry is pushed at the head of its
// bucket without checking for an existing entry with the same key, so a
// repeated key shadows the older entry for get(). Allocation results are not
// checked.
void insert(HashMap* map, const char* key, int value, Address address) {
    Node* entry = malloc(sizeof(Node));
    entry->key = strdup(key);
    entry->value = value;
    entry->address = address;

    Node** bucket = &map->table[hash(key, map->size)];
    entry->next = *bucket;
    *bucket = entry;
}
|
||||
|
||||
// Look up `key` and return the first matching entry in its bucket, or NULL
// when the key is not present. The returned node remains owned by the map.
Node* get(HashMap* map, const char* key) {
    for (Node* entry = map->table[hash(key, map->size)];
         entry != NULL;
         entry = entry->next) {
        if (strcmp(entry->key, key) == 0) {
            return entry;
        }
    }
    return NULL;
}
|
||||
|
||||
// Unlink and free the first entry whose key equals `key`. Does nothing when
// the key is not present.
void remove_key(HashMap* map, const char* key) {
    // `link` always points at the pointer that leads to the current entry,
    // so the bucket head and interior nodes are unlinked the same way.
    Node** link = &map->table[hash(key, map->size)];

    while (*link != NULL) {
        Node* candidate = *link;
        if (strcmp(candidate->key, key) == 0) {
            *link = candidate->next;
            free(candidate->key);
            free(candidate);
            return;
        }
        link = &candidate->next;
    }
}
|
||||
|
||||
// Release every entry (including its key copy), the bucket array, and the
// map itself. `map` must not be NULL.
void free_map(HashMap* map) {
    for (int bucket = 0; bucket < map->size; bucket++) {
        Node* entry = map->table[bucket];
        while (entry != NULL) {
            Node* following = entry->next;
            free(entry->key);
            free(entry);
            entry = following;
        }
    }
    free(map->table);
    free(map);
}
|
||||
|
||||
// Parser implementation
|
||||
// Parse one cleaned assembly line into an Instruction.
//
// A line starting with '@' becomes an INSTRUCTION_A whose `decimal` field is
// the address as decimal text:
//   - a purely numeric operand is copied as is;
//   - a symbol found in `table` is replaced by its stored value;
//   - an unknown symbol is treated as a new variable: it is given the next
//     free RAM address and inserted into `table` with ADDRESS_RAM.
// Any other line becomes an INSTRUCTION_C split as "dest=comp;jump", where
// the "dest=" and ";jump" parts are optional; absent parts are left NULL.
//
// Every non-NULL string in the result is heap-allocated and owned by the
// caller. `instruction` itself is not modified.
Instruction parse(const char* instruction, HashMap* table) {
    // Variables live between the R0..R15 registers and the SCREEN base
    // address that the caller pre-loads into the table.
    enum { FIRST_VARIABLE_ADDRESS = 16, VARIABLE_LIMIT = 16384 };

    Instruction result;

    if (instruction[0] == '@') {
        const char* operand = instruction + 1;
        char* endptr;

        result.type = INSTRUCTION_A;

        // Numeric only if strtol consumed at least one character and reached
        // the end of the operand.
        (void)strtol(operand, &endptr, 10);
        if (endptr != operand && *endptr == '\0') {
            result.data.a.decimal = strdup(operand);
            return result;
        }

        int address;
        Node* node = get(table, operand);
        if (node != NULL) {
            address = node->value;
        } else {
            // Next variable address: one past the highest RAM symbol already
            // inside the variable range. Predefined symbols outside that
            // range (such as SCREEN and KBD) must not influence it.
            address = FIRST_VARIABLE_ADDRESS - 1;
            for (int i = 0; i < table->size; i++) {
                for (Node* curr = table->table[i]; curr != NULL; curr = curr->next) {
                    if (curr->address == ADDRESS_RAM &&
                        curr->value < VARIABLE_LIMIT &&
                        curr->value > address) {
                        address = curr->value;
                    }
                }
            }
            address++;
            insert(table, operand, address, ADDRESS_RAM);
        }

        char buf[32];
        snprintf(buf, sizeof buf, "%d", address);
        result.data.a.decimal = strdup(buf);
        return result;
    }

    result.type = INSTRUCTION_C;
    result.data.c.dest = NULL;
    result.data.c.comp = NULL;
    result.data.c.jump = NULL;

    const char* eq = strchr(instruction, '=');
    const char* semi = strchr(instruction, ';');
    const char* comp_start = instruction;

    // Only an '=' that precedes the ';' separates a destination; otherwise
    // the comp length below could go negative.
    if (eq != NULL && (semi == NULL || eq < semi)) {
        result.data.c.dest = strndup(instruction, (size_t)(eq - instruction));
        comp_start = eq + 1;
    }

    if (semi != NULL) {
        result.data.c.comp = strndup(comp_start, (size_t)(semi - comp_start));
        result.data.c.jump = strdup(semi + 1);
    } else {
        result.data.c.comp = strdup(comp_start);
    }

    return result;
}
|
||||
+34
@@ -0,0 +1,34 @@
|
||||
#ifndef TYPES_H
|
||||
#define TYPES_H
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
// Payload of an A-instruction.
typedef struct {
    char *decimal;  // the address value, held as decimal text
} AInstruction;
|
||||
|
||||
// Payload of a C-instruction: the three mnemonic fields as text.
typedef struct {
    char *dest;  // destination mnemonic
    char *comp;  // computation mnemonic
    char *jump;  // jump mnemonic
} CInstruction;
|
||||
|
||||
// Kind of address a symbol refers to.
typedef enum {
    ADDRESS_RAM = 0,
    ADDRESS_ROM = 1
} Address;
|
||||
|
||||
// Discriminator for the two instruction forms.
typedef enum {
    INSTRUCTION_A = 0,
    INSTRUCTION_C = 1
} InstructionType;
|
||||
|
||||
typedef struct {
|
||||
InstructionType type;
|
||||
union {
|
||||
AInstruction a;
|
||||
CInstruction c;
|
||||
} data;
|
||||
} Instruction;
|
||||
|
||||
#endif // TYPES_H
|
||||
Reference in New Issue
Block a user