wip: rewrite in C

This commit is contained in:
2026-05-11 08:56:30 +01:00
parent ec27d1abb2
commit 463e5b1e56
5 changed files with 437 additions and 0 deletions
+17
View File
@@ -0,0 +1,17 @@
# Build rules for the assembler binary.
CC = gcc
# The sources call strdup/strndup, which are POSIX rather than ISO C11.
# With plain -std=c11 glibc hides their prototypes, so request them explicitly.
CFLAGS = -Wall -Wextra -std=c11 -D_POSIX_C_SOURCE=200809L
# Emit .d files so edits to anything main.c pulls in via #include trigger a rebuild.
DEPFLAGS = -MMD -MP
SRC = src/main.c
OBJ = $(SRC:.c=.o)
DEP = $(OBJ:.o=.d)
TARGET = assembler

.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJ)
	$(CC) $(CFLAGS) -o $(TARGET) $(OBJ)

%.o: %.c
	$(CC) $(CFLAGS) $(DEPFLAGS) -c $< -o $@

clean:
	rm -f $(OBJ) $(DEP) $(TARGET)

-include $(DEP)
+70
View File
@@ -0,0 +1,70 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Render the low 15 bits of `decimal` as a string of '0'/'1' characters,
 * most significant bit first.
 *
 * Returns a heap-allocated, NUL-terminated 16-byte buffer that the caller
 * must free(), or NULL if the allocation fails.
 */
char* decimal_to_fifteen_bits_binary(int decimal) {
    enum { BIT_COUNT = 15 };
    /* Test bits on an unsigned copy so no shift or mask involves a sign bit. */
    unsigned int bits = (unsigned int)decimal;
    char* bin = malloc(BIT_COUNT + 1);
    if (bin == NULL) {
        return NULL;
    }
    for (int pos = 0; pos < BIT_COUNT; pos++) {
        /* pos 0 is the leftmost character, i.e. bit 14. */
        bin[pos] = ((bits >> (BIT_COUNT - 1 - pos)) & 1u) ? '1' : '0';
    }
    bin[BIT_COUNT] = '\0';
    return bin;
}
/*
 * Map a destination mnemonic to its 3-character bit string.
 *
 * A NULL or unrecognised mnemonic yields "000". The result is a fresh
 * strdup() copy owned by the caller.
 */
char* translate_dest(const char* dest) {
    static const struct {
        const char* mnemonic;
        const char* bits;
    } dest_codes[] = {
        {"M",   "001"},
        {"D",   "010"},
        {"MD",  "011"},
        {"A",   "100"},
        {"AM",  "101"},
        {"AD",  "110"},
        {"AMD", "111"},
    };
    const char* bits = "000";

    if (dest != NULL) {
        for (size_t k = 0; k < sizeof dest_codes / sizeof dest_codes[0]; k++) {
            if (strcmp(dest, dest_codes[k].mnemonic) == 0) {
                bits = dest_codes[k].bits;
                break;
            }
        }
    }
    return strdup(bits);
}
/*
 * Map a jump mnemonic to its 3-character bit string.
 *
 * A NULL or unrecognised mnemonic yields "000". The result is a fresh
 * strdup() copy owned by the caller.
 */
char* translate_jump(const char* jump) {
    static const char* const mnemonics[] = {
        "JGT", "JEQ", "JGE", "JLT", "JNE", "JLE", "JMP",
    };
    static const char* const codes[] = {
        "001", "010", "011", "100", "101", "110", "111",
    };
    size_t count = sizeof mnemonics / sizeof mnemonics[0];

    if (jump == NULL) {
        return strdup("000");
    }
    for (size_t k = 0; k < count; k++) {
        if (strcmp(jump, mnemonics[k]) == 0) {
            return strdup(codes[k]);
        }
    }
    return strdup("000");
}
/*
 * Map a computation mnemonic to its 7-character bit string.
 *
 * A NULL or unrecognised mnemonic yields "0000000", which is also the code
 * for "D&A"; callers that need to reject bad mnemonics must validate them
 * before calling. The result is a fresh strdup() copy owned by the caller.
 */
char* translate_comp(const char* comp) {
    static const struct {
        const char* mnemonic;
        const char* bits;
    } comp_codes[] = {
        /* Computations that do not read M. */
        {"0",   "0101010"},
        {"1",   "0111111"},
        {"-1",  "0111010"},
        {"D",   "0001100"},
        {"A",   "0110000"},
        {"!D",  "0001101"},
        {"!A",  "0110001"},
        {"-D",  "0001111"},
        {"-A",  "0110011"},
        {"D+1", "0011111"},
        {"A+1", "0110111"},
        {"D-1", "0001110"},
        {"A-1", "0110010"},
        {"D+A", "0000010"},
        {"D-A", "0010011"},
        {"A-D", "0000111"},
        {"D&A", "0000000"},
        {"D|A", "0010101"},
        /* The same computations with M in place of A. */
        {"M",   "1110000"},
        {"!M",  "1110001"},
        {"-M",  "1110011"},
        {"M+1", "1110111"},
        {"M-1", "1110010"},
        {"D+M", "1000010"},
        {"D-M", "1010011"},
        {"M-D", "1000111"},
        {"D&M", "1000000"},
        {"D|M", "1010101"},
    };

    if (comp != NULL) {
        for (size_t k = 0; k < sizeof comp_codes / sizeof comp_codes[0]; k++) {
            if (strcmp(comp, comp_codes[k].mnemonic) == 0) {
                return strdup(comp_codes[k].bits);
            }
        }
    }
    return strdup("0000000");
}
+158
View File
@@ -0,0 +1,158 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "types.h"
#include "parser.c"
#include "code.c"
/*
 * Produce a copy of `content` with "//" comments, spaces, tabs and carriage
 * returns removed and with empty lines dropped. Every surviving line is
 * terminated by '\n', including the last one.
 *
 * `content` is not modified. Returns a heap-allocated string the caller must
 * free(), or NULL if `content` is NULL or the allocation fails.
 */
char* remove_whitespace_and_comments(char* content) {
    if (content == NULL) {
        return NULL;
    }

    /* Characters are only ever dropped, except that a final line without a
     * trailing newline gains one: strlen + '\n' + NUL is the worst case. */
    char* cleaned = malloc(strlen(content) + 2);
    if (cleaned == NULL) {
        return NULL;
    }

    size_t out = 0;
    const char* p = content;
    while (*p != '\0') {
        size_t line_start = out;

        /* Copy the non-blank characters up to a comment or the end of line. */
        while (*p != '\0' && *p != '\n') {
            if (p[0] == '/' && p[1] == '/') {
                break;
            }
            if (*p != ' ' && *p != '\t' && *p != '\r') {
                cleaned[out++] = *p;
            }
            p++;
        }

        /* Skip whatever is left of the line (the comment text, if any). */
        while (*p != '\0' && *p != '\n') {
            p++;
        }
        if (*p == '\n') {
            p++;
        }

        /* Only lines that kept at least one character are emitted. */
        if (out > line_start) {
            cleaned[out++] = '\n';
        }
    }
    cleaned[out] = '\0';
    return cleaned;
}
/*
 * First assembler pass: record every "(LABEL)" line in `table` and return a
 * copy of `content` with those label lines removed.
 *
 * A label is bound, with ADDRESS_ROM, to the number of non-label lines that
 * precede it, i.e. the address of the instruction that follows the label.
 * Empty lines are dropped. Every emitted line is terminated by '\n'.
 *
 * `content` is not modified. Returns a heap-allocated string the caller must
 * free(), or NULL if `content` is NULL or an allocation fails.
 */
char* first_pass(char* content, HashMap* table) {
    if (content == NULL) {
        return NULL;
    }

    /* Lines are copied or dropped; the only possible growth is the '\n'
     * appended to an unterminated final line. */
    char* stripped = malloc(strlen(content) + 2);
    if (stripped == NULL) {
        return NULL;
    }

    size_t out = 0;
    int rom_address = 0; /* address of the next real instruction */
    const char* line = content;

    while (*line != '\0') {
        const char* eol = strchr(line, '\n');
        size_t len = eol ? (size_t)(eol - line) : strlen(line);

        if (len >= 2 && line[0] == '(' && line[len - 1] == ')') {
            /* Label: copy the text between the parentheses for insert(). */
            size_t sym_len = len - 2;
            char* symbol = malloc(sym_len + 1);
            if (symbol == NULL) {
                free(stripped);
                return NULL;
            }
            memcpy(symbol, line + 1, sym_len);
            symbol[sym_len] = '\0';
            insert(table, symbol, rom_address, ADDRESS_ROM);
            free(symbol);
        } else if (len > 0) {
            memcpy(stripped + out, line, len);
            out += len;
            stripped[out++] = '\n';
            rom_address++;
        }

        line = eol ? eol + 1 : line + len;
    }

    stripped[out] = '\0';
    return stripped;
}
char* second_pass(char* content, HashMap* table) {
char* lines[1000];
int line_count = 0;
char* line = strtok(content, "\n");
while (line != NULL) {
lines[line_count++] = strdup(line);
line = strtok(NULL, "\n");
}
char* output = malloc(strlen(content) * 10 + 1); // Large enough buffer
output[0] = '\0';
for (int i = 0; i < line_count; i++) {
if (strlen(lines[i]) == 0) continue;
Instruction instr = parse(lines[i], table);
if (instr.type == INSTRUCTION_A) {
int val = atoi(instr.data.a.decimal);
char* bin = decimal_to_fifteen_bits_binary(&val);
strcat(output, "0");
strcat(output, bin);
strcat(output, "\n");
free(bin);
} else {
strcat(output, "111");
char* comp = translate_comp(instr.data.c.comp ? instr.data.c.comp : "");
char* dest = translate_dest(instr.data.c.dest ? instr.data.c.dest : "");
char* jump = translate_jump(instr.data.c.jump ? instr.data.c.jump : "");
strcat(output, comp);
strcat(output, dest);
strcat(output, jump);
strcat(output, "\n");
free(comp); free(dest); free(jump);
}
free(lines[i]);
}
return output;
}
/*
 * Assemble the file at `path` and write the result next to it.
 *
 * The output name is `path` with a trailing ".asm" replaced by ".hack"
 * (".hack" is simply appended when there is no such suffix). Errors are
 * reported on stderr and abandon the run without writing partial state.
 */
void process(const char* path) {
    static const char asm_ext[] = ".asm";
    static const char hack_ext[] = ".hack";
    char filepath[256];
    char* buffer = NULL;
    char* cleaned = NULL;
    char* first = NULL;
    char* second = NULL;
    HashMap* table = NULL;
    FILE* out = NULL;

    /* Only a true suffix is replaced, so ".asm" elsewhere in the path is kept. */
    size_t stem_len = strlen(path);
    size_t ext_len = sizeof asm_ext - 1;
    if (stem_len >= ext_len && strcmp(path + stem_len - ext_len, asm_ext) == 0) {
        stem_len -= ext_len;
    }
    if (stem_len + sizeof hack_ext > sizeof filepath) {
        fprintf(stderr, "Output path too long for input: %s\n", path);
        return;
    }
    memcpy(filepath, path, stem_len);
    memcpy(filepath + stem_len, hack_ext, sizeof hack_ext); /* includes the NUL */

    FILE* f = fopen(path, "r");
    if (!f) { perror("Error opening input file"); return; }

    long length = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        length = ftell(f);
    }
    if (length < 0 || fseek(f, 0, SEEK_SET) != 0) {
        perror("Error reading input file");
        fclose(f);
        return;
    }

    buffer = malloc((size_t)length + 1);
    if (buffer == NULL) {
        perror("Error allocating input buffer");
        fclose(f);
        return;
    }
    /* Terminate at the number of bytes actually read, which can be smaller
     * than `length` (e.g. text-mode newline translation). */
    size_t got = fread(buffer, 1, (size_t)length, f);
    int read_failed = ferror(f);
    fclose(f);
    if (read_failed) {
        fprintf(stderr, "Error reading input file\n");
        goto cleanup;
    }
    buffer[got] = '\0';

    table = create_map(100);
    if (table == NULL) {
        fprintf(stderr, "Error creating symbol table\n");
        goto cleanup;
    }

    // Initialize predefined symbols
    insert(table, "SP", 0, ADDRESS_RAM);
    insert(table, "LCL", 1, ADDRESS_RAM);
    insert(table, "ARG", 2, ADDRESS_RAM);
    insert(table, "THIS", 3, ADDRESS_RAM);
    insert(table, "THAT", 4, ADDRESS_RAM);
    insert(table, "SCREEN", 16384, ADDRESS_RAM);
    insert(table, "KBD", 24576, ADDRESS_RAM);
    // R0..R15 map to RAM addresses 0..15
    for (int i = 0; i <= 15; i++) {
        char r[5];
        snprintf(r, sizeof r, "R%d", i);
        insert(table, r, i, ADDRESS_RAM);
    }

    /* Stop at the first stage that reports failure with NULL. */
    cleaned = remove_whitespace_and_comments(buffer);
    if (cleaned != NULL) {
        first = first_pass(cleaned, table);
    }
    if (first != NULL) {
        second = second_pass(first, table);
    }
    if (second == NULL) {
        fprintf(stderr, "Error assembling %s\n", path);
        goto cleanup;
    }

    out = fopen(filepath, "w");
    if (out == NULL) {
        perror("Error opening output file");
        goto cleanup;
    }
    int write_failed = (fputs(second, out) == EOF);
    /* fclose flushes, so a failure here also means the output is incomplete. */
    if (fclose(out) == EOF) {
        write_failed = 1;
    }
    if (write_failed) {
        perror("Error writing output file");
        goto cleanup;
    }

    printf("Assembly complete: %s\n", filepath);

cleanup:
    free(buffer);
    free(cleaned);
    free(first);
    free(second);
    if (table != NULL) {
        free_map(table);
    }
}
/*
 * Entry point: expects the path of the assembly file as the first argument.
 * Prints a usage line and returns 1 when it is missing; otherwise hands the
 * path to process() and returns 0.
 */
int main(int argc, char** argv) {
    if (argc >= 2) {
        process(argv[1]);
        return 0;
    }
    printf("Usage: %s <file.asm>\n", argv[0]);
    return 1;
}
+158
View File
@@ -0,0 +1,158 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "types.h"
// Simple hash map implementation for C
// One entry of the hash map. Entries that land in the same bucket are
// chained together through `next`.
typedef struct Node {
// Symbol name; insert() stores its own strdup'd copy, released when the node is freed.
char* key;
// Integer stored for the key (insert() callers pass an address here).
int value;
// Address-space tag supplied by the caller of insert().
Address address;
// Next entry in the same bucket, or NULL at the end of the chain.
struct Node* next;
} Node;
// Chained hash map from string keys to Node entries.
typedef struct {
// Array of `size` bucket heads; create_map() allocates it zeroed, so an
// empty bucket is NULL.
Node** table;
// Number of buckets; hash() reduces keys modulo this value.
int size;
} HashMap;
/*
 * Multiply-by-31 string hash of `key`, reduced modulo `size`.
 * `size` must be non-zero.
 */
unsigned int hash(const char* key, int size) {
    unsigned int acc = 0;
    for (const char* p = key; *p != '\0'; p++) {
        acc = acc * 31 + *p;
    }
    return acc % size;
}
/*
 * Allocate an empty hash map with `size` buckets.
 *
 * Returns NULL when `size` is not positive (hash() would otherwise reduce
 * modulo zero) or when an allocation fails. Release the map with free_map().
 */
HashMap* create_map(int size) {
    if (size <= 0) {
        return NULL;
    }

    HashMap* map = malloc(sizeof *map);
    if (map == NULL) {
        return NULL;
    }

    /* calloc leaves every bucket head NULL, i.e. every chain empty. */
    map->table = calloc((size_t)size, sizeof *map->table);
    if (map->table == NULL) {
        free(map);
        return NULL;
    }
    map->size = size;
    return map;
}
/*
 * Add `key` -> (`value`, `address`) to `map`.
 *
 * The key is copied, so the caller keeps ownership of `key`. The new entry
 * goes to the front of its bucket; if `key` is already present the older
 * entry stays in the chain but is shadowed for get().
 *
 * The function has no way to report failure, so running out of memory is
 * treated as fatal: a message is printed and the process exits.
 */
void insert(HashMap* map, const char* key, int value, Address address) {
    Node* new_node = malloc(sizeof *new_node);
    char* key_copy = strdup(key);
    if (new_node == NULL || key_copy == NULL) {
        fprintf(stderr, "insert: out of memory\n");
        exit(EXIT_FAILURE);
    }

    unsigned int h = hash(key, map->size);
    new_node->key = key_copy;
    new_node->value = value;
    new_node->address = address;
    new_node->next = map->table[h];
    map->table[h] = new_node;
}
/*
 * Look up `key` in `map`.
 * Returns the first matching entry in the key's bucket, or NULL if none.
 */
Node* get(HashMap* map, const char* key) {
    Node* entry = map->table[hash(key, map->size)];
    for (; entry != NULL; entry = entry->next) {
        if (strcmp(entry->key, key) == 0) {
            return entry;
        }
    }
    return NULL;
}
/*
 * Unlink and free the first entry whose key equals `key`.
 * Does nothing when the key is absent.
 */
void remove_key(HashMap* map, const char* key) {
    /* `link` points at whichever pointer currently leads to the entry under
     * inspection: the bucket head first, then each predecessor's `next`. */
    Node** link = &map->table[hash(key, map->size)];
    while (*link != NULL) {
        Node* entry = *link;
        if (strcmp(entry->key, key) == 0) {
            *link = entry->next;
            free(entry->key);
            free(entry);
            return;
        }
        link = &entry->next;
    }
}
/*
 * Release every entry of `map` (including the copied keys), then the bucket
 * array and the map itself.
 */
void free_map(HashMap* map) {
    for (int bucket = 0; bucket < map->size; bucket++) {
        Node* entry = map->table[bucket];
        while (entry != NULL) {
            /* Read the successor before the entry is freed. */
            Node* following = entry->next;
            free(entry->key);
            free(entry);
            entry = following;
        }
    }
    free(map->table);
    free(map);
}
// Parser implementation
/*
 * Parse one whitespace-free instruction line.
 *
 * A line starting with '@' yields an INSTRUCTION_A whose `decimal` holds the
 * address as decimal text:
 *   - a purely numeric operand is copied verbatim;
 *   - a symbol found in `table` resolves to its stored value;
 *   - an unknown symbol is treated as a new variable: it receives the next
 *     free RAM address (starting at 16) and is inserted into `table`.
 *
 * Any other line yields an INSTRUCTION_C split as [dest=]comp[;jump]; absent
 * parts are left NULL.
 *
 * Every string in the result is heap-allocated and owned by the caller.
 */
Instruction parse(const char* instruction, HashMap* table) {
    enum {
        LAST_RESERVED_RAM = 15, /* highest register address; variables start right above */
        SCREEN_BASE = 16384     /* memory-mapped I/O begins here; never a variable slot */
    };
    Instruction result = {0};

    if (instruction[0] == '@') {
        const char* operand = instruction + 1;
        result.type = INSTRUCTION_A;

        /* Only the end position matters: it shows whether the operand is all digits. */
        char* endptr;
        (void)strtol(operand, &endptr, 10);
        if (*endptr == '\0') {
            result.data.a.decimal = strdup(operand);
            return result;
        }

        int address;
        Node* node = get(table, operand);
        if (node) {
            address = node->value;
        } else {
            /* Highest RAM address handed out so far, ignoring the
             * memory-mapped symbols at and above SCREEN_BASE; otherwise the
             * first variable would land after the highest predefined symbol. */
            int highest = LAST_RESERVED_RAM;
            for (int i = 0; i < table->size; i++) {
                for (Node* curr = table->table[i]; curr; curr = curr->next) {
                    if (curr->address == ADDRESS_RAM && curr->value < SCREEN_BASE &&
                        curr->value > highest) {
                        highest = curr->value;
                    }
                }
            }
            address = highest + 1;
            insert(table, operand, address, ADDRESS_RAM);
        }

        char buf[32];
        snprintf(buf, sizeof buf, "%d", address);
        result.data.a.decimal = strdup(buf);
        return result;
    }

    result.type = INSTRUCTION_C;
    result.data.c.dest = NULL;
    result.data.c.comp = NULL;
    result.data.c.jump = NULL;

    /* dest is everything before '='; the ';' is searched for only after it,
     * so the comp length below can never be negative. */
    const char* eq = strchr(instruction, '=');
    const char* comp_start = eq ? eq + 1 : instruction;
    const char* semi = strchr(comp_start, ';');

    if (eq) {
        result.data.c.dest = strndup(instruction, (size_t)(eq - instruction));
    }
    if (semi) {
        result.data.c.comp = strndup(comp_start, (size_t)(semi - comp_start));
        result.data.c.jump = strdup(semi + 1);
    } else {
        result.data.c.comp = strdup(comp_start);
    }
    return result;
}
+34
View File
@@ -0,0 +1,34 @@
#ifndef TYPES_H
#define TYPES_H
#include <stdbool.h>
// Payload of an A-instruction ("@value").
typedef struct {
// Address as decimal text; parse() fills this with a heap-allocated string.
char* decimal;
} AInstruction;
// Payload of a C-instruction ("dest=comp;jump"). parse() fills the fields
// with heap-allocated mnemonic strings and leaves an absent part NULL.
typedef struct {
// Text before '=', or NULL when the line has no '='.
char* dest;
// Computation mnemonic.
char* comp;
// Text after ';', or NULL when the line has no ';'.
char* jump;
} CInstruction;
// Address space a symbol-table entry refers to.
typedef enum {
// Data memory: used for the predefined symbols and for variables.
ADDRESS_RAM,
// Instruction memory: used for "(LABEL)" definitions.
ADDRESS_ROM
} Address;
// Discriminator for the union inside Instruction.
typedef enum {
// "@value" form; the `a` member is valid.
INSTRUCTION_A,
// "dest=comp;jump" form; the `c` member is valid.
INSTRUCTION_C
} InstructionType;
// A parsed instruction: a tagged union of the two instruction forms.
typedef struct {
// Selects which member of `data` is meaningful.
InstructionType type;
// `a` and `c` share storage; read only the member that `type` names.
union {
AInstruction a;
CInstruction c;
} data;
} Instruction;
#endif // TYPES_H