/*
 * Small two-pass assembler.
 *
 * The mnemonics, register names and operand encodings used below appear to
 * match the DCPU-16 (spec 1.1) instruction format -- NOTE(review): confirm
 * against the intended target.
 *
 * Source syntax as implemented by the tokenizer:
 *   ; comment to end of line
 *   :name          defines a label at the current output offset
 *   OP a, b        basic instruction
 *   OP a           non-basic instruction
 *
 * Pass one emits words into out[] and records a relocation for every symbolic
 * operand; apply_relocs() then patches those words with the label addresses.
 */

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define TOKMAXLEN	512
#define MAXSYMBOLS	512
#define MAXWORDS	0x10000		/* Capacity of out[], in 16-bit words. */

typedef enum tok_type {
	TOK_EOF,
	TOK_ID,
	TOK_LABEL,
	TOK_NUM,
	TOK_OP,
} toktype_t;

struct instr {
	char *name;
};

struct operand {
	enum {
		OP_REG,		/* Register (also includes "special" regs) */
		OP_IMM,		/* Immediate */
		OP_RELOC,	/*
				 * Symbol name to be relocated to an immediate.
				 */
		OP_ABS,		/* Absolute: [addr] */
		OP_IND,		/* Indirect: [REG] */
		OP_IDX,		/* Indexed: [addr + REG] */
	} type;
	int reg;
	union {
		int val;
		char *symbol;
	};
};

struct symbol {
	char *name;
	int addr;
	int operand;	/* Which operand needs to be relocated. */
};

/* Tokenizer state shared with the parser: text, length and numeric value. */
char toktext[TOKMAXLEN];
int toklen;
int toklval;
int line = 1;

/* Assembled output, indexed in 16-bit words. */
uint16_t out[MAXWORDS];
int out_off;

struct symbol symbols[MAXSYMBOLS];
int num_symbols;

struct symbol relocs[MAXSYMBOLS];
int num_relocs;

/* Basic (two-operand) opcodes; the array index is the opcode value. */
struct instr b_ops[16] = {
	[0x1] = { "SET" },
	[0x2] = { "ADD" },
	[0x3] = { "SUB" },
	[0x4] = { "MUL" },
	[0x5] = { "DIV" },
	[0x6] = { "MOD" },
	[0x7] = { "SHL" },
	[0x8] = { "SHR" },
	[0x9] = { "AND" },
	[0xa] = { "BOR" },
	[0xb] = { "XOR" },
	[0xc] = { "IFE" },
	[0xd] = { "IFN" },
	[0xe] = { "IFG" },
	[0xf] = { "IFB" },
};

/* Non-basic (one-operand) opcodes; the array index is the opcode value. */
struct instr nb_ops[64] = {
	[0x01] = { "JSR" },
};

/*
 * Return non-zero if c may appear inside an identifier or label name.
 * The cast keeps negative char values out of isalpha(), which is undefined
 * for arguments that are neither EOF nor representable as unsigned char.
 */
static int
isalphanum(char c)
{

	return (isalpha((unsigned char)c) || ((c >= '0') && (c <= '9')) ||
	    (c == '_'));
}

/*
 * Duplicate the first len bytes of s into a freshly allocated, NUL-terminated
 * string.  Exits on allocation failure.  The caller owns the result.
 */
static char *
copy_name(const char *s, int len)
{
	char *p;

	if ((p = malloc((size_t)len + 1)) == NULL)
		err(1, "malloc");
	memcpy(p, s, (size_t)len);
	p[len] = '\0';
	return (p);
}

/*
 * Turn the tokenizer state reached when a token ends into a token type,
 * converting the accumulated text to toklval for numbers.  States that hold
 * no pending token (start, comment) yield TOK_EOF.
 */
static toktype_t
finish_token(int state)
{

	switch (state) {
	case 2:
		return (TOK_LABEL);
	case 3:
		return (TOK_ID);
	case 4:
		toklval = strtol(toktext, NULL, 0);
		return (TOK_NUM);
	case 5:
		toklval = 0;
		return (TOK_NUM);
	case 6:
		toklval = strtol(toktext, NULL, 16);
		return (TOK_NUM);
	default:
		return (TOK_EOF);
	}
}

/*
 * Scan the next token from m[*_off .. size).
 *
 * On return toktext/toklen hold the token text (without the leading ':' for
 * labels), toklval holds the value of a TOK_NUM, and *_off points at the first
 * unconsumed byte.  Returns TOK_EOF when only whitespace/comments remain.
 *
 * States: 0 start, 1 comment, 2 label, 3 identifier, 4 decimal number,
 * 5 seen a leading '0', 6 hex digits.
 *
 * NOTE(review): a leading '0' followed by any digit also enters the hex
 * state, so "012" is converted with base 16.  Kept as in the original.
 */
toktype_t
next_token(char *m, size_t size, off_t *_off)
{
	off_t off;
	char c;
	int state;

	memset(toktext, 0, TOKMAXLEN);
	state = toklen = toklval = 0;
	off = *_off;

	while ((size_t)off < size) {
		c = m[off];

		switch (state) {
		case 0:
			if (c == ';') {
				state = 1;
				goto next;
			} else if ((c == ' ') || (c == '\t') || (c == '\n') ||
			    (c == '\r')) {
				/* '\r' is skipped so CRLF sources assemble. */
				goto next;
			} else if (c == ':') {
				state = 2;
				goto next;
			} else if (isalpha((unsigned char)c) || (c == '_')) {
				state = 3;
				goto storenext;
			} else if ((c >= '1') && (c <= '9')) {
				state = 4;
				goto storenext;
			} else if (c == '0') {
				state = 5;
				goto storenext;
			} else if ((c == ',') || (c == '[') || (c == ']') ||
			    (c == '+') || (c == '-')) {
				toktext[toklen++] = c;
				*_off = off + 1;
				return (TOK_OP);
			}
			errx(1, "Error: line %d Unexpected input 0x%x "
			    " state %d\n", line, (unsigned char)c, state);

		/* ; */
		case 1:
			if (c == '\n')
				state = 0;
			goto next;

		/* : */
		case 2:
		/* [A-Za-z_] */
		case 3:
			if (isalphanum(c))
				goto storenext;
			*_off = off;
			return (finish_token(state));

		/* [1-9] */
		/* XXX negative numbers */
		case 4:
			if ((c >= '0') && (c <= '9'))
				goto storenext;
			*_off = off;
			return (finish_token(state));

		case 5:
			if ((c == 'x') || ((c >= '0') && (c <= '9'))) {
				state = 6;
				goto storenext;
			}
			*_off = off;
			return (finish_token(state));

		case 6:
			if (((c >= '0') && (c <= '9')) ||
			    ((c >= 'a') && (c <= 'f')) ||
			    ((c >= 'A') && (c <= 'F')))
				goto storenext;
			*_off = off;
			return (finish_token(state));

		default:
			errx(1, "Error: line %d: Unknown state: %d\n", line,
			    state);
		}

storenext:
		/* Keep one byte free so toktext stays NUL-terminated. */
		if (toklen >= TOKMAXLEN - 1)
			errx(1, "Error: line %d: Token too long (max %d)\n",
			    line, TOKMAXLEN - 1);
		toktext[toklen++] = c;
next:
		off++;
		if (c == '\n')
			line++;
	}

	/*
	 * Input ended.  If a token was in progress (e.g. the file has no
	 * trailing newline) return it instead of silently dropping it; the
	 * following call then reports TOK_EOF.
	 */
	*_off = off;
	return (finish_token(state));
}

/*
 * Record label `symbol` (len bytes) at word address addr.
 * Exits if the name is empty, already defined, or the table is full.
 */
void
add_symbol(char *symbol, int len, int addr)
{
	int i;

	if (len <= 0)
		errx(1, "Error: line %d: Empty label name\n", line);

	for (i = 0; i < num_symbols; i++) {
		/* Compare whole names; a prefix match is not a duplicate. */
		if ((strlen(symbols[i].name) == (size_t)len) &&
		    (memcmp(symbol, symbols[i].name, (size_t)len) == 0))
			errx(1, "Error: line %d: Symbol %.*s already exists.\n",
			    line, len, symbol);
	}

	if (num_symbols >= MAXSYMBOLS)
		errx(1, "Error: line %d: Too many symbols (max %d)\n", line,
		    MAXSYMBOLS);

	symbols[num_symbols].name = copy_name(symbol, len);
	symbols[num_symbols].addr = addr;
	num_symbols++;
}

/*
 * Map a register name to its operand code.
 * Returns 0..7 for A,B,C,X,Y,Z,I,J, 0x18..0x1d for the "special" operands,
 * or -1 if s is not a register name.
 */
int
strtoreg(char *s)
{
	static const struct {
		const char *name;
		int code;
	} regs[] = {
		{ "A", 0 }, { "B", 1 }, { "C", 2 }, { "X", 3 },
		{ "Y", 4 }, { "Z", 5 }, { "I", 6 }, { "J", 7 },
		/* The following values are for the "special" operands. */
		{ "POP", 0x18 }, { "PEEK", 0x19 }, { "PUSH", 0x1a },
		{ "SP", 0x1b }, { "PC", 0x1c }, { "O", 0x1d },
	};
	size_t i;

	for (i = 0; i < sizeof(regs) / sizeof(regs[0]); i++) {
		if (strcmp(s, regs[i].name) == 0)
			return (regs[i].code);
	}
	return (-1);
}

/*
 * Return the 6-bit operand field for op.
 *
 * Immediates 0..0x1f are encoded inline as 0x20+val; other immediates and
 * relocations use 0x1f (value in the next word); [addr] uses 0x1e;
 * [REG] is 0x08+reg and [addr+REG] is 0x10+reg.
 */
int
operand_value(struct operand *op)
{

	switch (op->type) {
	case OP_REG:
		return (op->reg);
	case OP_IMM:
		if ((op->val >= 0) && (op->val <= 0x1f))
			return (0x20 + op->val);
		return (0x1f);
	case OP_RELOC:
		return (0x1f);
	case OP_ABS:
		return (0x1e);
	case OP_IND:
		if (op->reg >= 0x8)
			errx(1, "Error: line %d: Invalid register %d\n", line,
			    op->reg);
		return (0x8 + op->reg);
	case OP_IDX:
		if (op->reg >= 0x8)
			errx(1, "Error: line %d: Invalid register %d\n", line,
			    op->reg);
		return (0x10 + op->reg);
	default:
		errx(1, "Error: line %d: Unknown operand type %d\n", line,
		    (int)op->type);
	}

	return (0xff);
}

/*
 * Look up a label by name.  On success store its address in *addr and
 * return 0; return 1 if the label is not defined.
 */
int
find_symbol(char *name, int *addr)
{
	int i;

	for (i = 0; i < num_symbols; i++) {
		if (strcmp(symbols[i].name, name) == 0) {
			*addr = symbols[i].addr;
			return (0);
		}
	}

	return (1);
}

/*
 * Patch every recorded relocation with the address of its label.
 * Exits if a referenced label was never defined.
 */
void
apply_relocs(void)
{
	int addr, i;

	for (i = 0; i < num_relocs; i++) {
		if (find_symbol(relocs[i].name, &addr) != 0)
			errx(1, "Error: Unknown symbol \"%s\"\n",
			    relocs[i].name);
		out[relocs[i].addr] = addr;
	}
}

/* Append one word to out[], exiting if the output buffer is full. */
static void
emit_word(uint16_t w)
{

	if (out_off >= MAXWORDS)
		errx(1, "Error: line %d: Program too large (max %d words)\n",
		    line, MAXWORDS);
	out[out_off++] = w;
}

/*
 * Emit the extra word an operand needs, if any: the address for [addr] and
 * [addr+REG], the value for immediates that do not fit inline, or a zero
 * placeholder plus a relocation record for symbolic operands.
 */
void
emit_operand(struct operand *op)
{

	if ((op->type == OP_ABS) || (op->type == OP_IDX) ||
	    ((op->type == OP_IMM) && ((op->val < 0) || (op->val > 0x1f)))) {
		emit_word((uint16_t)op->val);
	} else if (op->type == OP_RELOC) {
		if (num_relocs >= MAXSYMBOLS)
			errx(1, "Error: line %d: Too many relocations "
			    "(max %d)\n", line, MAXSYMBOLS);
		relocs[num_relocs].name = op->symbol;
		relocs[num_relocs].addr = out_off;
		num_relocs++;
		emit_word(0);	/* Patched later by apply_relocs(). */
	}
}

/*
 * Emit a non-basic instruction: low four bits zero, opcode in bits 4-9,
 * operand in bits 10-15, followed by the operand's extra word if any.
 */
void
emit_nonbasic(int opcode, struct operand *dst)
{
	uint16_t w;

	w = (uint16_t)(opcode << 4);
	w |= (uint16_t)(operand_value(dst) << 10);
	emit_word(w);

	emit_operand(dst);
}

/*
 * Emit a basic instruction: opcode in bits 0-3, `src` in bits 4-9 and `dst`
 * in bits 10-15, followed by their extra words in that order.  `src` is the
 * operand written first in the source text and `dst` the one after the comma.
 */
void
emit_basic(int opcode, struct operand *src, struct operand *dst)
{
	uint16_t w;

	w = (uint16_t)opcode;
	w |= (uint16_t)(operand_value(src) << 4);
	w |= (uint16_t)(operand_value(dst) << 10);
	emit_word(w);

	emit_operand(src);
	emit_operand(dst);
}

/* Consume the next token and exit unless it is the ']' operator. */
static void
expect_rbracket(char *m, size_t size, off_t *off)
{

	if ((next_token(m, size, off) != TOK_OP) || (*toktext != ']'))
		errx(1, "Error: line %d: Expected ']', "
		    " got \"%s\"", line, toktext);
}

/*
 * Parse the remainder of a bracketed operand after the opening '[':
 * [REG], [num] or [num + REG].
 */
static void
memory_operand(char *m, size_t size, off_t *off, struct operand *op)
{
	toktype_t type;
	int reg;

	if ((type = next_token(m, size, off)) == TOK_EOF)
		errx(1, "Error: line %d: Unexpected EOF\n", line);

	if ((type == TOK_ID) && ((reg = strtoreg(toktext)) >= 0)) {
		if (reg >= 8)
			errx(1, "Error: line %d: Only regular registers"
			    " can be used\n", line);
		op->type = OP_IND;
		op->reg = reg;
		expect_rbracket(m, size, off);
		return;
	}

	/* Symbolic addresses inside brackets are not supported. */
	if (type != TOK_NUM)
		errx(1, "not implemented");

	op->val = toklval;
	if ((type = next_token(m, size, off)) == TOK_EOF)
		errx(1, "Error: line %d: Unexpected EOF\n", line);

	if ((type == TOK_OP) && (*toktext == '+')) {
		if ((next_token(m, size, off) != TOK_ID) ||
		    ((reg = strtoreg(toktext)) < 0))
			errx(1, "Error: line %d: Expected "
			    "register\n", line);
		/* reg is saved before the next token overwrites toktext. */
		expect_rbracket(m, size, off);
		op->reg = reg;
		op->type = OP_IDX;
	} else if ((type == TOK_OP) && (*toktext == ']')) {
		op->type = OP_ABS;
	} else {
		/* Previously fell through and produced a bogus operand. */
		errx(1, "Error: line %d: Expected '+' or ']', got \"%s\"\n",
		    line, toktext);
	}
}

/*
 * Parse one operand starting at *_off and fill in *op.  On return *_off
 * points past the operand.  Exits on any syntax error.  For symbolic
 * operands op->symbol is heap-allocated and ownership passes to the caller.
 */
void
operand(char *m, size_t size, off_t *_off, struct operand *op)
{
	toktype_t type;
	off_t off;
	int reg;

	off = *_off;

	if ((type = next_token(m, size, &off)) == TOK_EOF)
		errx(1, "Error: line %d: Unexpected EOF\n", line);

	if (type == TOK_ID) {
		reg = strtoreg(toktext);
		if (reg < 0) {
			/* Symbol */
			op->type = OP_RELOC;
			op->symbol = copy_name(toktext, toklen);
		} else {
			op->type = OP_REG;
			op->reg = reg;
		}
	} else if (type == TOK_NUM) {
		op->type = OP_IMM;
		op->val = toklval;
	} else if ((type == TOK_OP) && (*toktext == '[')) {
		memory_operand(m, size, &off, op);
	} else {
		errx(1, "Error: line %d: Expected operand, got \"%s\"\n", line,
		    toktext);
	}

	*_off = off;
}

/*
 * Return the index of the entry in table[0..n) whose name equals `name`
 * exactly, or 0 if there is none (index 0 is never a valid opcode here).
 */
static int
lookup_op(const struct instr *table, int n, const char *name)
{
	int i;

	for (i = 0; i < n; i++) {
		if ((table[i].name != NULL) &&
		    (strcmp(name, table[i].name) == 0))
			return (i);
	}
	return (0);
}

/*
 * Assemble one instruction.  The mnemonic must already be in toktext (the
 * caller has just read it as a TOK_ID); operands are parsed from *_off, which
 * is advanced past them.  Always returns 0; errors exit the program.
 */
int
instr(char *m, size_t size, off_t *_off)
{
	struct operand dst, src;
	toktype_t type;
	off_t off;
	int opcode;

	memset(&src, 0, sizeof(struct operand));
	memset(&dst, 0, sizeof(struct operand));

	off = *_off;

	opcode = lookup_op(b_ops, (int)(sizeof(b_ops) / sizeof(b_ops[0])),
	    toktext);
	if (opcode != 0) {
		operand(m, size, &off, &src);
		type = next_token(m, size, &off);
		/* Must be the ',' operator; either test failing is an error. */
		if ((type != TOK_OP) || (*toktext != ','))
			errx(1, "Error: line %d: Expected ',', got \"%s\"\n",
			    line, toktext);
		operand(m, size, &off, &dst);
		emit_basic(opcode, &src, &dst);
	} else {
		opcode = lookup_op(nb_ops,
		    (int)(sizeof(nb_ops) / sizeof(nb_ops[0])), toktext);
		if (opcode == 0)
			errx(1, "Error: line %d: Expected instruction, got "
			    "\"%s\"\n", line, toktext);
		operand(m, size, &off, &dst);
		emit_nonbasic(opcode, &dst);
	}

	*_off = off;

	return (0);
}

/*
 * Assemble the source text m[0..size) into out[], resolve relocations and
 * print a hex listing (eight words per row) to stdout.
 */
void
assemble(char *m, size_t size)
{
	toktype_t type;
	off_t off;
	int i;

	off = 0;
	while ((size_t)off < size) {
		if ((type = next_token(m, size, &off)) == TOK_EOF)
			break;

		if (type == TOK_ID)
			instr(m, size, &off);
		else if (type == TOK_LABEL)
			add_symbol(toktext, toklen, out_off);
		else
			errx(1, "Error: line %d: Unexpected token \"%s\"\n",
			    line, toktext);
	}

	apply_relocs();

	for (i = 0; i < out_off; i++) {
		if (i % 8 == 0)
			printf("\n%.4hx:", i);
		printf(" %.4hx", out[i]);
	}
	printf("\n");
}

/* Write all len bytes of buf to fd, retrying on short writes and EINTR. */
static void
write_all(int fd, const void *buf, size_t len)
{
	const char *p;
	ssize_t n;

	p = buf;
	while (len > 0) {
		n = write(fd, p, len);
		if (n < 0) {
			if (errno == EINTR)
				continue;
			err(1, "write");
		}
		p += n;
		len -= (size_t)n;
	}
}

/*
 * Usage: prog <infile> <outfile>
 *
 * Assembles <infile> and writes the raw words of out[] to <outfile>.
 * NOTE(review): words are written in host byte order, as in the original.
 */
int
main(int argc, char **argv)
{
	struct stat sb;
	char *m;
	int fd;

	if (argc < 3)
		errx(1, "Usage: %s <infile> <outfile>\n", argv[0]);

	if ((fd = open(argv[1], O_RDONLY)) < 0)
		err(1, "open");

	if (fstat(fd, &sb) < 0)
		err(1, "fstat");

	/* mmap() rejects zero-length mappings; an empty source is valid. */
	m = NULL;
	if (sb.st_size > 0) {
		if ((m = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd,
		    0)) == MAP_FAILED)
			err(1, "mmap");
	}

	assemble(m, sb.st_size);

	if (m != NULL)
		(void)munmap(m, sb.st_size);
	(void)close(fd);

	/* O_TRUNC so a longer pre-existing file leaves no stale tail. */
	if ((fd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
		err(1, "open");

	write_all(fd, out, (size_t)out_off * sizeof(out[0]));

	if (close(fd) < 0)
		err(1, "close");

	return (0);
}