Nancy's ARM assembler outline
unknown
c_cpp
2 years ago
13 kB
19
Indexable
/////////////////////////////////// NRM //////////////////////////////////////// ///////////////////////////// NEW ARM ASSEMBLER //////////////////////////////// ////////////////////////// NANCY'S ARM ASSEMBLER /////////////////////////////// /* Public Domain (CC0) ARM Assembler. Free as in actual freedom. Goals: - ObjAsm compatibility - Portability - Simplicity - Speed (for JIT use) */ #include <stdio.h> #include <stdint.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include <stdarg.h> #define STB_DS_IMPLEMENTATION #include "stb_ds.h" //NOTE: SH tables by default expect user to do strdup #define arrdup(xs) ((xs) ? \ (void*)((uint8_t*)memcpy(malloc(sizeof(*stbds_header(xs)) \ + stbds_header(xs)->capacity*sizeof(xs[0])) \ , stbds_header(xs) \ ,(sizeof(*stbds_header(xs)) \ + stbds_header(xs)->capacity*sizeof(xs[0])) \ )+sizeof(*stbds_header(xs))) \ : 0) #define alen(a) arrlen(a) #define aput(a,v) arrput(a,v) #define apop(a) arrpop(a) #define alast(a) arrlast(a) #define adup(a) arrdup(a) typedef struct { //parsed instruction uint32_t dsc; //COND, S, P, I, U, S, W, L, OpCode uint32_t arg; //operands int lbl; //offset inside nrm_t->sl } nrm_nst_t; ///////////////////////////// SYMBOL TYPES ///////////////////////////////////// //none #define SYM_NON 0x00 //mnemonic #define SYM_MNM 0x01 //macro #define SYM_MCR 0x02 //register #define SYM_REG 0x03 //ObjAsm apparently uses AREA as a synonym for namespace typedef struct { char *name; } area_t; typedef struct { char *name; //index inside the name table uint32_t desc; //description of the symbol union { //we use x86 notation int i; uint8_t b; //byte uint16_t w; //word uint32_t d; //dword uint64_t q; //qword } v; //offset of the symbol } sym_t; #define SYM_TYPE(s) ((s)->desc&0xff) typedef struct { uint32_t flags; } nrm_opt_t; //assembler state typedef struct { char *key; sym_t *value; } *sym_tbl_t; typedef struct { sym_tbl_t st; //symbol map sym_t **sl; //symbol list nrm_opt_t opt; //user specified options } nrm_t; //assembler state #define NRM nrm_t *this //rotate right uint32_t ror(uint32_t x, uint32_t n) { return (x >> n) | (x << (32 - n)); } //decode immediate given 32bit opcode uint32_t dec_imm(uint32_t opcode) { uint32_t v = opcode & 0xff; uint32_t r = ((opcode >> 8) & 0xf) << 1; return ror(v, r); } ////////////////////// CONDITION CODES ///////////////////////////////////////// // Equal / Zero: Z == 1 #define NRM_EQ 0x0 // Not Equal / Not Zero: Z = 0 #define NRM_NE 0x1 //HS/CS - Carry Set / Unsigned Higher or Same: C == 1 #define NRM_CS 0x2 //LO/CC - Carry Clear / Unsigned Lower: C == 0 #define NRM_CC 0x3 // Minus / Negative: N==1 #define NRM_MI 0x4 // Plus / Positive or Zero: N==0 #define NRM_PL 0x5 // Overflow Set: V==1 #define NRM_VS 0x6 // Overflow Clear: V==0 #define NRM_VC 0x7 // Unsigned Higher: C == 1 && Z == 0 #define NRM_HI 0x8 // Unsigned Lower or Same: C == 0 || Z == 1 #define NRM_LS 0x9 // Greater or Equal: N == V #define NRM_GE 0xA // Less Than: N != V #define NRM_LT 0xB // Greater Than: Z == 0 && N == V #define NRM_GT 0xC // Less or Equal: Z == 1 || N != V #define NRM_LE 0xD // Always (unconditional) #define NRM_AL 0xE // Never (unconditionally false) #define NRM_NV 0xF //////////////////////// // Operation Codes //Rd - destionation //Rn - 1st operand //Rm - 2nd operand //Rd = Rn & Rm #define NRM_AND 0x0 //Rd = Rn ^ Rm #define NRM_EOR 0x1 //Rd = Rn - Rm #define NRM_SUB 0x2 //Rd = Rm - Rn #define NRM_RSB 0x3 //Rd = Rn + Rm #define NRM_ADD 0x4 //Rd = Rn + Rm + C #define NRM_ADC 0x5 //Rd = Rn - Rm - C #define NRM_SBC 0x6 //Rd = Rm - Rn - C #define NRM_RSC 0x7 //NZCV <- Rn & Rm //S should be always 1 #define NRM_TST 0x8 //NZCV <- Rn ^ Rm //S should be always 1 #define NRM_TEQ 0x9 //NZCV <- Rn - Rm //S should be always 1 #define NRM_CMP 0xA //NZCV <- Rn + Rm (compare negated) //S should be always 1 #define NRM_CMN 0xB //Rd = Rn | Rm #define NRM_ORR 0xC //Rd = Rm (Rn is ignored) #define NRM_MOV 0xD //Rd = Rn & ~Rm (bit clear) #define NRM_BIC 0xE //Rd = ~Rm (Rn is ignored) #define NRM_MVN 0xF #define NRM_B 0x10 #define NRM_BL 0x11 //for S suffixed opcodes #define NRM_S 0x10000 //immediate operand #define NRM_I 0x20000 #define OPC 0x80000000 #define NRM_NONE 0xFFFFFFFF #define NRM_ERROR 0xFFFFFFFE #define NRM_EOS 0xFFFFFFFD #define NRM_CND(dsc) (((dsc)>>12)&0xf) #define NRM_OPC(dsc) ((dsc)&0xfff) typedef struct { char *key; int32_t value; } ki_t; //key to integer value typedef struct { char *key; uint32_t value; } ku_t; //key to integer value static ki_t base_regs[] = { { "r0", 0},{ "r1", 1},{ "r2", 2},{ "r3", 3}, { "r4", 4},{ "r5", 5},{ "r6", 6},{ "r7", 7}, { "r8", 8},{ "r9", 9},{"r10",10},{"r11",11}, {"r12",12},{"r13",13},{"r14",14},{"r15",15}, { "R0", 0},{ "R1", 1},{ "R2", 2},{ "R3", 3}, { "R4", 4},{ "R5", 5},{ "R6", 6},{ "R7", 7}, { "R8", 8},{ "R9", 9},{"R10",10},{"R11",11}, {"R12",12},{"R13",13},{"R14",14},{"R15",15}, { "sp",13},{ "SP",13},{ "lr",14},{ "LR",14}, { "pc",15},{ "PC",15}, {0,0} }; static ki_t apcs_regs[] = { //RISC OS C ABI //objasm v3.27 apparently only includes the lower case names {"a1", 0}, {"a2", 1}, {"a3", 2}, {"a4", 3}, {"v1", 4}, {"v2", 5}, {"v3", 6}, {"v4", 7}, {"v8", 8}, {"v6", 9}, {"sl",10}, {"fp",11}, {"ip",12}, {"sp",13}, {"lr",14}, {"pc",15}, {0,0} }; static ku_t cnds[] = { {"eq", NRM_EQ}, {"ne", NRM_NE}, {"cs", NRM_CS}, {"cc", NRM_CC}, {"mi", NRM_MI}, {"pl", NRM_PL}, {"vs", NRM_VS}, {"vc", NRM_VC}, {"hi", NRM_HI}, {"ls", NRM_LS}, {"ge", NRM_GE}, {"lt", NRM_LT}, {"gt", NRM_GT}, {"le", NRM_LE}, {"al", NRM_AL}, {"nv", NRM_NV}, {"hs", NRM_CS}, {"lo", NRM_CC}, {0,0} }; static ku_t opcs[] = { {"b" , NRM_B }, {"bl" , NRM_BL }, {"and", NRM_AND}, {"eor", NRM_EOR}, {"sub", NRM_SUB}, {"rsb", NRM_RSB}, {"add", NRM_ADD}, {"adc", NRM_ADC}, {"sbc", NRM_SBC}, {"rsc", NRM_RSC}, {"tst", NRM_TST}, {"teq", NRM_TEQ}, {"cmp", NRM_CMP}, {"cnm", NRM_CMN}, {"orr", NRM_ORR}, {"mov", NRM_MOV}, {"bic", NRM_BIC}, {"mvn", NRM_MVN}, {0,0} }; ////////////////////////// GLOBAL VARIABLES //////////////////////////////////// static int globals_ready = 0; static ku_t *opctbl = NULL; // opcode table static sym_t *nrm_sref(NRM, char *name) { sym_t *s = shget(this->st, name); if (!s) { s = malloc(sizeof(sym_t)); s->desc = SYM_NON; s->name = strdup(name); memset(s, 0, sizeof(sym_t)); shput(this->st, name, s); aput(this->sl, s); } return s; } //input line is too large #define NRM_EBIG_INPUT 0x01 //less arguments than required #define NRM_ELESS_ARGS 0x02 //more arguments than required #define NRM_EMORE_ARGS 0x03 //less arguments than required #define NRM_EUNKNOWN 0x04 //unimplemented #define NRM_EUNIMPL 0x05 //less arguments than required #define NRM_EUNKARG0 0x06 #define NRM_EUNKARG1 0x07 #define NRM_EUNKARG2 0x08 #define ISEOL(x) (!(x) || (x)=='\n' || (x)==';') #define ISWS(x) ((x) == ' ' || (x) == '\t') #define SKPWS(p) while (ISWS(*p)) *p++ #define ISDL(x) (!ISEOL(x) && !ISWS(x) && (x) != ',') #define SKPDL(p) do { if (*p == ',') {*p++; SKPWS(p);} } while(0) //reader error #define RERR(type) do { \ nst->dsc = NRM_ERROR; \ nst->arg = (type); \ return p; \ } while (0) //read helper #define READ(dst,src) do { \ dst = pt; \ while (ISDL(*src)) { \ if (pt == te) RERR(NRM_EBIG_INPUT); \ *pt++ = *src++; \ } \ *pt++ = 0; \ SKPWS(src); \ } while (0) //Reads one instruction //On error nst->dsc gets set to NRM_ERROR and nst->arg gets set to error id //Otherwise returns pointer to the end of line. static char *nrm_read1(NRM, nrm_nst_t *nst, char *in) { #define TSZ 256 //FIXME: handle |labels| char t[TSZ]; //temporary area for readed mnemonic and args char *m, *l, *rd, *rn, *rm, *sh, *p = in, *q, *r, *s, *pt=t, *te = t+TSZ-1; int n = 0; //nargs read_label: //ObjAsm labels always being at start of line, //everything else gets indented. if (ISWS(*p)) {l = ""; SKPWS(p);} else READ(l,p); read_mnemonic: if (*p == '\n') { p++; if (*l) goto read_mnemonic; SKPWS(p); goto read_label; } if (!*p) { if (*l) nst->dsc = NRM_EOS; nst->arg = 0; //FIXME: allow label. return p; } //fprintf(stderr, "%s\n", m); READ(m,p); if (ISEOL(*p)) rd = ""; else {n++; READ(rd,p); SKPDL(p);} if (ISEOL(*p)) rn = ""; else {n++; READ(rn,p); SKPDL(p);} //FIXME: RM can include nested `,` inside [] and {} if (ISEOL(*p)) rm = ""; else {n++; READ(rm,p); SKPDL(p);} if (ISEOL(*p)) sh = ""; else {n++; READ(sh,p); SKPDL(p);} if (!ISEOL(*p)) RERR(NRM_EMORE_ARGS); if (*p == ';') while (!ISEOL(*p) || *p == ';') *p++; //skip comment till EOL printf("l=%s m=%s rd=%s rn=%s rm=%s sh=%s\n", l, m,rd,rn,rm,sh); uint32_t dsc = shget(opctbl, m); if (!dsc) RERR(NRM_EUNKNOWN); //fprintf(stderr, "%x\n", dsc); nst->dsc = dsc; switch (NRM_OPC(dsc)) { case NRM_AND: case NRM_EOR: case NRM_SUB: case NRM_RSB: case NRM_ADD: case NRM_ADC: case NRM_SBC: case NRM_RSC: case NRM_TST: case NRM_TEQ: case NRM_CMP: case NRM_CMN: case NRM_ORR: case NRM_MOV: case NRM_BIC: case NRM_MVN: if (n != 2) RERR(n > 2 ? NRM_EMORE_ARGS : NRM_ELESS_ARGS); sym_t *a0 = shget(this->st, rd); if (!a0 || a0->desc != SYM_REG) RERR(NRM_EUNKARG0); sym_t *a1 = shget(this->st, rn); if (!a1 || a1->desc != SYM_REG) RERR(NRM_EUNKARG1); nst->arg = (a0->v.i<<12) | a1->v.i; break; default: RERR(NRM_EUNIMPL); break; } return p; #undef TSZ } //assign's name to a register indexed by index void nrm_name_reg(NRM, char *name, int index) { sym_t *s = nrm_sref(this,name); s->desc = SYM_REG; s->v.i = index; } void upcase(char *d, char *s) { while ((*d++ = toupper(*s++))); } static void init_globals() { char tmp[8]; for (ku_t *o = opcs; o->key; o++) { shput(opctbl, o->key, o->value|OPC); upcase(tmp, o->key); shput(opctbl, strdup(tmp), o->value|OPC); sprintf(tmp, "%ss", o->key); shput(opctbl,strdup(tmp), o->value|NRM_S|OPC); upcase(tmp, tmp); shput(opctbl,strdup(tmp), o->value|NRM_S|OPC); for (ku_t *c = cnds; c->key; c++) { uint32_t cval = (c->value << 12) | o->value | OPC; sprintf(tmp, "%s%s", o->key, c->key); shput(opctbl, strdup(tmp), cval); upcase(tmp, tmp); shput(opctbl, strdup(tmp), cval); cval |= NRM_S; sprintf(tmp, "%ss%s", o->key, c->key); shput(opctbl, strdup(tmp), cval); upcase(tmp, tmp); shput(opctbl, strdup(tmp), cval); } } globals_ready = 1; } void nrm_init(NRM) { if (!globals_ready) init_globals(); this->st = NULL; this->sl = NULL; for (ki_t *rn = base_regs; rn->key; rn++) { nrm_name_reg(this, rn->key, rn->value); } } //encodes instruction and returs the number of bytes written to dst. //on error returns 0 uint8_t *nrm_enc(NRM, uint8_t *dst, nrm_nst_t *nst) { uint32_t dsc = nst->dsc; uint32_t opc = NRM_OPC(dsc); switch (opc) { case NRM_AND: case NRM_EOR: case NRM_SUB: case NRM_RSB: case NRM_ADD: case NRM_ADC: case NRM_SBC: case NRM_RSC: case NRM_TST: case NRM_TEQ: case NRM_CMP: case NRM_CMN: case NRM_ORR: case NRM_MOV: case NRM_BIC: case NRM_MVN: uint32_t r = NRM_CND(dsc)<<28; r |= opc<<21; if (dsc&NRM_S) r |= 1<<20; r |= nst->arg&0xFFFFF; dst[0] = r&0xFF; dst[1] = (r>>8)&0xFF; dst[2] = (r>>16)&0xFF; dst[3] = r>>24; break; } return dst+4; } void xd(uint8_t *p, int n, uint32_t opt) { int i; int ll = (opt&0xFF)+1; int j = 0; for (i = 0; i < n; i++) { printf("%02X", p[i]); char *e = " "; if (++j == ll || i+1 == n) { e = "\n"; j = 0; } printf(e); } } int main() { nrm_nst_t nst; nrm_t nrm; memset(&nrm, 0, sizeof(nrm_t)); nrm_init(&nrm); nrm_read1(&nrm, &nst, "label movsne r0, r1"); if (nst.dsc == NRM_ERROR) { printf("Read error: %d\n",nst.arg); return 0; } uint8_t tmp[1024]; uint8_t *p = tmp; p = nrm_enc(&nrm, p, &nst); xd(tmp, p-tmp, 16); return 0; }
Editor is loading...