Nancy's ARM assembler outline
unknown
c_cpp
2 years ago
13 kB
22
Indexable
/////////////////////////////////// NRM ////////////////////////////////////////
///////////////////////////// NEW ARM ASSEMBLER ////////////////////////////////
////////////////////////// NANCY'S ARM ASSEMBLER ///////////////////////////////
/*
Public Domain (CC0) ARM Assembler.
Free as in actual freedom.
Goals:
- ObjAsm compatibility
- Portability
- Simplicity
- Speed (for JIT use)
*/
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#define STB_DS_IMPLEMENTATION
#include "stb_ds.h"
//NOTE: SH tables by default expect user to do strdup
//Duplicates an stb_ds dynamic array.
//When xs is non-NULL this mallocs room for the stb_ds header plus `capacity`
//elements, memcpy's the header and the elements across in one go, and yields
//a pointer just past the copied header (i.e. the start of the element area).
//When xs is NULL it yields 0.
//NOTE(review): the malloc result is handed straight to memcpy without a NULL
//check, and xs is expanded several times, so pass a side-effect-free expression.
#define arrdup(xs) ((xs) ? \
(void*)((uint8_t*)memcpy(malloc(sizeof(*stbds_header(xs)) \
+ stbds_header(xs)->capacity*sizeof(xs[0])) \
, stbds_header(xs) \
,(sizeof(*stbds_header(xs)) \
+ stbds_header(xs)->capacity*sizeof(xs[0])) \
)+sizeof(*stbds_header(xs))) \
: 0)
//Short aliases for the stb_ds dynamic array macros (and for arrdup above).
#define alen(a) arrlen(a)
#define aput(a,v) arrput(a,v)
#define apop(a) arrpop(a)
#define alast(a) arrlast(a)
#define adup(a) arrdup(a)
//One parsed instruction: filled in by nrm_read1() and consumed by nrm_enc().
typedef struct { //parsed instruction
uint32_t dsc; //descriptor: COND, S, P, I, U, S, W, L, OpCode (decoded with NRM_CND/NRM_OPC and the NRM_S/NRM_I flags); NRM_ERROR or NRM_EOS when nothing was parsed
uint32_t arg; //operands (nrm_enc ORs the low 20 bits into the encoding); holds the error id when dsc == NRM_ERROR
int lbl; //offset inside nrm_t->sl; NOTE(review): not written by any code visible in this file
} nrm_nst_t;
///////////////////////////// SYMBOL TYPES /////////////////////////////////////
//Symbol kinds, stored in the low byte of sym_t.desc (see SYM_TYPE).
//none: the kind a symbol is given when nrm_sref() first creates it
#define SYM_NON 0x00
//mnemonic
#define SYM_MNM 0x01
//macro
#define SYM_MCR 0x02
//register: sym_t.v.i holds the register index (see nrm_name_reg)
#define SYM_REG 0x03
//ObjAsm apparently uses AREA as a synonym for namespace
//NOTE(review): area_t is declared but not referenced by the code visible here.
typedef struct {
char *name; //name of the area
} area_t;
//A symbol known to the assembler (register name, mnemonic, macro, ...).
//Instances are created by nrm_sref() and shared between the symbol map
//(nrm_t.st) and the symbol list (nrm_t.sl).
typedef struct {
char *name; //symbol name as a C string (nrm_sref assigns a strdup'ed copy)
uint32_t desc; //description of the symbol; the low byte is the SYM_* kind
union { //we use x86 notation
int i; //plain int view; register index for SYM_REG symbols
uint8_t b; //byte
uint16_t w; //word
uint32_t d; //dword
uint64_t q; //qword
} v; //value of the symbol (originally described as its offset)
} sym_t;
//Extracts the SYM_* kind (low 8 bits of desc) from a sym_t pointer.
#define SYM_TYPE(s) ((s)->desc&0xff)
//User specified options; embedded in nrm_t as the `opt` member.
typedef struct {
uint32_t flags; //option bits; no flag values are defined in the code visible here
} nrm_opt_t; //assembler options
//Symbol map used with the stb_ds string-hash macros (shget/shput): maps a
//name to its sym_t. Note that the typedef itself is a pointer type.
typedef struct { char *key; sym_t *value; } *sym_tbl_t;
//Per-assembler state. nrm_init() resets st and sl and registers the base
//register names; it does not touch opt.
typedef struct {
sym_tbl_t st; //symbol map: name -> sym_t*, looked up with shget
sym_t **sl; //symbol list: stb_ds array of every sym_t created by nrm_sref
nrm_opt_t opt; //user specified options
} nrm_t; //assembler state
//Shorthand for the leading "assembler state" parameter: functions declared
//as f(NRM, ...) receive it as `nrm_t *this`.
#define NRM nrm_t *this
//Rotates the 32-bit value x right by n bit positions.
//The count is reduced modulo 32, so n == 0 (and any multiple of 32) returns
//x unchanged. The naive `x << (32 - n)` would shift by the full type width
//in that case, which is undefined behaviour in C.
uint32_t ror(uint32_t x, uint32_t n) {
    n &= 31u;
    //(32 - n) & 31 maps n == 0 to a shift of 0 instead of 32
    return (x >> n) | (x << ((32u - n) & 31u));
}
//Decodes the immediate operand of a 32-bit opcode: the low 8 bits are the
//value, and the 4-bit field in bits 8-11 is doubled to give the right-rotate
//amount applied to it.
uint32_t dec_imm(uint32_t opcode) {
    const uint32_t imm8 = opcode & 0xff;
    //shifting by 7 and masking with 0x1e yields the 4-bit field times two
    const uint32_t rot = (opcode >> 7) & 0x1e;
    return ror(imm8, rot);
}
////////////////////// CONDITION CODES /////////////////////////////////////////
// 4-bit condition field values. A descriptor carries one of these in
// bits 12-15 (see NRM_CND); nrm_enc() moves it to bits 28-31 of the encoding.
// Equal / Zero: Z == 1
#define NRM_EQ 0x0
// Not Equal / Not Zero: Z == 0
#define NRM_NE 0x1
//HS/CS - Carry Set / Unsigned Higher or Same: C == 1
#define NRM_CS 0x2
//LO/CC - Carry Clear / Unsigned Lower: C == 0
#define NRM_CC 0x3
// Minus / Negative: N == 1
#define NRM_MI 0x4
// Plus / Positive or Zero: N == 0
#define NRM_PL 0x5
// Overflow Set: V == 1
#define NRM_VS 0x6
// Overflow Clear: V == 0
#define NRM_VC 0x7
// Unsigned Higher: C == 1 && Z == 0
#define NRM_HI 0x8
// Unsigned Lower or Same: C == 0 || Z == 1
#define NRM_LS 0x9
// Greater or Equal: N == V
#define NRM_GE 0xA
// Less Than: N != V
#define NRM_LT 0xB
// Greater Than: Z == 0 && N == V
#define NRM_GT 0xC
// Less or Equal: Z == 1 || N != V
#define NRM_LE 0xD
// Always (unconditional)
#define NRM_AL 0xE
// Never (unconditionally false)
#define NRM_NV 0xF
////////////////////////
// Operation Codes
// Values 0x0-0xF are the data-processing opcodes; nrm_enc() places them at
// bits 21-24 of the encoding. NRM_B/NRM_BL are assembler-internal ids.
//Rd - destination
//Rn - 1st operand
//Rm - 2nd operand
//Rd = Rn & Rm
#define NRM_AND 0x0
//Rd = Rn ^ Rm
#define NRM_EOR 0x1
//Rd = Rn - Rm
#define NRM_SUB 0x2
//Rd = Rm - Rn
#define NRM_RSB 0x3
//Rd = Rn + Rm
#define NRM_ADD 0x4
//Rd = Rn + Rm + C
#define NRM_ADC 0x5
//Rd = Rn - Rm - !C (subtract with carry: the borrow is the inverted carry flag)
#define NRM_SBC 0x6
//Rd = Rm - Rn - !C (reverse subtract with carry)
#define NRM_RSC 0x7
//NZCV <- Rn & Rm
//S should be always 1
#define NRM_TST 0x8
//NZCV <- Rn ^ Rm
//S should be always 1
#define NRM_TEQ 0x9
//NZCV <- Rn - Rm
//S should be always 1
#define NRM_CMP 0xA
//NZCV <- Rn + Rm (compare negated)
//S should be always 1
#define NRM_CMN 0xB
//Rd = Rn | Rm
#define NRM_ORR 0xC
//Rd = Rm (Rn is ignored)
#define NRM_MOV 0xD
//Rd = Rn & ~Rm (bit clear)
#define NRM_BIC 0xE
//Rd = ~Rm (Rn is ignored)
#define NRM_MVN 0xF
//branch and branch-with-link; nrm_read1() currently reports NRM_EUNIMPL for them
#define NRM_B 0x10
#define NRM_BL 0x11
//Descriptor flag bits, OR'ed on top of the opcode (bits 0-11) and the
//condition (bits 12-15).
//for S suffixed opcodes
#define NRM_S 0x10000
//immediate operand
#define NRM_I 0x20000
//set on every opcode-table value so that a valid descriptor is never 0,
//which is what shget() yields for an unknown mnemonic
#define OPC 0x80000000
//Special descriptor values.
#define NRM_NONE 0xFFFFFFFF
//nrm_read1() failed; nst->arg holds the NRM_E* error id
#define NRM_ERROR 0xFFFFFFFE
//nrm_read1() reached the end of the input string
#define NRM_EOS 0xFFFFFFFD
//condition code stored in bits 12-15 of a descriptor
#define NRM_CND(dsc) (((dsc)>>12)&0xf)
//opcode stored in bits 0-11 of a descriptor
#define NRM_OPC(dsc) ((dsc)&0xfff)
//Entry of a string -> signed integer table. The static register tables are
//arrays of these, terminated by an entry whose key is 0.
typedef struct {
char *key;
int32_t value;
} ki_t; //key to integer value
//Entry of a string -> unsigned integer table. Used both for the static
//0-terminated cnds/opcs arrays and as the element type of the stb_ds string
//hash `opctbl`.
typedef struct {
char *key;
uint32_t value;
} ku_t; //key to unsigned integer value
//Register names that nrm_init() registers in every assembler instance:
//r0-r15 in both cases plus the sp/lr/pc aliases for r13/r14/r15.
//The {0,0} entry terminates the table.
static ki_t base_regs[] = {
{ "r0", 0},{ "r1", 1},{ "r2", 2},{ "r3", 3},
{ "r4", 4},{ "r5", 5},{ "r6", 6},{ "r7", 7},
{ "r8", 8},{ "r9", 9},{"r10",10},{"r11",11},
{"r12",12},{"r13",13},{"r14",14},{"r15",15},
{ "R0", 0},{ "R1", 1},{ "R2", 2},{ "R3", 3},
{ "R4", 4},{ "R5", 5},{ "R6", 6},{ "R7", 7},
{ "R8", 8},{ "R9", 9},{"R10",10},{"R11",11},
{"R12",12},{"R13",13},{"R14",14},{"R15",15},
{ "sp",13},{ "SP",13},{ "lr",14},{ "LR",14},
{ "pc",15},{ "PC",15},
{0,0}
};
//APCS (RISC OS C ABI) register aliases: a1-a4 are r0-r3, v1-v6 are r4-r9,
//followed by sl, fp, ip, sp, lr and pc. The {0,0} entry terminates the table.
//r8 is v5; the table previously spelled it "v8", which left v5 undefined.
//NOTE(review): nothing in the code visible here registers this table yet.
static ki_t apcs_regs[] = { //RISC OS C ABI
    //objasm v3.27 apparently only includes the lower case names
    {"a1", 0}, {"a2", 1}, {"a3", 2}, {"a4", 3},
    {"v1", 4}, {"v2", 5}, {"v3", 6}, {"v4", 7},
    {"v5", 8}, {"v6", 9}, {"sl",10}, {"fp",11},
    {"ip",12}, {"sp",13}, {"lr",14}, {"pc",15},
    {0,0}
};
//Condition suffixes and their 4-bit codes. "hs" and "lo" are alternative
//spellings of "cs" and "cc". init_globals() appends each suffix to every
//mnemonic. The {0,0} entry terminates the table.
static ku_t cnds[] = {
{"eq", NRM_EQ}, {"ne", NRM_NE}, {"cs", NRM_CS}, {"cc", NRM_CC},
{"mi", NRM_MI}, {"pl", NRM_PL}, {"vs", NRM_VS}, {"vc", NRM_VC},
{"hi", NRM_HI}, {"ls", NRM_LS}, {"ge", NRM_GE}, {"lt", NRM_LT},
{"gt", NRM_GT}, {"le", NRM_LE}, {"al", NRM_AL}, {"nv", NRM_NV},
{"hs", NRM_CS}, {"lo", NRM_CC},
{0,0}
};
//Base mnemonics (lower case, no suffixes) and their NRM_* opcode ids.
//init_globals() expands this table into `opctbl`, adding the upper-case,
//S-suffixed and condition-suffixed spellings. The {0,0} entry terminates it.
//The compare-negated mnemonic is "cmn"; it was previously misspelled "cnm",
//so CMN could never be looked up.
static ku_t opcs[] = {
    {"b"  , NRM_B  }, {"bl" , NRM_BL },
    {"and", NRM_AND}, {"eor", NRM_EOR}, {"sub", NRM_SUB}, {"rsb", NRM_RSB},
    {"add", NRM_ADD}, {"adc", NRM_ADC}, {"sbc", NRM_SBC}, {"rsc", NRM_RSC},
    {"tst", NRM_TST}, {"teq", NRM_TEQ}, {"cmp", NRM_CMP}, {"cmn", NRM_CMN},
    {"orr", NRM_ORR}, {"mov", NRM_MOV}, {"bic", NRM_BIC}, {"mvn", NRM_MVN},
    {0,0}
};
////////////////////////// GLOBAL VARIABLES ////////////////////////////////////
//Set to 1 by init_globals(); nrm_init() checks it so the shared tables are
//built only once.
static int globals_ready = 0;
//stb_ds string hash mapping every accepted mnemonic spelling to its
//descriptor; built by init_globals() and queried by nrm_read1().
static ku_t *opctbl = NULL; // opcode table
//Returns the symbol called `name`, creating it when it does not exist yet.
//A new symbol is zero-initialised, gets kind SYM_NON and its own strdup'ed
//copy of the name, and is added to both the symbol map (this->st) and the
//symbol list (this->sl).
//The map key is the symbol's own copy of the name: the string hash stores the
//key pointer as given, so using the caller's `name` would leave a dangling
//key once the caller's buffer goes away.
//Never returns NULL; the process is aborted when memory runs out.
static sym_t *nrm_sref(NRM, char *name) {
    sym_t *s = shget(this->st, name);
    if (s) return s;

    //calloc zeroes the symbol BEFORE the fields are filled in (the old code
    //called memset afterwards, wiping desc and leaking the strdup'ed name)
    s = calloc(1, sizeof *s);
    char *own = s ? strdup(name) : NULL;
    if (!own) {
        fprintf(stderr, "nrm: out of memory\n");
        abort();
    }
    s->name = own;
    s->desc = SYM_NON;
    shput(this->st, s->name, s);
    aput(this->sl, s);
    return s;
}
//Error ids that nrm_read1() stores in nst->arg when nst->dsc == NRM_ERROR.
//input line is too large (its tokens do not fit the reader's scratch buffer)
#define NRM_EBIG_INPUT 0x01
//less arguments than required
#define NRM_ELESS_ARGS 0x02
//more arguments than required
#define NRM_EMORE_ARGS 0x03
//unknown mnemonic (not present in the opcode table)
#define NRM_EUNKNOWN 0x04
//unimplemented (the mnemonic is known but the reader cannot handle it yet)
#define NRM_EUNIMPL 0x05
//operand 0 / 1 / 2 is not a known register
//(NRM_EUNKARG2 is not raised by the code visible here)
#define NRM_EUNKARG0 0x06
#define NRM_EUNKARG1 0x07
#define NRM_EUNKARG2 0x08
//Character classes and skipping helpers for the line reader.
//The ISxx macros expand their argument more than once, so pass a
//side-effect-free expression. SKPWS/SKPDL advance the pointer lvalue p.
//end of line: NUL, newline, or the start of a ';' comment
#define ISEOL(x) (!(x) || (x) == '\n' || (x) == ';')
//horizontal whitespace
#define ISWS(x) ((x) == ' ' || (x) == '\t')
//skip horizontal whitespace (wrapped so it behaves as a single statement)
#define SKPWS(p) do { while (ISWS(*(p))) (p)++; } while (0)
//token character: anything that is not EOL, whitespace or ','
#define ISDL(x) (!ISEOL(x) && !ISWS(x) && (x) != ',')
//skip one ',' delimiter and the whitespace following it, if present
#define SKPDL(p) do { if (*(p) == ',') { (p)++; SKPWS(p); } } while (0)
//reader error: records the error id in *nst and returns the current input
//position from the enclosing function. Relies on the locals `nst` and `p`.
#define RERR(type) do { \
    nst->dsc = NRM_ERROR; \
    nst->arg = (type); \
    return p; \
} while (0)
//read helper: copies the token at src into the scratch buffer, NUL-terminates
//it, points dst at the copy and skips the whitespace after the token.
//Relies on the locals `pt` (write cursor) and `te` (last byte of the buffer).
//Characters are written only below te and the terminator at most at te.
//The bounds are checked with >= and > because after a token that ends
//exactly at te the cursor sits one past te, which a plain `pt == te` test
//would never catch again.
#define READ(dst,src) do { \
    dst = pt; \
    while (ISDL(*src)) { \
        if (pt >= te) RERR(NRM_EBIG_INPUT); \
        *pt++ = *src++; \
    } \
    if (pt > te) RERR(NRM_EBIG_INPUT); \
    *pt++ = 0; \
    SKPWS(src); \
} while (0)
//Reads one instruction from the NUL-terminated text `in` into *nst.
//A token starting in the first column is taken as a label; the mnemonic and
//up to four ','-separated operands follow, and a ';' comment is skipped.
//On success nst->dsc is the descriptor from the opcode table and nst->arg
//holds the operand bits (first register at bits 12-15, second at bits 0-3).
//On error nst->dsc gets set to NRM_ERROR and nst->arg gets set to error id.
//At end of input nst->dsc is set to NRM_EOS and nst->arg to 0.
//Returns the position where reading stopped (end of line on success).
//NOTE(review): the label is parsed but not stored; nst->lbl is left untouched.
//NOTE(review): every data-processing mnemonic is currently read as
//"<op> reg, reg"; three-operand and immediate forms are not handled.
static char *nrm_read1(NRM, nrm_nst_t *nst, char *in) {
#define TSZ 256
    //FIXME: handle |labels|
    char t[TSZ]; //scratch buffer: READ copies each token here, NUL-terminated
    char *m, *l, *rd, *rn, *rm, *sh, *p = in, *pt = t, *te = t+TSZ-1;
    int n = 0; //nargs
read_label:
    //ObjAsm labels always begin at the start of a line,
    //everything else gets indented.
    if (ISWS(*p)) {l = ""; SKPWS(p);} else READ(l,p);
read_mnemonic:
    if (*p == '\n') {
        p++;
        if (*l) goto read_mnemonic;
        SKPWS(p);
        goto read_label;
    }
    if (!*p) {
        //End of input. The descriptor is set unconditionally: it used to be
        //assigned only when a label was present, which left nst->dsc
        //uninitialised for empty or blank input.
        nst->dsc = NRM_EOS;
        nst->arg = 0;
        //FIXME: allow label.
        return p;
    }
    READ(m,p);
    if (ISEOL(*p)) rd = ""; else {n++; READ(rd,p); SKPDL(p);}
    if (ISEOL(*p)) rn = ""; else {n++; READ(rn,p); SKPDL(p);}
    //FIXME: RM can include nested `,` inside [] and {}
    if (ISEOL(*p)) rm = ""; else {n++; READ(rm,p); SKPDL(p);}
    if (ISEOL(*p)) sh = ""; else {n++; READ(sh,p); SKPDL(p);}
    if (!ISEOL(*p)) RERR(NRM_EMORE_ARGS);
    if (*p == ';') while (!ISEOL(*p) || *p == ';') p++; //skip comment till EOL
    printf("l=%s m=%s rd=%s rn=%s rm=%s sh=%s\n", l, m,rd,rn,rm,sh);
    //unknown mnemonics yield 0; every valid table value has the OPC bit set
    uint32_t dsc = shget(opctbl, m);
    if (!dsc) RERR(NRM_EUNKNOWN);
    nst->dsc = dsc;
    switch (NRM_OPC(dsc)) {
    case NRM_AND: case NRM_EOR: case NRM_SUB: case NRM_RSB:
    case NRM_ADD: case NRM_ADC: case NRM_SBC: case NRM_RSC:
    case NRM_TST: case NRM_TEQ: case NRM_CMP: case NRM_CMN:
    case NRM_ORR: case NRM_MOV: case NRM_BIC: case NRM_MVN: {
        if (n != 2) RERR(n > 2 ? NRM_EMORE_ARGS : NRM_ELESS_ARGS);
        //both operands must name registers; only the kind byte of desc is
        //compared so that future flag bits in desc do not break the check
        sym_t *a0 = shget(this->st, rd);
        if (!a0 || SYM_TYPE(a0) != SYM_REG) RERR(NRM_EUNKARG0);
        sym_t *a1 = shget(this->st, rn);
        if (!a1 || SYM_TYPE(a1) != SYM_REG) RERR(NRM_EUNKARG1);
        nst->arg = (a0->v.i<<12) | a1->v.i;
        break;
    }
    default:
        RERR(NRM_EUNIMPL);
    }
    return p;
#undef TSZ
}
//Binds `name` to register number `index`: the symbol is looked up (or
//created) through nrm_sref and then marked as a register.
void nrm_name_reg(NRM, char *name, int index) {
    sym_t *reg = nrm_sref(this, name);

    reg->v.i = index;
    reg->desc = SYM_REG;
}
//Copies the NUL-terminated string s to d, converting each character to upper
//case; the terminator is copied too. d must have room for strlen(s)+1 bytes.
//d and s may be the same buffer (in-place conversion).
void upcase(char *d, char *s) {
    //toupper() is only defined for unsigned char values and EOF, so the
    //possibly negative plain char is converted first
    while ((*d++ = (char)toupper((unsigned char)*s++)) != '\0') {
    }
}
static void init_globals() {
char tmp[8];
for (ku_t *o = opcs; o->key; o++) {
shput(opctbl, o->key, o->value|OPC);
upcase(tmp, o->key);
shput(opctbl, strdup(tmp), o->value|OPC);
sprintf(tmp, "%ss", o->key);
shput(opctbl,strdup(tmp), o->value|NRM_S|OPC);
upcase(tmp, tmp);
shput(opctbl,strdup(tmp), o->value|NRM_S|OPC);
for (ku_t *c = cnds; c->key; c++) {
uint32_t cval = (c->value << 12) | o->value | OPC;
sprintf(tmp, "%s%s", o->key, c->key);
shput(opctbl, strdup(tmp), cval);
upcase(tmp, tmp);
shput(opctbl, strdup(tmp), cval);
cval |= NRM_S;
sprintf(tmp, "%ss%s", o->key, c->key);
shput(opctbl, strdup(tmp), cval);
upcase(tmp, tmp);
shput(opctbl, strdup(tmp), cval);
}
}
globals_ready = 1;
}
//Prepares an assembler instance for use: builds the shared global tables on
//first call, resets the instance's symbol map and symbol list, and registers
//every name from base_regs. The options member is left as the caller set it.
void nrm_init(NRM) {
    if (globals_ready == 0) {
        init_globals();
    }

    this->st = NULL;
    this->sl = NULL;

    //base_regs ends with an entry whose key is 0
    ki_t *reg = base_regs;
    while (reg->key) {
        nrm_name_reg(this, reg->key, reg->value);
        reg++;
    }
}
//Encodes the parsed instruction *nst as one 32-bit word, written to dst in
//little-endian byte order.
//Returns the position just past the written word (dst+4), or NULL (0) when
//the descriptor's opcode cannot be encoded; nothing is written in that case.
//Previously an unsupported opcode still returned dst+4, making callers treat
//four unwritten bytes as output.
//Only the data-processing opcodes are supported:
//  bits 28-31 condition, bits 21-24 opcode, bit 20 S, bits 0-19 operands.
//TST/TEQ/CMP/CMN are always encoded with S = 1, as their definitions require.
uint8_t *nrm_enc(NRM, uint8_t *dst, nrm_nst_t *nst) {
    (void)this; //no per-assembler state is needed yet
    uint32_t dsc = nst->dsc;
    uint32_t opc = NRM_OPC(dsc);
    switch (opc) {
    case NRM_AND: case NRM_EOR: case NRM_SUB: case NRM_RSB:
    case NRM_ADD: case NRM_ADC: case NRM_SBC: case NRM_RSC:
    case NRM_TST: case NRM_TEQ: case NRM_CMP: case NRM_CMN:
    case NRM_ORR: case NRM_MOV: case NRM_BIC: case NRM_MVN: {
        //braces: a declaration may not directly follow a case label before C23
        uint32_t r = NRM_CND(dsc) << 28;
        r |= opc << 21;
        //the compare/test group only exists in its flag-setting form
        int sets_flags = (dsc & NRM_S) || (opc >= NRM_TST && opc <= NRM_CMN);
        if (sets_flags) r |= (uint32_t)1 << 20;
        r |= nst->arg & 0xFFFFF;
        dst[0] = r & 0xFF;
        dst[1] = (r >> 8) & 0xFF;
        dst[2] = (r >> 16) & 0xFF;
        dst[3] = r >> 24;
        return dst + 4;
    }
    default:
        //branches, NRM_ERROR/NRM_EOS descriptors and anything unknown
        return NULL;
    }
}
//Hex dump: prints the n bytes at p to stdout as two-digit upper-case hex.
//Bytes on a line are separated by one space; a newline follows every
//((opt & 0xFF) + 1)-th byte and the final byte. Nothing is printed for n <= 0.
void xd(uint8_t *p, int n, uint32_t opt) {
    int ll = (int)(opt & 0xFF) + 1; //bytes per output line
    int j = 0;                      //bytes printed on the current line
    for (int i = 0; i < n; i++) {
        printf("%02X", p[i]);
        //the separator is written with putchar: passing a variable as the
        //printf format string is a format-string hazard
        if (++j == ll || i + 1 == n) {
            putchar('\n');
            j = 0;
        } else {
            putchar(' ');
        }
    }
}
int main() {
nrm_nst_t nst;
nrm_t nrm;
memset(&nrm, 0, sizeof(nrm_t));
nrm_init(&nrm);
nrm_read1(&nrm, &nst, "label movsne r0, r1");
if (nst.dsc == NRM_ERROR) {
printf("Read error: %d\n",nst.arg);
return 0;
}
uint8_t tmp[1024];
uint8_t *p = tmp;
p = nrm_enc(&nrm, p, &nst);
xd(tmp, p-tmp, 16);
return 0;
}
Editor is loading...