/*
 * Nancy's ARM assembler outline
 *
 * Paste metadata (pastecode.io): mail@pastecode.io avatar, unknown, c_cpp,
 * 5 months ago, 13 kB, 13, Indexable, Never
 */
/////////////////////////////////// NRM ////////////////////////////////////////
///////////////////////////// NEW ARM ASSEMBLER ////////////////////////////////
////////////////////////// NANCY'S ARM ASSEMBLER ///////////////////////////////


/*
Public Domain (CC0) ARM Assembler.
Free as in actual freedom.


Goals:
- ObjAsm compatibility
- Portability
- Simplicity
- Speed (for JIT use)
*/

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>

#define STB_DS_IMPLEMENTATION
#include "stb_ds.h"

//NOTE: SH tables by default expect user to do strdup

//Copies one raw array allocation: a header immediately followed by the
//element storage.
//  hdr      - start of the allocation (the header)
//  bytes    - total size of the header plus the element storage
//  hdr_size - size of the header that precedes the first element
//Returns a pointer to the first element of the copy, or NULL when malloc
//fails (previously a failed malloc was handed straight to memcpy).
static void *nrm_arrdup_raw(const void *hdr, size_t bytes, size_t hdr_size) {
  uint8_t *copy = malloc(bytes);
  if (!copy) return NULL;
  memcpy(copy, hdr, bytes);
  return copy + hdr_size;
}

//Duplicates the stb_ds array xs, keeping its full capacity.
//Evaluates to a void* pointing at the first element of the copy, or to 0
//when xs is NULL or the allocation fails.
//NOTE(review): the header is copied byte for byte, so anything it points
//to would be shared with the original - presumably only meant for plain
//arrays, confirm before using it on hash maps.
#define arrdup(xs) ((xs) ? \
     nrm_arrdup_raw(stbds_header(xs) \
           ,(sizeof(*stbds_header(xs)) \
              + stbds_header(xs)->capacity*sizeof((xs)[0])) \
           ,sizeof(*stbds_header(xs))) \
    : 0)


//Short aliases for the stb_ds dynamic array macros (and for arrdup above).
#define alen(a)   arrlen(a)
#define aput(a,v) arrput(a,v)
#define apop(a)   arrpop(a)
#define alast(a)  arrlast(a)
#define adup(a)   arrdup(a)

//One parsed instruction, filled in by nrm_read1 and consumed by nrm_enc.
//dsc may also hold NRM_ERROR (arg is then an NRM_E* error id) or NRM_EOS.
typedef struct { //parsed instruction
  uint32_t dsc; //COND, S, P, I, U, S, W, L, OpCode
                //bits 0-11: opcode (NRM_OPC), bits 12-15: condition (NRM_CND),
                //plus the NRM_S / NRM_I / OPC flag bits
  uint32_t arg; //operands; for data processing nrm_read1 stores
                //(first register << 12) | second register
  int lbl;      //offset inside nrm_t->sl
                //NOTE(review): nothing in this file writes lbl yet
} nrm_nst_t;


///////////////////////////// SYMBOL TYPES /////////////////////////////////////

//Symbol kinds; stored in the low byte of sym_t.desc (see SYM_TYPE).

//none (a symbol that has been referenced but not given a kind yet)
#define SYM_NON  0x00

//mnemonic
#define SYM_MNM  0x01

//macro
#define SYM_MCR  0x02

//register (sym_t.v.i holds the register index, see nrm_name_reg)
#define SYM_REG  0x03


//ObjAsm apparently uses AREA as a synonym for namespace
//Placeholder: only the name is recorded, and nothing in this file uses
//area_t yet.
typedef struct {
  char *name; //name of the area

} area_t;


//A symbol: a named value with a kind.
typedef struct {
  char *name; //symbol name (nrm_sref assigns a strdup'd copy)
  uint32_t desc; //description of the symbol; low byte is a SYM_* kind
  union { //we use x86 notation
    int      i; //plain int view; register symbols keep their index here
    uint8_t  b; //byte
    uint16_t w; //word
    uint32_t d; //dword
    uint64_t q; //qword
  } v;  //value of the symbol (offset, or register index for SYM_REG)
} sym_t;


//Extracts the SYM_* kind (low byte of desc) from a sym_t pointer.
#define SYM_TYPE(s) ((s)->desc&0xff)

//User-specified assembler options (embedded in nrm_t as `opt`).
typedef struct {
  uint32_t flags; //option bits; no flag values are defined in this file yet
} nrm_opt_t; //assembler options

//stb_ds string hash map entry: symbol name -> symbol. Note that sym_tbl_t
//is itself the pointer type that the sh* macros operate on.
typedef struct { char *key; sym_t *value; } *sym_tbl_t;

//Per-assembler state. nrm_init resets st and sl and registers the base
//register names; it does not touch opt.
typedef struct {
  sym_tbl_t st;   //symbol map (name -> sym_t*)
  sym_t **sl; //symbol list (stb_ds array, in order of creation)
  nrm_opt_t opt; //user specified options
} nrm_t; //assembler state


//Shorthand for the leading "assembler state" parameter of every nrm_*
//function; it declares a parameter named `this`.
#define NRM nrm_t *this

//rotate right
//Rotates the 32-bit value x right by n bit positions.
//n is reduced modulo 32, so n == 0 (or any multiple of 32) returns x
//unchanged instead of evaluating the undefined shift `x << 32`.
uint32_t ror(uint32_t x, uint32_t n) {
	n &= 31u;
	return (x >> n) | (x << ((32u - n) & 31u));
}

//decode immediate given 32bit opcode
//Bits 0-7 of opcode are the 8-bit value; bits 8-11 are half of the
//right-rotation applied to it (so the rotation is always even).
uint32_t dec_imm(uint32_t opcode) {
	uint32_t rot_field = (opcode >> 8) & 0xf;
	uint32_t imm8 = opcode & 0xff;
	return ror(imm8, rot_field * 2);
}


////////////////////// CONDITION CODES /////////////////////////////////////////

// 4-bit condition codes. init_globals stores them in bits 12-15 of an
// instruction descriptor (read back with NRM_CND) and nrm_enc places them
// in bits 28-31 of the encoded word.

// Equal / Zero: Z == 1
#define NRM_EQ 0x0 

// Not Equal / Not Zero: Z == 0
#define NRM_NE 0x1

// HS/CS - Carry Set / Unsigned Higher or Same: C == 1
#define NRM_CS 0x2

// LO/CC - Carry Clear / Unsigned Lower: C == 0
#define NRM_CC 0x3

// Minus / Negative: N == 1
#define NRM_MI 0x4

// Plus / Positive or Zero: N == 0
#define NRM_PL 0x5

// Overflow Set: V == 1
#define NRM_VS 0x6

// Overflow Clear: V == 0
#define NRM_VC 0x7

// Unsigned Higher: C == 1 && Z == 0
#define NRM_HI 0x8

// Unsigned Lower or Same: C == 0 || Z == 1
#define NRM_LS 0x9

// Greater or Equal: N == V
#define NRM_GE 0xA

// Less Than: N != V
#define NRM_LT 0xB

// Greater Than: Z == 0 && N == V
#define NRM_GT 0xC

// Less or Equal: Z == 1 || N != V
#define NRM_LE 0xD 

// Always (unconditional)
#define NRM_AL 0xE

// Never (unconditionally false)
#define NRM_NV 0xF


////////////////////////
// Operation Codes
// Data-processing opcodes; nrm_enc places them in bits 21-24 of the word.
//Rd - destination
//Rn - 1st operand
//Rm - 2nd operand

//Rd = Rn & Rm
#define NRM_AND 0x0

//Rd = Rn ^ Rm
#define NRM_EOR 0x1

//Rd = Rn - Rm
#define NRM_SUB 0x2

//Rd = Rm - Rn (reverse subtract)
#define NRM_RSB 0x3

//Rd = Rn + Rm
#define NRM_ADD 0x4

//Rd = Rn + Rm + C
#define NRM_ADC 0x5

//Rd = Rn - Rm - C
#define NRM_SBC 0x6

//Rd = Rm - Rn - C
#define NRM_RSC 0x7

//NZCV <- Rn & Rm
//S should be always 1
#define NRM_TST 0x8

//NZCV <- Rn ^ Rm 
//S should be always 1
#define NRM_TEQ 0x9

//NZCV <- Rn - Rm
//S should be always 1
#define NRM_CMP 0xA

//NZCV <- Rn + Rm (compare negated)
//S should be always 1
#define NRM_CMN 0xB

//Rd = Rn | Rm
#define NRM_ORR 0xC

//Rd = Rm (Rn is ignored)
#define NRM_MOV 0xD

//Rd = Rn & ~Rm (bit clear)
#define NRM_BIC 0xE

//Rd = ~Rm (Rn is ignored)
#define NRM_MVN 0xF



//Branch pseudo-opcodes; numbered right after the 16 data-processing codes.
//nrm_read1 currently rejects them with NRM_EUNIMPL.
#define NRM_B  0x10
#define NRM_BL 0x11

//for S suffixed opcodes
//(descriptor flag; nrm_enc turns it into bit 20 of the encoded word)
#define NRM_S 0x10000

//immediate operand
//(descriptor flag; not used by the code in this file yet)
#define NRM_I 0x20000

//Set on every descriptor stored in opctbl, so that the 0 returned by a
//failed lookup can never be mistaken for a valid descriptor.
#define OPC 0x80000000

//Special values of nrm_nst_t.dsc.
//NRM_ERROR: parsing failed, nrm_nst_t.arg holds an NRM_E* error id.
//NRM_EOS:   end of the input string was reached.
//NRM_NONE is not used by the code in this file yet.
#define NRM_NONE  0xFFFFFFFF
#define NRM_ERROR 0xFFFFFFFE
#define NRM_EOS   0xFFFFFFFD


//Descriptor field accessors: condition code (bits 12-15) and opcode
//(bits 0-11).
#define NRM_CND(dsc) (((dsc)>>12)&0xf)
#define NRM_OPC(dsc) ((dsc)&0xfff)


//Entry of a static name table (and stb_ds string-map entry shape):
//a name and its signed value. Tables end with a {0,0} entry.
typedef struct {
  char *key;
  int32_t value;
} ki_t; //key to integer value

//Same as ki_t but with an unsigned value; also the entry type of the
//opctbl string map. Static tables end with a {0,0} entry.
typedef struct {
  char *key;
  uint32_t value;
} ku_t; //key to unsigned integer value

//Register names every assembler instance knows: r0-r15 in both cases plus
//the sp/lr/pc aliases. nrm_init registers them in table order.
//Terminated by a {0,0} entry.
static ki_t base_regs[] = {
  { "r0", 0},{ "r1", 1},{ "r2", 2},{ "r3", 3},
  { "r4", 4},{ "r5", 5},{ "r6", 6},{ "r7", 7},
  { "r8", 8},{ "r9", 9},{"r10",10},{"r11",11},
  {"r12",12},{"r13",13},{"r14",14},{"r15",15},
  { "R0", 0},{ "R1", 1},{ "R2", 2},{ "R3", 3},
  { "R4", 4},{ "R5", 5},{ "R6", 6},{ "R7", 7},
  { "R8", 8},{ "R9", 9},{"R10",10},{"R11",11},
  {"R12",12},{"R13",13},{"R14",14},{"R15",15},
  { "sp",13},{ "SP",13},{ "lr",14},{ "LR",14},
  { "pc",15},{ "PC",15},
  {0,0}
};

//APCS register aliases: a1-a4 are r0-r3, v1-v6 are r4-r9, followed by
//sl, fp, ip, sp, lr and pc. Terminated by a {0,0} entry.
//Nothing in this file registers this table yet.
static ki_t apcs_regs[] = { //RISC OS C ABI
  //objasm v3.27 apparently only includes the lower case names
  {"a1", 0}, {"a2", 1}, {"a3", 2}, {"a4", 3},
  {"v1", 4}, {"v2", 5}, {"v3", 6}, {"v4", 7},
  {"v5", 8}, {"v6", 9}, {"sl",10}, {"fp",11}, //r8 is v5 (was misspelled "v8")
  {"ip",12}, {"sp",13}, {"lr",14}, {"pc",15},
  {0,0}
};

//Condition suffixes (lower case) and their NRM_* codes. "hs" and "lo" are
//alternative spellings of "cs" and "cc". init_globals appends each suffix
//to every mnemonic stem. Terminated by a {0,0} entry.
static ku_t cnds[] = {
  {"eq", NRM_EQ}, {"ne", NRM_NE}, {"cs", NRM_CS}, {"cc", NRM_CC},
  {"mi", NRM_MI}, {"pl", NRM_PL}, {"vs", NRM_VS}, {"vc", NRM_VC},
  {"hi", NRM_HI}, {"ls", NRM_LS}, {"ge", NRM_GE}, {"lt", NRM_LT},
  {"gt", NRM_GT}, {"le", NRM_LE}, {"al", NRM_AL}, {"nv", NRM_NV},
  {"hs", NRM_CS}, {"lo", NRM_CC},
  {0,0}
};

//Mnemonic stems (lower case) and their opcode numbers. init_globals
//expands each stem into its upper-case, S-suffixed and conditional
//variants. Terminated by a {0,0} entry.
static ku_t opcs[] = {
  {"b"  , NRM_B  }, {"bl" , NRM_BL },
  {"and", NRM_AND}, {"eor", NRM_EOR}, {"sub", NRM_SUB}, {"rsb", NRM_RSB},
  {"add", NRM_ADD}, {"adc", NRM_ADC}, {"sbc", NRM_SBC}, {"rsc", NRM_RSC},
  {"tst", NRM_TST}, {"teq", NRM_TEQ}, {"cmp", NRM_CMP}, {"cmn", NRM_CMN}, //was misspelled "cnm"
  {"orr", NRM_ORR}, {"mov", NRM_MOV}, {"bic", NRM_BIC}, {"mvn", NRM_MVN},
  {0,0}
};


////////////////////////// GLOBAL VARIABLES ////////////////////////////////////
//Set to 1 by init_globals; nrm_init checks it so that the shared tables
//are built only once.
static int globals_ready = 0;
//stb_ds string map from mnemonic spelling to instruction descriptor;
//filled by init_globals and queried by nrm_read1.
static ku_t *opctbl = NULL; // opcode table



//Returns the symbol called `name`, creating it on first reference.
//A new symbol starts zeroed with kind SYM_NON and owns a private copy of
//its name; it is added to both the symbol map and the symbol list.
//The map key is the symbol's own copy of the name, so the caller's buffer
//does not have to outlive the call.
//Never returns NULL: the process is aborted when memory runs out.
static sym_t *nrm_sref(NRM, char *name) {
  sym_t *s = shget(this->st, name);
  if (s) return s;

  //zero first, then fill in: the old code cleared the struct *after*
  //setting name and desc, which lost (and leaked) the copied name
  s = calloc(1, sizeof *s);
  char *own = s ? strdup(name) : NULL;
  if (!own) {
    fputs("nrm: out of memory\n", stderr);
    abort();
  }
  s->name = own;
  s->desc = SYM_NON;
  shput(this->st, s->name, s);
  aput(this->sl, s);
  return s;
}



//Error ids; nrm_read1 stores one in nrm_nst_t.arg when it sets
//nrm_nst_t.dsc to NRM_ERROR.

//input line is too large (the tokens do not fit the scratch buffer)
#define NRM_EBIG_INPUT     0x01

//less arguments than required
#define NRM_ELESS_ARGS     0x02

//more arguments than required
#define NRM_EMORE_ARGS     0x03

//unknown mnemonic
#define NRM_EUNKNOWN       0x04

//unimplemented
#define NRM_EUNIMPL        0x05

//argument N is not recognized (nrm_read1 reports these when an operand
//is not a register); NRM_EUNKARG2 is not used in this file yet
#define NRM_EUNKARG0       0x06
#define NRM_EUNKARG1       0x07
#define NRM_EUNKARG2       0x08


//true at the end of a line: NUL, newline, or the ';' that opens a comment
#define ISEOL(x) ((x)=='\0' || (x)=='\n' || (x)==';')

//true for a blank or a tab
#define ISWS(x) ((x)=='\t' || (x)==' ')

//moves p past any run of blanks and tabs
#define SKPWS(p) while (ISWS(*(p))) ++(p)

//true for a character that is part of a token
#define ISDL(x) (!(ISEOL(x) || ISWS(x) || (x)==','))
//when p is at a ',' steps over it and over the whitespace that follows
#define SKPDL(p) do { if (*(p) == ',') { ++(p); SKPWS(p); } } while(0)

//reader error
//Only usable inside nrm_read1: records the error id `type` in *nst
//(dsc = NRM_ERROR, arg = type) and returns the current position p from
//the enclosing function.
#define RERR(type) do {   \
    nst->dsc = NRM_ERROR; \
    nst->arg = (type);    \
    return p;             \
  } while (0)

//read helper
//Only usable inside nrm_read1: copies the token at src into the scratch
//buffer (write cursor pt, last usable slot te), NUL-terminates it, points
//dst at the copy and moves src past the token and any whitespace after it.
//A token that does not fit fails with NRM_EBIG_INPUT. te stops one byte
//short of the buffer end, so the terminator always has room.
#define READ(dst,src) do {                             \
    dst = pt;                                          \
    while (ISDL(*src)) {                               \
      if (pt == te) RERR(NRM_EBIG_INPUT);              \
      *pt++ = *src++;                                  \
    }                                                  \
    *pt++ = 0;                                         \
    SKPWS(src);                                        \
  } while (0)


//Reads one instruction
//
//Parses "[label] mnemonic [arg {, arg}]" starting at `in`; blank lines in
//front of the instruction are skipped, and a label may sit on a line of its
//own above the instruction.
//
//On success nst->dsc is the descriptor found in opctbl and nst->arg holds
//the operands; only two-register data-processing forms are accepted so far
//and they are stored as (first register << 12) | second register.
//On error nst->dsc gets set to NRM_ERROR and nst->arg gets set to error id.
//At the end of the input nst->dsc is NRM_EOS and nst->arg is 0.
//nst->lbl is not written.
//
//Returns the position where parsing stopped: the end of the line on
//success, or wherever the problem was noticed on error.
static char *nrm_read1(NRM, nrm_nst_t *nst, char *in) {
#define TSZ 256
  //FIXME: handle |labels|
  char t[TSZ]; //scratch buffer for the NUL-terminated label, mnemonic and args
  char *m, *l, *rd, *rn, *rm, *sh, *p = in, *pt, *te = t+TSZ-1;
  int n = 0; //nargs

read_label:
  //Every attempt starts with an empty scratch buffer, so any number of
  //blank lines can be skipped without running out of room.
  pt = t;
  //ObjAsm labels always being at start of line,
  //everything else gets indented.
  if (ISWS(*p)) {l = ""; SKPWS(p);} else READ(l,p);

read_mnemonic:
  if (*p == '\n') {
    p++;
    //No label so far: the next line may start with one (or be indented).
    if (!*l) goto read_label;
    //A label on a line of its own: the instruction follows on the next
    //line, behind its indentation.
    SKPWS(p);
    goto read_mnemonic;
  }

  if (!*p) {
    //FIXME: allow label.
    nst->dsc = NRM_EOS;
    nst->arg = 0;
    return p;
  }

  READ(m,p);

  if (ISEOL(*p)) rd = ""; else {n++; READ(rd,p); SKPDL(p);}
  if (ISEOL(*p)) rn = ""; else {n++; READ(rn,p); SKPDL(p);}
  //FIXME: RM can include nested `,` inside [] and {}
  if (ISEOL(*p)) rm = ""; else {n++; READ(rm,p); SKPDL(p);}
  if (ISEOL(*p)) sh = ""; else {n++; READ(sh,p); SKPDL(p);}

  if (!ISEOL(*p)) RERR(NRM_EMORE_ARGS);

  if (*p == ';') while (*p && *p != '\n') p++; //skip comment till EOL

  printf("l=%s m=%s rd=%s rn=%s rm=%s sh=%s\n", l, m,rd,rn,rm,sh);

  //Every stored descriptor carries the OPC bit, so 0 means "not found".
  uint32_t dsc = shget(opctbl, m);
  if (!dsc) RERR(NRM_EUNKNOWN);

  nst->dsc = dsc;
  switch (NRM_OPC(dsc)) {
  case NRM_AND: case NRM_EOR: case NRM_SUB: case NRM_RSB:
  case NRM_ADD: case NRM_ADC: case NRM_SBC: case NRM_RSC:
  case NRM_TST: case NRM_TEQ: case NRM_CMP: case NRM_CMN:
  case NRM_ORR: case NRM_MOV: case NRM_BIC: case NRM_MVN: {
    if (n != 2) RERR(n > 2 ? NRM_EMORE_ARGS : NRM_ELESS_ARGS);
    sym_t *a0 = shget(this->st, rd);
    if (!a0 || SYM_TYPE(a0) != SYM_REG) RERR(NRM_EUNKARG0);
    sym_t *a1 = shget(this->st, rn);
    if (!a1 || SYM_TYPE(a1) != SYM_REG) RERR(NRM_EUNKARG1);
    nst->arg = ((uint32_t)a0->v.i<<12) | (uint32_t)a1->v.i;
    break;
  }
  default:
    RERR(NRM_EUNIMPL);
  }

  return p;
#undef TSZ
}



//Makes `name` an alias for register number `index`: the symbol is looked
//up (or created) through nrm_sref, marked as SYM_REG and given the index
//as its value.
void nrm_name_reg(NRM, char *name, int index) {
  sym_t *reg = nrm_sref(this, name);
  reg->v.i = index;
  reg->desc = SYM_REG;
}


//Copies the NUL-terminated string s to d, converting it to upper case.
//d must have room for strlen(s)+1 bytes; d == s (in place) is fine.
//Characters go through unsigned char first, because toupper is undefined
//for negative arguments other than EOF.
void upcase(char *d, const char *s) {
  while ((*d++ = (char)toupper((unsigned char)*s++)));
}



static void init_globals() {
  char tmp[8];
  for (ku_t *o = opcs; o->key; o++) {
    shput(opctbl, o->key, o->value|OPC);
    upcase(tmp, o->key);
    shput(opctbl, strdup(tmp), o->value|OPC);

    sprintf(tmp, "%ss", o->key);
    shput(opctbl,strdup(tmp), o->value|NRM_S|OPC);
    upcase(tmp, tmp);
    shput(opctbl,strdup(tmp), o->value|NRM_S|OPC);

    for (ku_t *c = cnds; c->key; c++) {
      uint32_t cval = (c->value << 12) | o->value | OPC;
      sprintf(tmp, "%s%s", o->key, c->key);
      shput(opctbl, strdup(tmp), cval);
      upcase(tmp, tmp);
      shput(opctbl, strdup(tmp), cval);

      cval |= NRM_S;
      sprintf(tmp, "%ss%s", o->key, c->key);
      shput(opctbl, strdup(tmp), cval);
      upcase(tmp, tmp);
      shput(opctbl, strdup(tmp), cval);
    }
  }
  globals_ready = 1;
}

//Prepares an assembler instance: builds the shared tables on first use,
//starts with an empty symbol map and symbol list, and registers every
//name from base_regs. The options in this->opt are left as they are.
void nrm_init(NRM) {
  if (globals_ready == 0) init_globals();

  this->sl = NULL;
  this->st = NULL;

  ki_t *reg = base_regs;
  while (reg->key) {
    nrm_name_reg(this, reg->key, reg->value);
    reg++;
  }
}



//Encodes one parsed instruction into dst as a 32-bit word, least
//significant byte first.
//Word layout produced here: condition in bits 28-31, opcode in bits 21-24,
//S flag in bit 20, and the low 20 bits of nst->arg (operands).
//Returns the position just past the bytes written (dst+4).
//On error - an opcode this function cannot encode, which includes the
//NRM_ERROR / NRM_EOS markers - returns NULL and leaves dst untouched.
uint8_t *nrm_enc(NRM, uint8_t *dst, nrm_nst_t *nst) {
  uint32_t dsc = nst->dsc;
  uint32_t opc = NRM_OPC(dsc);
  switch (opc) {
  case NRM_AND: case NRM_EOR: case NRM_SUB: case NRM_RSB:
  case NRM_ADD: case NRM_ADC: case NRM_SBC: case NRM_RSC:
  case NRM_TST: case NRM_TEQ: case NRM_CMP: case NRM_CMN:
  case NRM_ORR: case NRM_MOV: case NRM_BIC: case NRM_MVN: {
    //braces: before C23 a declaration may not directly follow a case label
    uint32_t r = NRM_CND(dsc)<<28;
    r |= opc<<21;
    if (dsc&NRM_S) r |= (uint32_t)1<<20;
    r |= nst->arg&0xFFFFF;
    dst[0] = r&0xFF;
    dst[1] = (r>>8)&0xFF;
    dst[2] = (r>>16)&0xFF;
    dst[3] = r>>24;
    return dst+4;
  }
  default:
    return NULL;
  }
}

//Hex dump: prints the n bytes at p to stdout as two-digit upper-case hex.
//Bytes on a row are separated by one space; a row holds (opt & 0xFF) + 1
//bytes, and the last row always ends with a newline.
void xd(uint8_t *p, int n, uint32_t opt) {
  int per_row = (int)(opt&0xFF)+1;
  int col = 0;
  for (int i = 0; i < n; i++) {
    printf("%02X", p[i]);
    //separators are written with putchar rather than printf(e), which
    //used a non-literal format string
    if (++col == per_row || i+1 == n) {
      putchar('\n');
      col = 0;
    } else {
      putchar(' ');
    }
  }
}

int main() {
  nrm_nst_t nst;
  nrm_t nrm;
  memset(&nrm, 0, sizeof(nrm_t));
  nrm_init(&nrm);
  nrm_read1(&nrm, &nst, "label movsne r0, r1");
  if (nst.dsc == NRM_ERROR) {
    printf("Read error: %d\n",nst.arg);
    return 0;
  }

  uint8_t tmp[1024];
  uint8_t *p = tmp;
  p = nrm_enc(&nrm, p, &nst);
  
  xd(tmp, p-tmp, 16);


  return 0;
}