diff --git a/interpreters/luajit/.gitignore b/interpreters/luajit/.gitignore
new file mode 100644
index 00000000000..335ec9573de
--- /dev/null
+++ b/interpreters/luajit/.gitignore
@@ -0,0 +1 @@
+*.tar.gz
diff --git a/interpreters/luajit/0001-luajit-armv7m-nuttx.patch b/interpreters/luajit/0001-luajit-armv7m-nuttx.patch
new file mode 100644
index 00000000000..85ac92e15f6
--- /dev/null
+++ b/interpreters/luajit/0001-luajit-armv7m-nuttx.patch
@@ -0,0 +1,10326 @@
+diff --git a/Makefile b/Makefile
+index b0288b4d..f387077c 100644
+--- a/Makefile
++++ b/Makefile
+@@ -90,7 +90,7 @@ FILE_MAN= luajit.1
+ FILE_PC= luajit.pc
+ FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
+ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
+- dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
++ dis_x86.lua dis_x64.lua dis_arm.lua dis_armv7m.lua dis_arm64.lua \
+ dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
+ dis_mips64.lua dis_mips64el.lua vmdef.lua
+
+diff --git a/dynasm/dasm_armv7m.h b/dynasm/dasm_armv7m.h
+new file mode 100644
+index 00000000..8f94ba40
+--- /dev/null
++++ b/dynasm/dasm_armv7m.h
+@@ -0,0 +1,563 @@
++/*
++** DynASM ARM encoding engine.
++** Copyright (C) 2018 Jernej Turnsek. All rights reserved.
++** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Released under the MIT license. See dynasm.lua for full copyright notice.
++*/
++
++#include <stddef.h>
++#include <stdarg.h>
++#include <string.h>
++#include <stdio.h>
++
++#define DASM_ARCH "armv7m"
++
++#ifndef DASM_EXTERN
++#define DASM_EXTERN(a,b,c,d) 0
++#endif
++
++/* Action definitions. */
++enum {
++  DASM_STOP,
++  DASM_SECTION,
++  DASM_ESC,
++  DASM_REL_EXT,
++  /* The following actions need a buffer position. */
++  DASM_ALIGN,
++  DASM_REL_LG,
++  DASM_LABEL_LG,
++  /* The following actions also have an argument. */
++  DASM_REL_PC,
++  DASM_LABEL_PC,
++  DASM_IMM,
++  DASM_IMM12,
++  DASM_IMM16,
++  DASM_IMML8,
++  DASM_IMML12,
++  DASM_IMMV8,
++  DASM__MAX
++};
++
++/* Maximum number of section buffer positions for a single dasm_put() call. */
++#define DASM_MAXSECPOS 25
++
++/* DynASM encoder status codes. Action list offset or number are or'ed in. */
++#define DASM_S_OK 0x00000000
++#define DASM_S_NOMEM 0x01000000
++#define DASM_S_PHASE 0x02000000
++#define DASM_S_MATCH_SEC 0x03000000
++#define DASM_S_RANGE_I 0x11000000
++#define DASM_S_RANGE_SEC 0x12000000
++#define DASM_S_RANGE_LG 0x13000000
++#define DASM_S_RANGE_PC 0x14000000
++#define DASM_S_RANGE_REL 0x15000000
++#define DASM_S_UNDEF_LG 0x21000000
++#define DASM_S_UNDEF_PC 0x22000000
++
++/* Macros to convert positions (8 bit section + 24 bit index). */
++#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
++#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
++#define DASM_SEC2POS(sec) ((sec)<<24)
++#define DASM_POS2SEC(pos) ((pos)>>24)
++#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
++
++/* Action list type. */
++typedef const unsigned int *dasm_ActList;
++
++/* Per-section structure. */
++typedef struct dasm_Section {
++  int *rbuf; /* Biased buffer pointer (negative section bias). */
++  int *buf; /* True buffer pointer. */
++  size_t bsize; /* Buffer size in bytes. */
++  int pos; /* Biased buffer position. */
++  int epos; /* End of biased buffer position - max single put. */
++  int ofs; /* Byte offset into section. */
++} dasm_Section;
++
++/* Core structure holding the DynASM encoding state. */
++struct dasm_State {
++  size_t psize; /* Allocated size of this structure. */
++  dasm_ActList actionlist; /* Current actionlist pointer.
*/ ++ int *lglabels; /* Local/global chain/pos ptrs. */ ++ size_t lgsize; ++ int *pclabels; /* PC label chains/pos ptrs. */ ++ size_t pcsize; ++ void **globals; /* Array of globals (bias -10). */ ++ dasm_Section *section; /* Pointer to active section. */ ++ size_t codesize; /* Total size of all code sections. */ ++ int maxsection; /* 0 <= sectionidx < maxsection. */ ++ int status; /* Status code. */ ++ dasm_Section sections[1]; /* All sections. Alloc-extended. */ ++}; ++ ++/* The size of the core structure depends on the max. number of sections. */ ++#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) ++ ++ ++/* Initialize DynASM state. */ ++void dasm_init(Dst_DECL, int maxsection) ++{ ++ dasm_State *D; ++ size_t psz = 0; ++ int i; ++ Dst_REF = NULL; ++ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); ++ D = Dst_REF; ++ D->psize = psz; ++ D->lglabels = NULL; ++ D->lgsize = 0; ++ D->pclabels = NULL; ++ D->pcsize = 0; ++ D->globals = NULL; ++ D->maxsection = maxsection; ++ for (i = 0; i < maxsection; i++) { ++ D->sections[i].buf = NULL; /* Need this for pass3. */ ++ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); ++ D->sections[i].bsize = 0; ++ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ ++ } ++} ++ ++/* Free DynASM state. */ ++void dasm_free(Dst_DECL) ++{ ++ dasm_State *D = Dst_REF; ++ int i; ++ for (i = 0; i < D->maxsection; i++) ++ if (D->sections[i].buf) ++ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); ++ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); ++ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); ++ DASM_M_FREE(Dst, D, D->psize); ++} ++ ++/* Setup global label array. Must be called before dasm_setup(). */ ++void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) ++{ ++ dasm_State *D = Dst_REF; ++ D->globals = gl - 10; /* Negative bias to compensate for locals. */ ++ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl)*sizeof(int)); ++} ++ ++/* Grow PC label array. Can be called after dasm_setup(), too. */ ++void dasm_growpc(Dst_DECL, unsigned int maxpc) ++{ ++ dasm_State *D = Dst_REF; ++ size_t osz = D->pcsize; ++ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); ++ memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz); ++} ++ ++/* Setup encoder. 
*/ ++void dasm_setup(Dst_DECL, const void *actionlist) ++{ ++ dasm_State *D = Dst_REF; ++ int i; ++ D->actionlist = (dasm_ActList)actionlist; ++ D->status = DASM_S_OK; ++ D->section = &D->sections[0]; ++ memset((void *)D->lglabels, 0, D->lgsize); ++ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); ++ for (i = 0; i < D->maxsection; i++) { ++ D->sections[i].pos = DASM_SEC2POS(i); ++ D->sections[i].ofs = 0; ++ } ++} ++ ++ ++#ifdef DASM_CHECKS ++#define CK(x, st) \ ++ do { if (!(x)) { \ ++ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) ++#define CKPL(kind, st) \ ++ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ ++ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) ++#else ++#define CK(x, st) ((void)0) ++#define CKPL(kind, st) ((void)0) ++#endif ++ ++static int dasm_imm12(unsigned int n) ++{ ++ int i; ++ unsigned int m = n; ++ ++ if (m <= 255) { ++ /* i:imm3 = 0000 */ ++ return ((((m) & 0xff) << 16) | (((m) & 0x700) << 20) | (((m) & 0x800) >> 1)); ++ } ++ else if (!(m & 0xff00ff00) && !(((m >> 16 & 0xff) ^ m) & 0xff)) { ++ /* i:imm3 = 0001 */ ++ return ((((0x100 | (m & 0xff)) & 0xff) << 16) | (((0x100 | (m & 0xff)) & 0x700) << 20) | (((0x100 | (m & 0xff)) & 0x800) >> 1)); ++ } ++ else if (!(m & 0x00ff00ff) && !(((m >> 16 & 0xff00) ^ m) & 0xff00)) { ++ /* i:imm3 = 0010 */ ++ return ((((0x200 | (m >> 8 & 0xff)) & 0xff) << 16) | (((0x200 | (m >> 8 & 0xff)) & 0x700) << 20) | (((0x200 | (m >> 8 & 0xff)) & 0x800) >> 1)); ++ } ++ else if (!(((m >> 16 & 0xffff) ^ m) & 0xffff) && !(((m >> 8 & 0xff) ^ m) & 0xff)) { ++ /* i:imm3 = 0011 */ ++ return ((((0x300 | (m & 0xff)) & 0xff) << 16) | (((0x300 | (m & 0xff)) & 0x700) << 20) | (((0x300 | (m & 0xff)) & 0x800) >> 1)); ++ } ++ else { ++ for (i = 0; i < 4096; i += 128, m = ((m << 1) | (m >> (-(unsigned int)(1)&(8*sizeof(m) - 1))))) { ++ if (m <= 255) { ++ if ((m & 0x80) && (i >= 128 * 8)) ++ return ((((i | (m & 0x7f)) & 0xff) << 16) | (((i | (m & 0x7f)) & 0x700) << 20) | (((i | (m & 0x7f)) & 0x800) >> 1)); ++ else ++ continue; ++ } ++ } ++ } ++ if (n < 4096) { ++ return -2; /* Used for additional encoding of add/sub TODO: better solution! */ ++ } ++ return -1; ++} ++ ++/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ ++void dasm_put(Dst_DECL, int start, ...) ++{ ++ va_list ap; ++ dasm_State *D = Dst_REF; ++ dasm_ActList p = D->actionlist + start; ++ dasm_Section *sec = D->section; ++ int pos = sec->pos, ofs = sec->ofs; ++ int *b; ++ ++ if (pos >= sec->epos) { ++ DASM_M_GROW(Dst, ++ int, ++ sec->buf, ++ sec->bsize, ++ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); ++ sec->rbuf = sec->buf - DASM_POS2BIAS(pos); ++ sec->epos = (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos); ++ } ++ ++ b = sec->rbuf; ++ b[pos++] = start; ++ ++ va_start(ap, start); ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16); ++ if (action >= DASM__MAX) { ++ ofs += 4; ++ } ++ else { ++ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; ++ switch (action) { ++ case DASM_STOP: goto stop; ++ case DASM_SECTION: ++ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); ++ D->section = &D->sections[n]; goto stop; ++ case DASM_ESC: p++; ofs += 4; break; ++ case DASM_REL_EXT: break; ++ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; ++ case DASM_REL_LG: ++ n = (ins & 2047) - 10; pl = D->lglabels + n; ++ /* Bkwd rel or global. 
*/ ++ if (n >= 0) { CK(n >= 10 || *pl < 0, RANGE_LG); CKPL(lg, LG); goto putrel; } ++ pl += 10; n = *pl; ++ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ ++ goto linkrel; ++ case DASM_REL_PC: ++ pl = D->pclabels + n; CKPL(pc, PC); ++putrel: ++ n = *pl; ++ if (n < 0) { ++ /* Label exists. Get label pos and store it. */ ++ b[pos] = -n; ++ } ++ else { ++linkrel: ++ b[pos] = n; /* Else link to rel chain, anchored at label. */ ++ *pl = pos; ++ } ++ pos++; ++ break; ++ case DASM_LABEL_LG: ++ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; ++ case DASM_LABEL_PC: ++ pl = D->pclabels + n; CKPL(pc, PC); ++putlabel: ++ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ ++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; } ++ *pl = -pos; /* Label exists now. */ ++ b[pos++] = ofs; /* Store pass1 offset estimate. */ ++ break; ++ case DASM_IMM: ++ case DASM_IMM16: ++#ifdef DASM_CHECKS ++ CK((n & ((1 << ((ins >> 10) & 31)) - 1)) == 0, RANGE_I); ++ if ((ins & 0x8000)) ++ CK(((n + (1 << (((ins >> 5) & 31) - 1))) >> ((ins >> 5) & 31)) == 0, RANGE_I); ++ else ++ CK((n >> ((ins >> 5) & 31)) == 0, RANGE_I); ++#endif ++ b[pos++] = n; ++ break; ++ case DASM_IMMV8: ++ CK((n & 3) == 0, RANGE_I); ++ n >>= 2; ++ /* fallthrough */ ++ case DASM_IMML8: ++ case DASM_IMML12: ++ CK(n >= 0 ? ((n >> ((ins >> 5) & 31)) == 0) : ++ (((-n) >> ((ins >> 5) & 31)) == 0), ++ RANGE_I); ++ b[pos++] = n; ++ break; ++ case DASM_IMM12: ++ CK(dasm_imm12((unsigned int)n) != -1, RANGE_I); ++ b[pos++] = n; ++ break; ++ } ++ } ++ } ++stop: ++ va_end(ap); ++ sec->pos = pos; ++ sec->ofs = ofs; ++} ++#undef CK ++ ++/* Pass 2: Link sections, shrink aligns, fix label offsets. */ ++int dasm_link(Dst_DECL, size_t *szp) ++{ ++ dasm_State *D = Dst_REF; ++ int secnum; ++ int ofs = 0; ++ ++#ifdef DASM_CHECKS ++ *szp = 0; ++ if (D->status != DASM_S_OK) return D->status; ++ { ++ int pc; ++ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) ++ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC | pc; ++ } ++#endif ++ ++ { ++ /* Handle globals not defined in this translation unit. */ ++ int idx; ++ for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { ++ int n = D->lglabels[idx]; ++ /* Undefined label: Collapse rel chain and replace with marker (< 0). */ ++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } ++ } ++ } ++ ++ /* Combine all code sections. No support for data sections (yet). */ ++ for (secnum = 0; secnum < D->maxsection; secnum++) { ++ dasm_Section *sec = D->sections + secnum; ++ int *b = sec->rbuf; ++ int pos = DASM_SEC2POS(secnum); ++ int lastpos = sec->pos; ++ ++ while (pos != lastpos) { ++ dasm_ActList p = D->actionlist + b[pos++]; ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16); ++ switch (action) { ++ case DASM_STOP: case DASM_SECTION: goto stop; ++ case DASM_ESC: p++; break; ++ case DASM_REL_EXT: break; ++ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; ++ case DASM_REL_LG: case DASM_REL_PC: pos++; break; ++ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; ++ case DASM_IMM: case DASM_IMM12: case DASM_IMM16: ++ case DASM_IMML8: case DASM_IMML12: case DASM_IMMV8: pos++; break; ++ } ++ } ++stop: (void)0; ++ } ++ ofs += sec->ofs; /* Next section starts right after current section. 
*/ ++ } ++ ++ D->codesize = ofs; /* Total size of all code sections */ ++ *szp = ofs; ++ return DASM_S_OK; ++} ++ ++#ifdef DASM_CHECKS ++#define CK(x, st) \ ++ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) ++#else ++#define CK(x, st) ((void)0) ++#endif ++ ++/* Pass 3: Encode sections. */ ++int dasm_encode(Dst_DECL, void *buffer) ++{ ++ dasm_State *D = Dst_REF; ++ char *base = (char *)buffer; ++ unsigned int *cp = (unsigned int *)buffer; ++ int secnum; ++ ++ /* Encode all code sections. No support for data sections (yet). */ ++ for (secnum = 0; secnum < D->maxsection; secnum++) { ++ dasm_Section *sec = D->sections + secnum; ++ int *b = sec->buf; ++ int *endb = sec->rbuf + sec->pos; ++ ++ while (b != endb) { ++ dasm_ActList p = D->actionlist + *b++; ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16); ++ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; ++ switch (action) { ++ case DASM_STOP: ++ case DASM_SECTION: ++ goto stop; ++ case DASM_ESC: ++ //*cp++ = *p++; //jturnsek: do I need to swap this also? ++ *cp++ = ((*p >> 16) & 0x0000ffff) | ((*p << 16) & 0xffff0000); /* jturnsek: swap of half-words!!! */ ++ p++; ++ break; ++ case DASM_REL_EXT: ++ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), !(ins & 2048)); ++ goto patchrel; ++ case DASM_ALIGN: ++ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x8000f3af; /* jturnsek: NOP.W */ ++ break; ++ case DASM_REL_LG: ++ CK(n >= 0, UNDEF_LG); ++ /* fallthrough */ ++ case DASM_REL_PC: ++ CK(n >= 0, UNDEF_PC); ++ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); ++patchrel: ++ if ((ins & 0x800) == 0) { ++ /* jturnsek: B or BL */ ++ if (cp[-1] & 0x10000000) { ++ /* BL */ ++ CK((n & 1) == 0 && ((n + 0x01000000) >> 25) == 0, RANGE_REL); ++ cp[-1] |= ((((n & 0x1000000) >> 24) & 0x1) << 10) | ++ (((~((n & 0x800000) >> 23) & 0x1) ^ (((n & 0x1000000) >> 24) & 0x1)) << 29) | ++ (((~((n & 0x400000) >> 22) & 0x1) ^ (((n & 0x1000000) >> 24) & 0x1)) << 27) | ++ ((n >> 12) & 0x3ff) | ++ (((n >> 1) & 0x7ff) << 16); ++ } ++ else { ++ /* B (T3) */ ++ CK((n & 1) == 0 && ((n + 0x00100000) >> 21) == 0, RANGE_REL); ++ cp[-1] |= ((((n & 0x100000) >> 20) & 0x1) << 10) | ++ ((((n & 0x80000) >> 19) & 0x1) << 27) | ++ ((((n & 0x40000) >> 18) & 0x1) << 29) | ++ ((n >> 12) & 0x3f) | ++ (((n >> 1) & 0x7ff) << 16); ++ } ++ } ++ else if ((ins & 0x1000)) { ++ CK((n & 3) == 0 && -256 <= n && n <= 256, RANGE_REL); ++ goto patchimml8; ++ } ++ else if ((ins & 0x2000) == 0) { ++ CK((n & 3) == 0 && -4096 <= n && n <= 4096, RANGE_REL); ++ goto patchimml; ++ } ++ else { ++ CK((n & 3) == 0 && -1020 <= n && n <= 1020, RANGE_REL); ++ n >>= 2; ++ goto patchimmv; ++ } ++ break; ++ case DASM_LABEL_LG: ++ ins &= 2047; if (ins >= 20) D->globals[ins - 10] = (void *)(base + n); ++ break; ++ case DASM_LABEL_PC: ++ break; ++ case DASM_IMM: ++ if (((ins >> 5) & 31) == 2) { ++ /* 2 bit shift for load/store lsl */ ++ cp[-1] |= ((n & 0x3) << 20); ++ } ++ else { ++ /* 5 bit shift */ ++ cp[-1] |= ((n & 0x3) << 22) | ((n & 0x1c) << 26); ++ } ++ //cp[-1] |= ((n >> ((ins >> 10) & 31)) & ((1 << ((ins >> 5) & 31)) - 1)) << (ins & 31); ++ break; ++ case DASM_IMM12: ++ if (dasm_imm12((unsigned int)n) == -2) { ++ cp[-1] ^= 0x00000300; ++ cp[-1] &= ~0x00000010; ++ cp[-1] |= ((((n) & 0xff) << 16) | (((n) & 0x700) << 20) | (((n) & 0x800) >> 1)); ++ } ++ else { ++ cp[-1] |= dasm_imm12((unsigned int)n); ++ } ++ break; ++ case DASM_IMM16: ++ cp[-1] |= ((n & 0xf000) >> 12) | ++ ((n & 0x0800) >> 1) | ++ ((n & 0x0700) << 20) | ++ ((n & 
0x00ff) << 16); ++ break; ++ case DASM_IMML8: ++patchimml8: ++ cp[-1] |= n >= 0 ? (0x02000000 | ((n & 0xff) << 16)) : ((-n & 0xff) << 16); ++ break; ++ case DASM_IMML12: ++patchimml: ++ cp[-1] |= n >= 0 ? (0x00000080 | ((n & 0xfff) << 16)) : ((-n & 0xfff) << 16); ++ if (((cp[-1] & 0x0000000f) != 0x0000000f) && (n < 0)) { ++ CK(-255 <= n && n < 0, RANGE_I); ++ cp[-1] &= ~0x03000000; ++ cp[-1] |= 0x0c000000; ++ } ++ break; ++ case DASM_IMMV8: ++patchimmv: ++ cp[-1] |= n >= 0 ? (0x00000080 | ((n & 0xff) << 16)) : ((-n & 0xff) << 16); ++ break; ++ default: ++ *cp++ = ((ins >> 16) & 0x0000ffff) | ((ins << 16) & 0xffff0000); /* jturnsek: swap of half-words!!! */ ++ break; ++ } ++ } ++stop: (void)0; ++ } ++ } ++ ++ if (base + D->codesize != (char *)cp) /* Check for phase errors. */ ++ return DASM_S_PHASE; ++ return DASM_S_OK; ++} ++#undef CK ++ ++/* Get PC label offset. */ ++int dasm_getpclabel(Dst_DECL, unsigned int pc) ++{ ++ dasm_State *D = Dst_REF; ++ if (pc*sizeof(int) < D->pcsize) { ++ int pos = D->pclabels[pc]; ++ if (pos < 0) return *DASM_POS2PTR(D, -pos); ++ if (pos > 0) return -1; /* Undefined. */ ++ } ++ return -2; /* Unused or out of range. */ ++} ++ ++#ifdef DASM_CHECKS ++/* Optional sanity checker to call between isolated encoding steps. */ ++int dasm_checkstep(Dst_DECL, int secmatch) ++{ ++ dasm_State *D = Dst_REF; ++ if (D->status == DASM_S_OK) { ++ int i; ++ for (i = 1; i <= 9; i++) { ++ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG | i; break; } ++ D->lglabels[i] = 0; ++ } ++ } ++ if (D->status == DASM_S_OK && secmatch >= 0 && ++ D->section != &D->sections[secmatch]) ++ D->status = DASM_S_MATCH_SEC | (D->section - D->sections); ++ return D->status; ++} ++#endif ++ +diff --git a/dynasm/dasm_armv7m.lua b/dynasm/dasm_armv7m.lua +new file mode 100644 +index 00000000..8e877d26 +--- /dev/null ++++ b/dynasm/dasm_armv7m.lua +@@ -0,0 +1,1010 @@ ++------------------------------------------------------------------------------ ++-- DynASM ARMV7M module. ++-- ++-- Copyright (C) 2018 Jernej Turnsek. All rights reserved. ++-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- See dynasm.lua for full copyright notice. ++------------------------------------------------------------------------------ ++ ++-- Module information: ++local _info = { ++ arch = "armv7m", ++ description = "DynASM ARMV7M module", ++ version = "1.4.0", ++ vernum = 10400, ++ release = "2018-12-07", ++ author = "Jernej Turnsek", ++ license = "MIT", ++} ++ ++-- Exported glue functions for the arch-specific module. ++local _M = { _info = _info } ++ ++-- Cache library functions. ++local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs ++local assert, setmetatable, rawget = assert, setmetatable, rawget ++local _s = string ++local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char ++local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub ++local concat, sort, insert = table.concat, table.sort, table.insert ++local bit = bit or require("bit") ++local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift ++local ror, tohex = bit.ror, bit.tohex ++local bxor = bit.bxor ++ ++-- Inherited tables and callbacks. ++local g_opt, g_arch ++local wline, werror, wfatal, wwarn ++ ++-- Action name list. ++-- CHECK: Keep this in sync with the C code! 
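++-- Each name's position below determines its action number (STOP = 0, so
++-- IMM12 = 10); waction() packs that number into the upper half-word of an
++-- action word, which the C engine above recovers with `ins >> 16`. A tiny
++-- illustrative helper (ours, unused by the module):
++-- action_word_sketch(10) == 0x000a0000, i.e. an IMM12 action word.
++local function action_word_sketch(num, val) return num * 0x10000 + (val or 0) end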
++local action_names = { ++ "STOP", "SECTION", "ESC", "REL_EXT", ++ "ALIGN", "REL_LG", "LABEL_LG", ++ "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12", "IMMV8", ++} ++ ++-- Maximum number of section buffer positions for dasm_put(). ++-- CHECK: Keep this in sync with the C code! ++local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. ++ ++-- Action name -> action number. ++local map_action = {} ++for n,name in ipairs(action_names) do ++ map_action[name] = n-1 ++end ++ ++-- Action list buffer. ++local actlist = {} ++ ++-- Argument list for next dasm_put(). Start with offset 0 into action list. ++local actargs = { 0 } ++ ++-- Current number of section buffer positions for dasm_put(). ++local secpos = 1 ++ ++------------------------------------------------------------------------------ ++ ++-- Dump action names and numbers. ++local function dumpactions(out) ++ out:write("DynASM encoding engine action codes:\n") ++ for n,name in ipairs(action_names) do ++ local num = map_action[name] ++ out:write(format(" %-10s %02X %d\n", name, num, num)) ++ end ++ out:write("\n") ++end ++ ++-- Write action list buffer as a huge static C array. ++local function writeactions(out, name) ++ local nn = #actlist ++ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end ++ out:write("static const unsigned int ", name, "[", nn, "] = {\n") ++ for i = 1,nn-1 do ++ assert(out:write("0x", tohex(actlist[i]), ",\n")) ++ end ++ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Add word to action list. ++local function wputxw(n) ++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") ++ actlist[#actlist+1] = n ++end ++ ++-- Add action to list with optional arg. Advance buffer pos, too. ++local function waction(action, val, a, num) ++ local w = assert(map_action[action], "bad action name `"..action.."'") ++ wputxw(w * 0x10000 + (val or 0)) ++ if a then actargs[#actargs+1] = a end ++ if a or num then secpos = secpos + (num or 1) end ++end ++ ++-- Flush action list (intervening C code or buffer pos overflow). ++local function wflush(term) ++ if #actlist == actargs[1] then return end -- Nothing to flush. ++ if not term then waction("STOP") end -- Terminate action list. ++ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) ++ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). ++ secpos = 1 -- The actionlist offset occupies a buffer position, too. ++end ++ ++-- Put escaped word. ++local function wputw(n) ++ if n <= 0x000fffff then waction("ESC") end ++ wputxw(n) ++end ++ ++-- Reserve position for word. ++local function wpos() ++ local pos = #actlist+1 ++ actlist[pos] = "" ++ return pos ++end ++ ++-- Store word to reserved position. ++local function wputpos(pos, n) ++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") ++ if n <= 0x000fffff then ++ insert(actlist, pos+1, n) ++ n = map_action.ESC * 0x10000 ++ end ++ actlist[pos] = n ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Global label name -> global label number. With auto assignment on 1st use. 
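++-- Numbering sketch, mirroring the C engine's bias of 10: 1..9 are forward
++-- local references, 11..19 backward locals, and 20..2047 are globals, so
++-- the first `->name` reference in a .dasc file (name hypothetical) is
++-- auto-assigned number 20 by the __index metamethod below.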
++local next_global = 20 ++local map_global = setmetatable({}, { __index = function(t, name) ++ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end ++ local n = next_global ++ if n > 2047 then werror("too many global labels") end ++ next_global = n + 1 ++ t[name] = n ++ return n ++end}) ++ ++-- Dump global labels. ++local function dumpglobals(out, lvl) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("Global labels:\n") ++ for i=20,next_global-1 do ++ out:write(format(" %s\n", t[i])) ++ end ++ out:write("\n") ++end ++ ++-- Write global label enum. ++local function writeglobals(out, prefix) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("enum {\n") ++ for i=20,next_global-1 do ++ out:write(" ", prefix, t[i], ",\n") ++ end ++ out:write(" ", prefix, "_MAX\n};\n") ++end ++ ++-- Write global label names. ++local function writeglobalnames(out, name) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("static const char *const ", name, "[] = {\n") ++ for i=20,next_global-1 do ++ out:write(" \"", t[i], "\",\n") ++ end ++ out:write(" (const char *)0\n};\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Extern label name -> extern label number. With auto assignment on 1st use. ++local next_extern = 0 ++local map_extern_ = {} ++local map_extern = setmetatable({}, { __index = function(t, name) ++ -- No restrictions on the name for now. ++ local n = next_extern ++ if n > 2047 then werror("too many extern labels") end ++ next_extern = n + 1 ++ t[name] = n ++ map_extern_[n] = name ++ return n ++end}) ++ ++-- Dump extern labels. ++local function dumpexterns(out, lvl) ++ out:write("Extern labels:\n") ++ for i=0,next_extern-1 do ++ out:write(format(" %s\n", map_extern_[i])) ++ end ++ out:write("\n") ++end ++ ++-- Write extern label names. ++local function writeexternnames(out, name) ++ out:write("static const char *const ", name, "[] = {\n") ++ for i=0,next_extern-1 do ++ out:write(" \"", map_extern_[i], "\",\n") ++ end ++ out:write(" (const char *)0\n};\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Arch-specific maps. ++ ++-- Ext. register name -> int. name. ++local map_archdef = { sp = "r13", lr = "r14", pc = "r15", } ++ ++-- Int. register name -> ext. name. ++local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", } ++ ++local map_type = {} -- Type name -> { ctype, reg } ++local ctypenum = 0 -- Type number (for Dt... macros). ++ ++-- Reverse defines for registers. ++function _M.revdef(s) ++ return map_reg_rev[s] or s ++end ++ ++local map_shift = { lsl = 0, lsr = 1, asr = 2, ror = 3, } ++ ++local map_cond = { ++ eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7, ++ hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14, ++ hs = 2, lo = 3, ++} ++ ++------------------------------------------------------------------------------ ++ ++-- Template strings for ARM instructions. ++-- jturnsek: dasm_encode will do the swap of half-words!!! 
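++-- A small self-contained sketch (helper name is ours) of the half-word
++-- swap dasm_encode applies to these templates: Thumb-2 stores a 32-bit
++-- opcode as two consecutive 16-bit units, so the template base 0xeb000000
++-- (add) below is emitted as 0x0000eb00.
++local function hwswap_sketch(ins)  -- tohex(hwswap_sketch(0xeb000000)) == "0000eb00"
++  return bit.bor(shl(band(ins, 0xffff), 16), band(shr(ins, 16), 0xffff))
++end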
++local map_op = { ++ and_3 = "ea000000DNPs", ++ eor_3 = "ea800000DNPs", ++ sub_3 = "eba00000DNPs", ++ rsb_3 = "ebc00000DNPs", ++ add_3 = "eb000000DNPs", ++ sbc_3 = "eb600000DNPs", ++ tst_2 = "ea100f00NP", ++ cmp_2 = "ebb00f00NP", ++ cmn_2 = "eb100f00NP", ++ orr_3 = "ea400000DNPs", ++ mov_2 = "ea4f0000DPs", ++ bic_3 = "ea200000DNPs", ++ mvn_2 = "ea6f0000DPs", ++ ++ and_4 = "ea000000DNMps", ++ eor_4 = "ea800000DNMps", ++ sub_4 = "eba00000DNMps", ++ rsb_4 = "ebc00000DNMps", ++ add_4 = "eb000000DNMps", ++ sbc_4 = "eb600000DNMps", ++ tst_3 = "ea100f00NMp", ++ cmp_3 = "ebb00f00NMp", ++ cmn_3 = "eb100f00NMp", ++ orr_4 = "ea400000DNMps", ++ mov_3 = "ea4f0000DMps", ++ bic_4 = "ea200000DNMps", ++ mvn_3 = "ea6f0000DMps", ++ ++ lsl_3 = "ea400000DNws", ++ lsr_3 = "ea400010DNws", ++ asr_3 = "ea400020DNws", ++ ror_3 = "ea400030DNws", ++ ++ smull_4 = "fb800000SDNM", ++ ++ clz_2 = "fab0f080Da", -- a is used for Consistent(M) ++ rbit_2 = "fa90f0a0Da", -- a is used for Consistent(M) ++ ++ str_2 = "f8400000SL", str_3 = "f8400000SL", str_4 = "f8400000SL", ++ strb_2 = "f8000000SL", strb_3 = "f8000000SL", strb_4 = "f8000000SL", ++ ldr_2 = "f8500000SL", ldr_3 = "f8500000SL", ldr_4 = "f8500000SL", ++ ldrb_2 = "f8100000SL", ldrb_3 = "f8100000SL", ldrb_4 = "f8100000SL", ++ strh_2 = "f8200000SL", strh_3 = "f8200000SL", ++ ldrh_2 = "f8300000SL", ldrh_3 = "f8300000SL", ++ ldrd_3 = "e8500000SDL", ldrd_4 = "e8500000SDL", ++ strd_3 = "e8400000SDL", strd_4 = "e8400000SDL", ++ ++ ldm_2 = "e8900000oR", ++ pop_1 = "e8bd0000R", ++ push_1 = "e92d0000R", ++ ++ b_1 = "f0009000B", ++ bl_1 = "f000d000B", ++ bx_1 = "bf004700C", ++ blx_1 = "bf004780C", ++ ++ nop_0 = "f3af8000", ++ bkpt_1 = "bf00be00K", ++ ++ ["vadd.f64_3"] = "ee300b00Gdnm", ++ ["vsub.f64_3"] = "ee300b40Gdnm", ++ ["vmul.f64_3"] = "ee200b00Gdnm", ++ ["vdiv.f64_3"] = "ee800b00Gdnm", ++ ["vcmp.f64_2"] = "eeb40b40Gdm", ++ ["vcvt.f64.s32_2"] = "eeb80bc0GdFm", ++ ["vsqrt.f64_2"] = "eeb10bc0Gdm", ++ ++ vldr_2 = "ed100a00dl|ed100b00Gdl", ++ vstr_2 = "ed000a00dl|ed000b00Gdl", ++ vldm_2 = "ec900a00or", ++ vpop_1 = "ecbd0a00r", ++ vstmdb_2 = "ed000a00or", ++ vpush_1 = "ed2d0a00r", ++ ++ ["vmov.f64_2"] = "eeb00b40Gdm|eeb00b00GdY", ++ vmov_2 = "ee100a10Sn|ee000a10nS", ++ vmov_3 = "ec500a10SNm|ec400a10mSN|ec500b10GSNm|ec400b10GmSN", ++ vmrs_0 = "eef1fa10", ++ ++ it_1 = "bf00bf08c", ++ ite_1 = "bf00bf04c", ++ itt_1 = "bf00bf04c", ++ ittt_1 = "bf00bf02c", ++ itttt_1 = "bf00bf01c", ++ iteee_1 = "bf00bf01c", ++} ++ ++-- Add mnemonics for "s" variants. 
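++-- e.g. and_3 = "ea000000DNPs" derives ands_3 = "ea100000DNP" below:
++-- bumping the third hex digit sets the S (flag-setting) bit, and the
++-- trailing "s" marker is dropped from the derived template.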
++do ++ local t = {} ++ for k,v in pairs(map_op) do ++ if sub(v, -1) == "s" then ++ local v2 = sub(v, 1, 2)..char(byte(v, 3)+1)..sub(v, 4, -2) ++ t[sub(k, 1, -3).."s"..sub(k, -2)] = v2 ++ end ++ end ++ for k,v in pairs(t) do ++ map_op[k] = v ++ end ++end ++ ++------------------------------------------------------------------------------ ++ ++local function parse_gpr(expr) ++ local tname, ovreg = match(expr, "^([%w_]+):(r1?[0-9])$") ++ local tp = map_type[tname or expr] ++ if tp then ++ local reg = ovreg or tp.reg ++ if not reg then ++ werror("type `"..(tname or expr).."' needs a register override") ++ end ++ expr = reg ++ end ++ local r = match(expr, "^r(1?[0-9])$") ++ if r then ++ r = tonumber(r) ++ if r <= 15 then return r, tp end ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_gpr_pm(expr) ++ local pm, expr2 = match(expr, "^([+-]?)(.*)$") ++ return parse_gpr(expr2), (pm == "-") ++end ++ ++local function parse_vr(expr, tp) ++ local t, r = match(expr, "^([sd])([0-9]+)$") ++ if t == tp then ++ r = tonumber(r) ++ if r <= 31 then ++ if t == "s" then return shr(r, 1), band(r, 1) end ++ return band(r, 15), shr(r, 4) ++ end ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_reglist(reglist) ++ reglist = match(reglist, "^{%s*([^}]*)}$") ++ if not reglist then werror("register list expected") end ++ local rr = 0 ++ for p in gmatch(reglist..",", "%s*([^,]*),") do ++ local rbit = shl(1, parse_gpr(gsub(p, "%s+$", ""))) ++ if band(rr, rbit) ~= 0 then ++ werror("duplicate register `"..p.."'") ++ end ++ rr = rr + rbit ++ end ++ return rr ++end ++ ++local function parse_vrlist(reglist) ++ local ta, ra, tb, rb = match(reglist, ++ "^{%s*([sd])([0-9]+)%s*%-%s*([sd])([0-9]+)%s*}$") ++ ra, rb = tonumber(ra), tonumber(rb) ++ if ta and ta == tb and ra and rb and ra <= 31 and rb <= 31 and ra <= rb then ++ local nr = rb + 1 - ra ++ if ta == "s" then ++ return shl(shr(ra, 1), 12) + shl(band(ra, 1), 22) + nr ++ else ++ return shl(band(ra, 15), 12) + shl(shr(ra, 4), 22) + nr * 2 + 0x100 ++ end ++ end ++ werror("register list expected") ++end ++ ++local function parse_imm(imm, bits, shift, scale, signed) ++ imm = match(imm, "^#(.*)$") ++ if not imm then werror("expected immediate operand") end ++ local n = tonumber(imm) ++ if n then ++ local m = sar(n, scale) ++ if shl(m, scale) == n then ++ if signed then ++ local s = sar(m, bits-1) ++ if s == 0 then return shl(m, shift) ++ elseif s == -1 then return shl(m + shl(1, bits), shift) end ++ else ++ if sar(m, bits) == 0 then return shl(m, shift) end ++ end ++ end ++ werror("out of range immediate `"..imm.."'") ++ else ++ waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) ++ return 0 ++ end ++end ++ ++local function parse_imm12(imm) ++ local n = tonumber(imm) ++ if n then ++ if n <= 255 then ++ return band(n, 0xff) ++ elseif band(n, 0xff00ff00) == 0 and band(shr(n, 16), 0xff) == band(n, 0xff) then ++ return band(n, 0xff) + shl(1, 12) ++ elseif band(n, 0x00ff00ff) == 0 and band(shr(n, 16), 0xff00) == band(n, 0xff00) then ++ return band(shr(n, 8), 0xff) + shl(2, 12) ++ elseif band(shr(n, 24), 0xff) == band(n, 0xff) and ++ band(shr(n, 16), 0xff) == band(n, 0xff) and ++ band(shr(n, 8), 0xff) == band(n, 0xff) then ++ return band(n, 0xff) + shl(3, 12) ++ else ++ for i=31, 8, -1 do ++ n = ror(n, 1) ++ if n >= 128 and n <= 255 then ++ return shl(band(i, 0x10), 22) + shl(band(i, 0x0e), 11) + shl(band(i, 0x01), 7) + band(n, 0x7f) ++ end ++ end ++ end ++ werror("out of range immediate `"..imm.."'") 
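++    -- e.g. #0x00ab00ab matches the second pattern above (selector
++    -- i:imm3 = 0001, imm8 = 0xab), while #0x000003fc is found by the
++    -- rotation loop as 0xff rotated right by 30.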
++ else ++ waction("IMM12", 0, imm) ++ return 0 ++ end ++end ++ ++local function parse_imm16(imm) ++ imm = match(imm, "^#(.*)$") ++ if not imm then werror("expected immediate operand") end ++ local n = tonumber(imm) ++ if n then ++ if shr(n, 16) == 0 then ++ return band(n, 0x00ff) + shl(band(n, 0x0700), 4) + shl(band(n, 0x0800), 15) + shl(band(n, 0xf000), 4) ++ end ++ werror("out of range immediate `"..imm.."'") ++ else ++ waction("IMM16", 32*16, imm) ++ return 0 ++ end ++end ++ ++local function parse_imm_load(imm, ext, flags) ++ local n = tonumber(imm) ++ local p, w = match(flags, "P"), match(flags, "W") ++ if n then ++ if ext then ++ if n >= -1020 and n <= 1020 then ++ local up = 0x00800000 ++ if n < 0 then n = -n; up = 0 end ++ return n/4 + up + (p and 0x01000000 or 0) + (w and 0x00200000 or 0) ++ end ++ else ++ if w then ++ if n >= -255 and n <= 255 then ++ if n >= 0 then ++ return n + 0x00000a00 + (p and 0x00000400 or 0) + (w and 0x00000100 or 0) ++ else ++ return -n + 0x00000800 + (p and 0x00000400 or 0) + (w and 0x00000100 or 0) ++ end ++ end ++ else ++ if n >= 0 and n <= 4095 then ++ return n + 0x00800000 ++ elseif n >= -255 and n < 0 then ++ return -n + 0x00000800 + (p and 0x00000400 or 0) ++ end ++ end ++ end ++ werror("out of range immediate `"..imm.."'") ++ else ++ waction(ext and "IMMV8" or "IMML12", 32768 + (ext and 32*8 or 32*12), imm) ++ local pw = 0 ++ if p then pw = (ext and 0x01000000 or 0) end ++ if w then pw = (ext and 0x00200000 or 0) end ++ return pw ++ end ++end ++ ++local function parse_shift(shift) ++ if shift == "rrx" then ++ return 3 * 16 ++ else ++ local s, s2 = match(shift, "^(%S+)%s*(.*)$") ++ s = map_shift[s] ++ if not s then werror("expected shift operand") end ++ if sub(s2, 1, 1) == "#" then ++ local imm = parse_imm(s2, 5, 0, 0, false) ++ return shl(band(imm, 0x1c), 10) + shl(band(imm, 0x03), 6) + shl(s, 4) ++ else ++ werror("expected immediate shift operand") ++ end ++ end ++end ++ ++local function parse_shift_load(shift) ++ if not match(shift, "lsl", 1) then ++ werror("expected lsl shift operand") ++ else ++ local s, s2 = match(shift, "^(%S+)%s*(.*)$") ++ if not s then werror("expected shift operand") end ++ if sub(s2, 1, 1) == "#" then ++ return parse_imm(s2, 2, 4, 0, false) ++ else ++ werror("expected immediate shift operand") ++ end ++ end ++end ++ ++local function parse_label(label, def) ++ local prefix = sub(label, 1, 2) ++ -- =>label (pc label reference) ++ if prefix == "=>" then ++ return "PC", 0, sub(label, 3) ++ end ++ -- ->name (global label reference) ++ if prefix == "->" then ++ return "LG", map_global[sub(label, 3)] ++ end ++ if def then ++ -- [1-9] (local label definition) ++ if match(label, "^[1-9]$") then ++ return "LG", 10+tonumber(label) ++ end ++ else ++ -- [<>][1-9] (local label reference) ++ local dir, lnum = match(label, "^([<>])([1-9])$") ++ if dir then -- Fwd: 1-9, Bkwd: 11-19. 
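++      -- e.g. ">2" yields label number 2 (a forward reference resolved
++      -- through the C engine's relocation chain) and "<2" yields 12.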
++ return "LG", lnum + (dir == ">" and 0 or 10) ++ end ++ -- extern label (extern label reference) ++ local extname = match(label, "^extern%s+(%S+)$") ++ if extname then ++ return "EXT", map_extern[extname] ++ end ++ end ++ werror("bad label `"..label.."'") ++end ++ ++local function parse_load(params, nparams, n, op) ++ local ext = (band(op, 0x10000000) == 0) ++ local pn = params[n] ++ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") ++ local p2 = params[n+1] ++ if not p1 then ++ if not p2 then ++ if match(pn, "^[<>=%-]") or match(pn, "^extern%s+") then ++ local mode, n, s = parse_label(pn, false) ++ waction("REL_"..mode, n + (ext and 0x2800 or 0x0800), s, 1) ++ return op + 15 * 65536 + (ext and 0x01000000 or 0) --set P if ext==true ++ end ++ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") ++ if reg and tailr ~= "" then ++ local d, tp = parse_gpr(reg) ++ if tp then ++ waction(ext and "IMMV8" or "IMML12", 32768 + (ext and 32*8 or 32*12), ++ format(tp.ctypefmt, tailr)) ++ return op + shl(d, 16) + (ext and 0x01000000 or 0) --set P if ext==true, using imm12 if ext==false ++ end ++ end ++ end ++ werror("expected address operand") ++ end ++ if p2 then ++ if wb == "!" then werror("bad use of '!'") end ++ local p3 = params[n+2] ++ op = op + shl(parse_gpr(p1), 16) ++ local imm = match(p2, "^#(.*)$") ++ if imm then ++ if p3 then werror("too many parameters") end ++ op = op + parse_imm_load(imm, ext, "W") --always imm8, set W ++ else ++ if ext then werror("not in ARMV7M") end ++ op = op + parse_gpr(p2) ++ if p3 then op = op + parse_shift_load(p3) end ++ end ++ else ++ local p1a, p2 = match(p1, "^([^,%s]*)%s*(.*)$") ++ op = op + shl(parse_gpr(p1a), 16) ++ if p2 ~= "" then ++ local imm = match(p2, "^,%s*#(.*)$") ++ if imm then ++ op = op + parse_imm_load(imm, ext, (wb == "!" and "PW" or "P")) --set P (and W) ++ else ++ local p2a, p3 = match(p2, "^,%s*([^,%s]*)%s*,?%s*(.*)$") ++ if ext then werror("not in ARMV7M") end ++ op = op + parse_gpr(p2a) ++ if p3 ~= "" then ++ op = op + parse_shift_load(p3) ++ end ++ end ++ else ++ if wb == "!" 
then werror("bad use of '!'") end ++ op = op + (ext and 0x01000000 or 0) + 0x00800000 --no imm, thus using imm12 if ext==false, set U ++ end ++ end ++ return op ++end ++ ++local function parse_vload(q) ++ local reg, imm = match(q, "^%[%s*([^,%s]*)%s*(.*)%]$") ++ if reg then ++ local d = shl(parse_gpr(reg), 16) ++ if imm == "" then return d end ++ imm = match(imm, "^,%s*#(.*)$") ++ if imm then ++ local n = tonumber(imm) ++ if n then ++ if n >= -1020 and n <= 1020 and n%4 == 0 then ++ return d + (n >= 0 and n/4+0x00800000 or -n/4) ++ end ++ werror("out of range immediate `"..imm.."'") ++ else ++ waction("IMMV8", 32768 + 32*8, imm) ++ return d ++ end ++ end ++ else ++ if match(q, "^[<>=%-]") or match(q, "^extern%s+") then ++ local mode, n, s = parse_label(q, false) ++ waction("REL_"..mode, n + 0x2800, s, 1) ++ return 15 * 65536 ++ end ++ local reg, tailr = match(q, "^([%w_:]+)%s*(.*)$") ++ if reg and tailr ~= "" then ++ local d, tp = parse_gpr(reg) ++ if tp then ++ waction("IMMV8", 32768 + 32*8, format(tp.ctypefmt, tailr)) ++ return shl(d, 16) ++ end ++ end ++ end ++ werror("expected address operand") ++end ++ ++local function parse_it(name, cond) ++ local mask, it = 0, match(name, "it", 1) ++ if not it then ++ werror("not IT instruction") ++ end ++ local it2 = sub(name, 3, -1) ++ if not it2 then ++ return shl(map_cond[cond], 4) ++ end ++ local shift = 3 ++ for p in gmatch(it2, "[te]") do ++ if p == "t" then ++ mask = mask + shl(band(map_cond[cond], 1), shift) ++ elseif p == "e" then ++ mask = mask + shl(band(bxor(map_cond[cond], 15), 1), shift) ++ else ++ werror("wrong syntax") ++ end ++ if shift ~= 0 then shift = shift - 1 end ++ end ++ return shl(map_cond[cond], 4) + mask ++end ++ ++ ++------------------------------------------------------------------------------ ++ ++-- Handle opcodes defined with template strings. ++local function parse_template(params, template, nparams, pos) ++ local op = tonumber(sub(template, 1, 8), 16) ++ local n = 1 ++ local vr = "s" ++ ++ -- Process each character. 
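++  -- Worked example (hand-checked): for `cmp r4, #5` the template is
++  -- "ebb00f00NP"; 'N' adds r4 << 16 and 'P' adds 0x6000000 plus the
++  -- modified immediate 5, giving 0xf1b40f05, emitted as the half-word
++  -- pair 0xf1b4, 0x0f05 (CMP.W r4, #5).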
++ for p in gmatch(sub(template, 9), ".") do ++ local q = params[n] ++ if p == "D" then ++ op = op + shl(parse_gpr(q), 8); n = n + 1 ++ elseif p == "N" then ++ op = op + shl(parse_gpr(q), 16); n = n + 1 ++ elseif p == "S" then ++ op = op + shl(parse_gpr(q), 12); n = n + 1 ++ elseif p == "M" then ++ op = op + parse_gpr(q); n = n + 1 ++ elseif p == "a" then ++ local m = parse_gpr(q) ++ op = op + m + shl(m, 16); n = n + 1 ++ elseif p == "d" then ++ local r,h = parse_vr(q, vr); op = op + shl(r, 12) + shl(h, 22); n = n + 1 ++ elseif p == "n" then ++ local r,h = parse_vr(q, vr); op = op + shl(r, 16) + shl(h, 7); n = n + 1 ++ elseif p == "m" then ++ local r,h = parse_vr(q, vr); op = op + r + shl(h, 5); n = n + 1 ++ elseif p == "P" then ++ local imm = match(q, "^#(.*)$") ++ if imm then ++ op = op + 0x6000000 + parse_imm12(imm) ++ else ++ op = op + parse_gpr(q) ++ end ++ n = n + 1 ++ elseif p == "p" then ++ op = op + parse_shift(q); n = n + 1 ++ elseif p == "L" then ++ op = parse_load(params, nparams, n, op) ++ elseif p == "l" then ++ op = op + parse_vload(q) ++ elseif p == "B" then ++ local mode, n, s = parse_label(q, false) ++ waction("REL_"..mode, n, s, 1) ++ elseif p == "C" then -- blx gpr only ++ if match(q, "^([%w_]+):(r1?[0-9])$") or match(q, "^r(1?[0-9])$") then ++ local r = parse_gpr(q) ++ op = op + shl(r, 3) ++ else ++ werror("not supported") ++ end ++ elseif p == "c" then ++ op = op + parse_it(params.op, q) ++ elseif p == "F" then ++ vr = "s" ++ elseif p == "G" then ++ vr = "d" ++ elseif p == "o" then ++ local r, wb = match(q, "^([^!]*)(!?)$") ++ op = op + shl(parse_gpr(r), 16) + (wb == "!" and 0x00200000 or 0) ++ n = n + 1 ++ elseif p == "R" then ++ if params[1] == "{r15}" and params.op == "pop" then ++ op = 0xf85dfb04; -- pop {pc} coded as T3 ++ elseif params[1] == "{r12}" and params.op == "pop" then ++ op = 0xf85dcb04; -- pop {r12} coded as T3 ++ elseif params[1] == "{r12}" and params.op == "push" then ++ op = 0xf84dcd04; -- push {r12} coded as T3 ++ else ++ op = op + parse_reglist(q) ++ end ++ n = n + 1 ++ elseif p == "r" then ++ op = op + parse_vrlist(q); n = n + 1 ++ elseif p == "w" then ++ local imm = match(q, "^#(.*)$") ++ if imm then ++ local imm5 = parse_imm(q, 5, 0, 0, false) ++ local m = band(op, 0x000f0000) ++ op = op - m + 0x000f0000 + shr(m, 16) + shl(band(imm5, 0x1c), 10) + shl(band(imm5, 0x03), 6); n = n + 1 ++ else ++ local type = band(op, 0x00000030) ++ op = op - 0xea400000 + 0xfa00f000 - type + shl(type, 17) + parse_gpr(q) ++ end ++ elseif p == "Y" then ++ local imm = tonumber(match(q, "^#(.*)$")); n = n + 1 ++ if not imm or shr(imm, 8) ~= 0 then ++ werror("bad immediate operand") ++ end ++ op = op + shl(band(imm, 0xf0), 12) + band(imm, 0x0f) ++ elseif p == "K" then ++ local imm = tonumber(match(q, "^#(.*)$")); n = n + 1 ++ if not imm or shr(imm, 8) ~= 0 then ++ werror("bad immediate operand") ++ end ++ op = op + band(imm, 0x00ff) ++ elseif p == "s" then ++ -- Ignored. ++ else ++ assert(false) ++ end ++ end ++ wputpos(pos, op) ++end ++ ++map_op[".template__"] = function(params, template, nparams) ++ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end ++ ++ -- Limit number of section buffer positions used by a single dasm_put(). ++ -- A single opcode needs a maximum of 3 positions. 
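++  -- For instance, a load whose operand is an unresolved `.type` offset
++  -- takes one position for the opcode and one for the IMML12 action
++  -- argument; three positions bound the worst case handled here.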
++ if secpos+3 > maxsecpos then wflush() end ++ local pos = wpos() ++ local lpos, apos, spos = #actlist, #actargs, secpos ++ ++ local ok, err ++ for t in gmatch(template, "[^|]+") do ++ ok, err = pcall(parse_template, params, t, nparams, pos) ++ if ok then return end ++ secpos = spos ++ actlist[lpos+1] = nil ++ actlist[lpos+2] = nil ++ actlist[lpos+3] = nil ++ actargs[apos+1] = nil ++ actargs[apos+2] = nil ++ actargs[apos+3] = nil ++ end ++ error(err, 0) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcode to mark the position where the action list is to be emitted. ++map_op[".actionlist_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeactions(out, name) end) ++end ++ ++-- Pseudo-opcode to mark the position where the global enum is to be emitted. ++map_op[".globals_1"] = function(params) ++ if not params then return "prefix" end ++ local prefix = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeglobals(out, prefix) end) ++end ++ ++-- Pseudo-opcode to mark the position where the global names are to be emitted. ++map_op[".globalnames_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeglobalnames(out, name) end) ++end ++ ++-- Pseudo-opcode to mark the position where the extern names are to be emitted. ++map_op[".externnames_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeexternnames(out, name) end) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Label pseudo-opcode (converted from trailing colon form). ++map_op[".label_1"] = function(params) ++ if not params then return "[1-9] | ->global | =>pcexpr" end ++ if secpos+1 > maxsecpos then wflush() end ++ local mode, n, s = parse_label(params[1], true) ++ if mode == "EXT" then werror("bad label definition") end ++ waction("LABEL_"..mode, n, s, 1) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcodes for data storage. ++map_op[".long_*"] = function(params) ++ if not params then return "imm..." end ++ for _,p in ipairs(params) do ++ local n = tonumber(p) ++ if not n then werror("bad immediate `"..p.."'") end ++ if n < 0 then n = n + 2^32 end ++ wputw(n) ++ if secpos+2 > maxsecpos then wflush() end ++ end ++end ++ ++-- Alignment pseudo-opcode. ++map_op[".align_1"] = function(params) ++ if not params then return "numpow2" end ++ if secpos+1 > maxsecpos then wflush() end ++ local align = tonumber(params[1]) ++ if align then ++ local x = align ++ -- Must be a power of 2 in the range (2 ... 256). ++ for i=1,8 do ++ x = x / 2 ++ if x == 1 then ++ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. ++ return ++ end ++ end ++ end ++ werror("bad alignment") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcode for (primitive) type definitions (map to C types). 
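++-- Usage sketch (names illustrative): after `.type UV, GCupval, r4`, a
++-- .dasc operand such as `UV->v` expands through ctypefmt to `Dt1(->v)`,
++-- an offset macro over the C struct, applied against register r4.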
++map_op[".type_3"] = function(params, nparams) ++ if not params then ++ return nparams == 2 and "name, ctype" or "name, ctype, reg" ++ end ++ local name, ctype, reg = params[1], params[2], params[3] ++ if not match(name, "^[%a_][%w_]*$") then ++ werror("bad type name `"..name.."'") ++ end ++ local tp = map_type[name] ++ if tp then ++ werror("duplicate type `"..name.."'") ++ end ++ -- Add #type to defines. A bit unclean to put it in map_archdef. ++ map_archdef["#"..name] = "sizeof("..ctype..")" ++ -- Add new type and emit shortcut define. ++ local num = ctypenum + 1 ++ map_type[name] = { ++ ctype = ctype, ++ ctypefmt = format("Dt%X(%%s)", num), ++ reg = reg, ++ } ++ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) ++ ctypenum = num ++end ++map_op[".type_2"] = map_op[".type_3"] ++ ++-- Dump type definitions. ++local function dumptypes(out, lvl) ++ local t = {} ++ for name in pairs(map_type) do t[#t+1] = name end ++ sort(t) ++ out:write("Type definitions:\n") ++ for _,name in ipairs(t) do ++ local tp = map_type[name] ++ local reg = tp.reg or "" ++ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) ++ end ++ out:write("\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Set the current section. ++function _M.section(num) ++ waction("SECTION", num) ++ wflush(true) -- SECTION is a terminal action. ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Dump architecture description. ++function _M.dumparch(out) ++ out:write(format("DynASM %s version %s, released %s\n\n", ++ _info.arch, _info.version, _info.release)) ++ dumpactions(out) ++end ++ ++-- Dump all user defined elements. ++function _M.dumpdef(out, lvl) ++ dumptypes(out, lvl) ++ dumpglobals(out, lvl) ++ dumpexterns(out, lvl) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pass callbacks from/to the DynASM core. ++function _M.passcb(wl, we, wf, ww) ++ wline, werror, wfatal, wwarn = wl, we, wf, ww ++ return wflush ++end ++ ++-- Setup the arch-specific module. ++function _M.setup(arch, opt) ++ g_arch, g_opt = arch, opt ++end ++ ++local map_cond_b = { ++ eq = "f0008000B", ne = "f0408000B", cs = "f0808000B", cc = "f0c08000B", mi = "f1008000B", pl = "f1408000B", vs = "f1808000B", vc = "f1c08000B", ++ hi = "f2008000B", ls = "f2408000B", ge = "f2808000B", lt = "f2c08000B", gt = "f3008000B", le = "f3408000B", al = "f3808000B", ++ hs = "f0808000B", lo = "f0c08000B", ++} ++ ++-- Merge the core maps and the arch-specific maps. ++function _M.mergemaps(map_coreop, map_def) ++ setmetatable(map_op, { __index = function(t, k) ++ local v = map_coreop[k] ++ if v then return v end ++ local k1, cc, k2 = match(k, "^(.-)(..)([._].*)$") ++ local cv = map_cond[cc] ++ if cv then ++ local v = rawget(t, k1..k2) ++ if type(v) == "string" and k1 == "b" then ++ local scv = map_cond_b[cc] ++ return scv ++ elseif type(v) == "string" then ++ return v ++ end ++ end ++ end }) ++ setmetatable(map_def, { __index = map_archdef }) ++ return map_op, map_def ++end ++ ++return _M ++ ++------------------------------------------------------------------------------ ++ +diff --git a/src/Makefile b/src/Makefile +old mode 100644 +new mode 100755 +index 30d64be2..83d592f3 +--- a/src/Makefile ++++ b/src/Makefile +@@ -36,7 +36,8 @@ CC= $(DEFAULT_CC) + # to slow down the C part by not omitting it. 
Debugging, tracebacks and + # unwinding are not affected -- the assembler part has frame unwind + # information and GCC emits it where needed (x64) or with -g (see CCDEBUG). +-CCOPT= -O2 -fomit-frame-pointer ++#CCOPT= -O2 -fomit-frame-pointer ++CCOPT= -O2 -fomit-frame-pointer -D__ARM_ARCH_7M__ -DLUAJIT_NO_UNWIND -DLUAJIT_DISABLE_PROFILE + # Use this if you want to generate a smaller binary (but it's slower): + #CCOPT= -Os -fomit-frame-pointer + # Note: it's no longer recommended to use -O3 with GCC 4.x. +@@ -49,7 +50,7 @@ CCOPT= -O2 -fomit-frame-pointer + # + CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse + CCOPT_x64= +-CCOPT_arm= ++CCOPT_arm= -mthumb -march=armv7e-m -mfloat-abi=hard -mfpu=fpv5-d16 + CCOPT_arm64= + CCOPT_ppc= + CCOPT_mips= +@@ -71,7 +72,7 @@ CCWARN= -Wall + # as dynamic mode. + # + # Mixed mode creates a static + dynamic library and a statically linked luajit. +-BUILDMODE= mixed ++BUILDMODE= static + # + # Static mode creates a static library and a statically linked luajit. + #BUILDMODE= static +@@ -242,6 +243,9 @@ ifneq (,$(findstring LJ_TARGET_X86 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= x86 + else + ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) ++ ifneq (,$(findstring __ARM_ARCH_7M__ ,$(TARGET_TESTARCH))) ++ TARGET_ARCH= -D__ARM_ARCH_7M__=1 ++ endif + TARGET_LJARCH= arm + else + ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) +@@ -443,6 +447,9 @@ ifeq (x64,$(TARGET_LJARCH)) + endif + else + ifeq (arm,$(TARGET_LJARCH)) ++ ifneq (,$(findstring __ARM_ARCH_7M__ ,$(TARGET_TESTARCH))) ++ DASM_ARCH= armv7m ++ endif + ifeq (iOS,$(TARGET_SYS)) + DASM_AFLAGS+= -D IOS + endif +diff --git a/src/host/buildvm.c b/src/host/buildvm.c +index 9ee47ada..ca0ee47e 100644 +--- a/src/host/buildvm.c ++++ b/src/host/buildvm.c +@@ -60,7 +60,11 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); + #if LJ_TARGET_X86ORX64 + #include "../dynasm/dasm_x86.h" + #elif LJ_TARGET_ARM ++#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) ++#include "../dynasm/dasm_armv7m.h" ++#else + #include "../dynasm/dasm_arm.h" ++#endif + #elif LJ_TARGET_ARM64 + #include "../dynasm/dasm_arm64.h" + #elif LJ_TARGET_PPC +diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c +index 7baa011f..1fc72a9d 100644 +--- a/src/host/buildvm_asm.c ++++ b/src/host/buildvm_asm.c +@@ -114,6 +114,20 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, + emit_asm_words(ctx, p, n-4); + ins = *(uint32_t *)(p+n-4); + #if LJ_TARGET_ARM ++#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) ++ if ((ins & 0xd000f800) == 0xd000f000) { ++ fprintf(ctx->fp, "\tbl %s\n", sym); ++ } ++ else if ((ins & 0xd000f800) == 0x9000f000) { ++ fprintf(ctx->fp, "\tb %s\n", sym); ++ } ++ else { ++ fprintf(stderr, ++ "Error: unsupported opcode %08x for %s symbol relocation.\n", ++ ins, sym); ++ exit(1); ++ } ++#else + if ((ins & 0xff000000u) == 0xfa000000u) { + fprintf(ctx->fp, "\tblx %s\n", sym); + } else if ((ins & 0x0e000000u) == 0x0a000000u) { +@@ -125,6 +139,7 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, + ins, sym); + exit(1); + } ++#endif + #elif LJ_TARGET_ARM64 + if ((ins >> 26) == 0x25u) { + fprintf(ctx->fp, "\tbl %s\n", sym); +@@ -193,6 +208,16 @@ static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc + break; + } + #endif ++#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) ++ fprintf(ctx->fp, ++ "\n\t.globl %s\n" ++ "\t.thumb_func\n" ++ "\t.hidden %s\n" ++ "\t.type %s, " ELFASM_PX "%s\n" ++ "\t.size %s, %d\n" ++ 
"%s:\n", ++ name,name,name,isfunc ? "function" : "object",name,size,name); ++#else + fprintf(ctx->fp, + "\n\t.globl %s\n" + "\t.hidden %s\n" +@@ -200,6 +225,7 @@ static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc + "\t.size %s, %d\n" + "%s:\n", + name, name, name, isfunc ? "function" : "object", name, size, name); ++#endif + break; + case BUILD_coffasm: + fprintf(ctx->fp, "\n\t.globl %s\n", name); +@@ -242,8 +268,16 @@ void emit_asm(BuildCtx *ctx) + int i, rel; + + fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); ++#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) ++ fprintf(ctx->fp, "\t.syntax unified\n"); ++ fprintf(ctx->fp, "\t.thumb\n"); ++#endif + fprintf(ctx->fp, "\t.text\n"); ++#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) ++ emit_asm_align(ctx, 2); ++#else + emit_asm_align(ctx, 4); ++#endif + + #if LJ_TARGET_PS3 + emit_asm_label(ctx, ctx->beginsym, ctx->codesz, 0); +diff --git a/src/lj_alloc.c b/src/lj_alloc.c +index 20e60493..9003ba09 100644 +--- a/src/lj_alloc.c ++++ b/src/lj_alloc.c +@@ -38,10 +38,17 @@ + #define MAX_SIZE_T (~(size_t)0) + #define MALLOC_ALIGNMENT ((size_t)8U) + ++#if LJ_TARGET_NUTTX ++#define DEFAULT_GRANULARITY ((size_t)32U * (size_t)1024U) ++#define DEFAULT_TRIM_THRESHOLD ((size_t)512U * (size_t)1024U) ++#define DEFAULT_MMAP_THRESHOLD ((size_t)32U * (size_t)1024U) ++#define MAX_RELEASE_CHECK_RATE 255 ++#else + #define DEFAULT_GRANULARITY ((size_t)128U * (size_t)1024U) + #define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) + #define DEFAULT_MMAP_THRESHOLD ((size_t)128U * (size_t)1024U) + #define MAX_RELEASE_CHECK_RATE 255 ++#endif + + /* ------------------- size_t and alignment properties -------------------- */ + +@@ -90,9 +97,34 @@ + + #include + /* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */ ++#if LJ_TARGET_NUTTX ++#include ++#include ++ ++static struct mm_heap_s *g_alloc_heap; ++ ++static void init_mmap(void) ++{ ++ g_alloc_heap = mm_initialize("alloc", ++ (void *)CONFIG_LUAJIT_ALLOC_START, ++ CONFIG_LUAJIT_ALLOC_SIZE); ++} ++#define INIT_MMAP() init_mmap() ++ ++#define CALL_MMAP(prng, size) mm_malloc(g_alloc_heap, (size_t)size) ++#define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s) ++ ++static int CALL_MUNMAP(void *ptr, size_t size) ++{ ++ if (ptr == NULL) return -1; ++ mm_free(g_alloc_heap, ptr); ++ return 0; ++} ++#else + #include + + #define LJ_ALLOC_MMAP 1 ++#endif + + #if LJ_64 + +diff --git a/src/lj_arch.h b/src/lj_arch.h +index bddd757d..522e67f8 100644 +--- a/src/lj_arch.h ++++ b/src/lj_arch.h +@@ -39,6 +39,7 @@ + #define LUAJIT_OS_OSX 3 + #define LUAJIT_OS_BSD 4 + #define LUAJIT_OS_POSIX 5 ++#define LUAJIT_OS_NUTTX 6 + + /* Number mode. */ + #define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. 
*/
+@@ -113,6 +114,8 @@
+ #define LJ_OS_NAME "BSD"
+ #elif LUAJIT_OS == LUAJIT_OS_POSIX
+ #define LJ_OS_NAME "POSIX"
++#elif LUAJIT_OS == LUAJIT_OS_NUTTX
++#define LJ_OS_NAME "NUTTX"
+ #else
+ #define LJ_OS_NAME "Other"
+ #endif
+@@ -122,6 +125,7 @@
+ #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX)
+ #define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD)
+ #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
++#define LJ_TARGET_NUTTX (LUAJIT_OS == LUAJIT_OS_NUTTX)
+ #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
+
+ #if TARGET_OS_IPHONE
+@@ -217,8 +221,11 @@
+ #endif
+
+ #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
+-
++#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
++#define LJ_ARCH_NAME "armv7m"
++#else
+ #define LJ_ARCH_NAME "arm"
++#endif
+ #define LJ_ARCH_BITS 32
+ #define LJ_ARCH_ENDIAN LUAJIT_LE
+ #if !defined(LJ_ARCH_HASFPU) && __SOFTFP__
+@@ -230,8 +237,12 @@
+ #define LJ_ABI_EABI 1
+ #define LJ_TARGET_ARM 1
+ #define LJ_TARGET_EHRETREG 0
+-#define LJ_TARGET_EHRAREG 14
++#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
++/* No need to test jump address range, because we use veneers. */
++//#define LJ_TARGET_JUMPRANGE 24 /* +-2^24 = +-16MB */
++#else
+ #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
++#endif
+ #define LJ_TARGET_MASKSHIFT 0
+ #define LJ_TARGET_MASKROT 1
+ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
+@@ -239,7 +250,7 @@
+
+ #if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
+ #define LJ_ARCH_VERSION 80
+-#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
++#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
+ #define LJ_ARCH_VERSION 70
+ #elif __ARM_ARCH_6T2__
+ #define LJ_ARCH_VERSION 61
+@@ -488,7 +499,7 @@
+ #if defined(__ARMEB__)
+ #error "No support for big-endian ARM"
+ #endif
+-#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
++#if __ARM_ARCH_6M__ /*|| __ARM_ARCH_7M__ || __ARM_ARCH_7EM__*/
+ #error "No support for Cortex-M CPUs"
+ #endif
+ #if !(__ARM_EABI__ || LJ_TARGET_IOS)
+diff --git a/src/lj_asm.c b/src/lj_asm.c
+index 6f5e0c45..429aa161 100644
+--- a/src/lj_asm.c
++++ b/src/lj_asm.c
+@@ -178,7 +178,11 @@ IRFLDEF(FLOFS)
+ #if LJ_TARGET_X86ORX64
+ #include "lj_emit_x86.h"
+ #elif LJ_TARGET_ARM
++#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
++#include "lj_emit_armv7m.h"
++#else
+ #include "lj_emit_arm.h"
++#endif
+ #elif LJ_TARGET_ARM64
+ #include "lj_emit_arm64.h"
+ #elif LJ_TARGET_PPC
+@@ -1655,7 +1659,11 @@ static void asm_loop(ASMState *as)
+ #if LJ_TARGET_X86ORX64
+ #include "lj_asm_x86.h"
+ #elif LJ_TARGET_ARM
++#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
++#include "lj_asm_armv7m.h"
++#else
+ #include "lj_asm_arm.h"
++#endif
+ #elif LJ_TARGET_ARM64
+ #include "lj_asm_arm64.h"
+ #elif LJ_TARGET_PPC
+diff --git a/src/lj_asm_armv7m.h b/src/lj_asm_armv7m.h
+new file mode 100644
+index 00000000..1bdd4a8a
+--- /dev/null
++++ b/src/lj_asm_armv7m.h
+@@ -0,0 +1,2520 @@
++/*
++** ARM IR assembler (SSA IR -> machine code).
++** Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++/* -- Register allocator extensions --------------------------------------- */
++
++/* Allocate a register with a hint.
*/ ++static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) ++{ ++ Reg r = IR(ref)->r; ++ if (ra_noreg(r)) { ++ if (!ra_hashint(r) && !iscrossref(as, ref)) ++ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ ++ r = ra_allocref(as, ref, allow); ++ } ++ ra_noweak(as, r); ++ return r; ++} ++ ++/* Allocate a scratch register pair. */ ++static Reg ra_scratchpair(ASMState *as, RegSet allow) ++{ ++ RegSet pick1 = as->freeset & allow; ++ RegSet pick2 = pick1 & (pick1 >> 1) & RSET_GPREVEN; ++ Reg r; ++ if (pick2) { ++ r = rset_picktop(pick2); ++ } ++ else { ++ RegSet pick = pick1 & (allow >> 1) & RSET_GPREVEN; ++ if (pick) { ++ r = rset_picktop(pick); ++ ra_restore(as, regcost_ref(as->cost[r + 1])); ++ } ++ else { ++ pick = pick1 & (allow << 1) & RSET_GPRODD; ++ if (pick) { ++ r = ra_restore(as, regcost_ref(as->cost[rset_picktop(pick) - 1])); ++ } ++ else { ++ r = ra_evict(as, allow & (allow >> 1) & RSET_GPREVEN); ++ ra_restore(as, regcost_ref(as->cost[r + 1])); ++ } ++ } ++ } ++ lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r); ++ ra_modified(as, r); ++ ra_modified(as, r + 1); ++ RA_DBGX((as, "scratchpair $r $r", r, r + 1)); ++ return r; ++} ++ ++#if !LJ_SOFTFP ++/* Allocate two source registers for three-operand instructions. */ ++static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) ++{ ++ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); ++ Reg left = irl->r, right = irr->r; ++ if (ra_hasreg(left)) { ++ ra_noweak(as, left); ++ if (ra_noreg(right)) ++ right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); ++ else ++ ra_noweak(as, right); ++ } ++ else if (ra_hasreg(right)) { ++ ra_noweak(as, right); ++ left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); ++ } ++ else if (ra_hashint(right)) { ++ right = ra_allocref(as, ir->op2, allow); ++ left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); ++ } ++ else { ++ left = ra_allocref(as, ir->op1, allow); ++ right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); ++ } ++ return left | (right << 8); ++} ++#endif ++ ++/* -- Guard handling ------------------------------------------------------ */ ++ ++/* Generate an exit stub group at the bottom of the reserved MCode memory. */ ++static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) ++{ ++ MCode *mxp = as->mcbot; ++ int i; ++ if (mxp + 8 * 4 + 4*EXITSTUBS_PER_GROUP >= as->mctop) ++ asm_mclimit(as); ++ /* lj_vm_exit_interp_veneer */ ++ *mxp++ = ARMI_LDR | ARMF_N(RID_PC) | ARMF_T(RID_PC); /* LDR.W PC, [PC, #-0] */ ++ *mxp++ = (MCode)lj_vm_exit_interp; ++ /* lj_vm_exit_handler_veneer */ ++ *mxp++ = ARMI_LDR | ARMF_N(RID_PC) | ARMF_T(RID_PC); /* LDR.W PC, [PC, #-0] */ ++ *mxp++ = (MCode)lj_vm_exit_handler; ++ /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */ ++ *mxp++ = ARMI_STR | ARMI_LSX_U | ARMF_T(RID_LR) | ARMF_N(RID_SP); ++ *mxp = ARMI_BL | ARMC_BL((-4) << 1); /* lj_vm_exit_handler_veneer */ ++ mxp++; ++ *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */ ++ *mxp++ = group*EXITSTUBS_PER_GROUP; ++ for (i = 0; i < EXITSTUBS_PER_GROUP; i++) ++ *mxp++ = ARMI_B_T4 | ARMC_BL((-5 - i) << 1); ++ lj_mcode_sync(as->mcbot, mxp); ++ lj_mcode_commitbot(as->J, mxp); ++ as->mcbot = mxp; ++ as->mclim = as->mcbot + MCLIM_REDZONE; ++ return mxp - EXITSTUBS_PER_GROUP; ++} ++ ++/* Setup all needed exit stubs. 
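*/

/*
** Each veneer emitted in asm_exitstub_gen() above is a 32-bit Thumb-2
** "LDR.W pc, [pc, #-0]" followed immediately by the target address as a
** literal word. In Thumb state PC reads as the instruction address + 4,
** so the load fetches exactly the word stored behind the instruction into
** PC: an absolute branch to anywhere in the address space. That is why
** this port can leave LJ_TARGET_JUMPRANGE undefined. A stand-alone sketch
** (the raw opcode value and helper name are illustrative, not the patch's
** encoders; low halfword of the instruction is stored first):
*/
#include <stdint.h>

static uint32_t *emit_abs_jump_veneer(uint32_t *p, void *target)
{
  *p++ = 0xf000f85f;                    /* LDR.W pc, [pc, #-0] */
  *p++ = (uint32_t)(uintptr_t)target;   /* Literal word loaded into PC. */
  return p;
}

/*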
*/ ++static void asm_exitstub_setup(ASMState *as, ExitNo nexits) ++{ ++ ExitNo i; ++ if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) ++ lj_trace_err(as->J, LJ_TRERR_SNAPOV); ++ for (i = 0; i < (nexits + EXITSTUBS_PER_GROUP - 1) / EXITSTUBS_PER_GROUP; i++) ++ if (as->J->exitstubgroup[i] == NULL) ++ as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); ++} ++ ++/* Emit conditional branch to exit for guard. */ ++static void asm_guardcc(ASMState *as, ARMCC cc) ++{ ++ MCode *target = exitstub_addr(as->J, as->snapno); ++ MCode *p = as->mcp; ++ if (LJ_UNLIKELY(p == as->invmcp)) { ++ as->loopinv = 1; ++ *p = ARMI_BL | ARMC_BL((target - p - 1) << 1); ++ emit_branch(as, ARMF_CC(ARMI_B, cc ^ 1), p); ++ return; ++ } ++ emit_branchlink(as, ARMI_BL, target); ++ ARMI_IT(cc); ++} ++ ++/* -- Operand fusion ------------------------------------------------------ */ ++ ++/* Limit linear search to this distance. Avoids O(n^2) behavior. */ ++#define CONFLICT_SEARCH_LIM 31 ++ ++/* Check if there's no conflicting instruction between curins and ref. */ ++static int noconflict(ASMState *as, IRRef ref, IROp conflict) ++{ ++ IRIns *ir = as->ir; ++ IRRef i = as->curins; ++ if (i > ref + CONFLICT_SEARCH_LIM) ++ return 0; /* Give up, ref is too far away. */ ++ while (--i > ref) ++ if (ir[i].o == conflict) ++ return 0; /* Conflict found. */ ++ return 1; /* Ok, no conflict. */ ++} ++ ++/* Fuse the array base of colocated arrays. */ ++static int32_t asm_fuseabase(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && ++ !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) ++ return (int32_t)sizeof(GCtab); ++ return 0; ++} ++ ++/* Fuse array/hash/upvalue reference into register+offset operand. */ ++static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, int lim) ++{ ++ IRIns *ir = IR(ref); ++ if (ra_noreg(ir->r)) { ++ if (ir->o == IR_AREF) { ++ if (mayfuse(as, ref)) { ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; ++ ofs += 8*IR(ir->op2)->i; ++ if (ofs > -lim && ofs < lim) { ++ *ofsp = ofs; ++ return ra_alloc1(as, refa, allow); ++ } ++ } ++ } ++ } ++ else if (ir->o == IR_HREFK) { ++ if (mayfuse(as, ref)) { ++ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); ++ if (ofs < lim) { ++ *ofsp = ofs; ++ return ra_alloc1(as, ir->op1, allow); ++ } ++ } ++ } ++ else if (ir->o == IR_UREFC) { ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); ++ *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ ++ return ra_allock(as, (ofs & ~255), allow); ++ } ++ } else if (ir->o == IR_TMPREF) { ++ *ofsp = 0; ++ return RID_SP; ++ } ++ } ++ *ofsp = 0; ++ return ra_alloc1(as, ref, allow); ++} ++ ++/* Fuse m operand into arithmetic/logic instructions. */ ++static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow, uint32_t *rs) ++{ ++ IRIns *ir = IR(ref); ++ if (ra_hasreg(ir->r)) { ++ ra_noweak(as, ir->r); ++ return ARMF_M(ir->r); ++ } ++ else if (irref_isk(ref)) { ++ uint32_t k = emit_isk12(ai, ir->i); ++ if (k) ++ return k; ++ } ++ else if (mayfuse(as, ref)) { ++ if (ir->o >= IR_BSHL && ir->o <= IR_BROR) { ++ Reg m = ra_alloc1(as, ir->op1, allow); ++ ARMShift sh = ir->o == IR_BSHL ? ARMSH_LSL : ++ ir->o == IR_BSHR ? ARMSH_LSR : ++ ir->o == IR_BSAR ? 
ARMSH_ASR : ARMSH_ROR; ++ if (irref_isk(ir->op2)) { ++ return ARMF_M(m) | ARMF_SH(sh, (IR(ir->op2)->i & 31)); ++ } ++ else { ++ Reg s = ra_alloc1(as, ir->op2, rset_exclude(allow, m)); ++ *rs = ARMF_RSH(sh, s); ++ return ARMF_M(m); ++ } ++ } ++ else if (ir->o == IR_ADD && ir->op1 == ir->op2) { ++ Reg m = ra_alloc1(as, ir->op1, allow); ++ return ARMF_M(m) | ARMF_SH(ARMSH_LSL, 1); ++ } ++ } ++ return ARMF_M(ra_allocref(as, ref, allow)); ++} ++ ++/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */ ++static IRRef asm_fuselsl2(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL && ++ irref_isk(ir->op2) && IR(ir->op2)->i == 2) ++ return ir->op1; ++ return 0; /* No fusion. */ ++} ++ ++/* Fuse XLOAD/XSTORE reference into load/store operand. */ ++static void asm_fusexref(ASMState *as, ++ ARMIns ai, ++ Reg rd, ++ IRRef ref, ++ RegSet allow, ++ int32_t ofs) ++{ ++ IRIns *ir = IR(ref); ++ Reg base; ++ if (ra_noreg(ir->r) && canfuse(as, ir)) { ++ int32_t lim = (!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00)) ? 1024 : ++ (ai & 0x00000080) ? 4096 : 256; ++ if (ir->o == IR_ADD) { ++ int32_t ofs2; ++ if (irref_isk(ir->op2) && ++ (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim && ++ (!(!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00)) || !(ofs2 & 3))) { ++ ofs = ofs2; ++ ref = ir->op1; ++ } ++ else if (ofs == 0 && !(!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00))) { ++ IRRef lref = ir->op1, rref = ir->op2; ++ Reg rn, rm; ++ IRRef sref = asm_fuselsl2(as, rref); ++ if (sref) { ++ rref = sref; ++ ai |= ARMF_LSL(2); ++ } ++ else if ((sref = asm_fuselsl2(as, lref)) != 0) { ++ lref = rref; ++ rref = sref; ++ ai |= ARMF_LSL(2); ++ } ++ rn = ra_alloc1(as, lref, allow); ++ rm = ra_alloc1(as, rref, rset_exclude(allow, rn)); ++ emit_tnm(as, ai, rd, rn, rm); ++ return; ++ } ++ } ++ else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00))) { ++ lj_assertA(ofs == 0, "bad usage"); ++ ofs = (int32_t)sizeof(GCstr); ++ if (irref_isk(ir->op2)) { ++ ofs += IR(ir->op2)->i; ++ ref = ir->op1; ++ } ++ else if (irref_isk(ir->op1)) { ++ ofs += IR(ir->op1)->i; ++ ref = ir->op2; ++ } ++ else { ++ /* NYI: Fuse ADD with constant. */ ++ Reg rn = ra_alloc1(as, ir->op1, allow); ++ uint32_t rs = 0; ++ uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn), &rs); ++ if ((ai & 0x0000fe00) == 0x0000f800) ++ emit_lso(as, ai, rd, rd, ofs); ++ else ++ emit_lsox(as, ai, rd, rd, ofs); ++ emit_dn(as, ARMI_ADD ^ m, rd, rn); ++ if (rs) ++ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16)); ++ return; ++ } ++ if (ofs <= -lim || ofs >= lim) { ++ Reg rn = ra_alloc1(as, ref, allow); ++ Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); ++ emit_tnm(as, ai, rd, rn, rm); ++ return; ++ } ++ } ++ } ++ base = ra_alloc1(as, ref, allow); ++#if !LJ_SOFTFP ++ if ((ai & 0x0000ff00) == 0x0000ed00) ++ emit_vlso(as, ai, rd, base, ofs); ++ else ++#endif ++ if ((ai & 0x0000fe00) == 0x0000f800) ++ emit_lso(as, ai, rd, base, ofs); ++ else ++ emit_lsox(as, ai, rd, base, ofs); ++} ++ ++#if !LJ_SOFTFP ++/* Fuse to multiply-add/sub instruction. 
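*/

/*
** asm_fusemadd() below merges the IR pair "t = MUL a b; r = ADD t c"
** (with t otherwise unused) into a single vmla.f64/vmls.f64, computing
** in effect:
*/
static double fused_madd(double a, double b, double c)
{
  return c + a * b;  /* One multiply-accumulate instead of VMUL + VADD. */
}

/*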
*/ ++static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) ++{ ++ IRRef lref = ir->op1, rref = ir->op2; ++ IRIns *irm; ++ if (lref != rref && ++ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && ++ ra_noreg(irm->r)) || ++ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && ++ (rref = lref, ai = air, ra_noreg(irm->r))))) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); ++ Reg right, left = ra_alloc2(as, ++ irm, ++ rset_exclude(rset_exclude(RSET_FPR, dest), add)); ++ right = (left >> 8); left &= 255; ++ emit_tnm(as, ai, (dest & 15), (left & 15), (right & 15)); ++ if (dest != add) emit_tm(as, ARMI_VMOV_D, (dest & 15), (add & 15)); ++ return 1; ++ } ++ return 0; ++} ++#endif ++ ++/* -- Calls --------------------------------------------------------------- */ ++ ++/* Generate a call to a C function. */ ++static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) ++{ ++ uint32_t n, nargs = CCI_XNARGS(ci); ++ int32_t ofs = 0; ++#if LJ_SOFTFP ++ Reg gpr = REGARG_FIRSTGPR; ++#else ++ Reg gpr, fpr = REGARG_FIRSTFPR, fprodd = 0; ++#endif ++ if ((void *)ci->func) ++ emit_call(as, (void *)ci->func); ++#if !LJ_SOFTFP ++ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) ++ as->cost[gpr] = REGCOST(~0u, ASMREF_L); ++ gpr = REGARG_FIRSTGPR; ++#endif ++ for (n = 0; n < nargs; n++) { ++ /* Setup args. */ ++ IRRef ref = args[n]; ++ IRIns *ir = IR(ref); ++#if !LJ_SOFTFP ++ if (ref && irt_isfp(ir->t)) { ++ RegSet of = as->freeset; ++ Reg src; ++ if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { ++ if (irt_isnum(ir->t)) { ++ if (fpr <= REGARG_LASTFPR) { ++ ra_leftov(as, fpr, ref); ++ fpr++; ++ continue; ++ } ++ } ++ else if (fprodd) { ++ /* Ick. */ ++ src = ra_alloc1(as, ref, RSET_FPR); ++ emit_tm(as, ARMI_VMOV_S, (fprodd & 15), (src & 15) | 0x00000040); ++ fprodd = 0; ++ continue; ++ } ++ else if (fpr <= REGARG_LASTFPR) { ++ ra_leftov(as, fpr, ref); ++ fprodd = fpr++; ++ continue; ++ } ++ /* Workaround to protect argument GPRs from being used for remat. */ ++ as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1); ++ src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */ ++ as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1)); ++ fprodd = 0; ++ goto stackfp; ++ } ++ /* Workaround to protect argument GPRs from being used for remat. */ ++ as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1); ++ src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */ ++ as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1)); ++ if (irt_isnum(ir->t)) gpr = (gpr + 1) & ~1u; ++ if (gpr <= REGARG_LASTGPR) { ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Must have been evicted. */ ++ if (irt_isnum(ir->t)) { ++ lj_assertA(rset_test(as->freeset, gpr+1), ++ "reg %d not free", gpr+1); /* Ditto. */ ++ emit_tnm(as, ARMI_VMOV_RR_D, gpr, gpr + 1, (src & 15)); ++ gpr += 2; ++ } ++ else { ++ emit_tn(as, ARMI_VMOV_R_S, gpr, (src & 15)); ++ gpr++; ++ } ++ } ++ else { ++stackfp: ++ if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; ++ emit_spstore(as, ir, src, ofs); ++ ofs += irt_isnum(ir->t) ? 8 : 4; ++ } ++ } ++ else ++#endif ++ { ++ if (gpr <= REGARG_LASTGPR) { ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Must have been evicted. 
*/ ++ if (ref) ra_leftov(as, gpr, ref); ++ gpr++; ++ } ++ else { ++ if (ref) { ++ Reg r = ra_alloc1(as, ref, RSET_GPR); ++ emit_spstore(as, ir, r, ofs); ++ } ++ ofs += 4; ++ } ++ } ++ } ++} ++ ++/* Setup result reg/sp for call. Evict scratch regs. */ ++static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ RegSet drop = RSET_SCRATCH; ++ int hiop = ((ir + 1)->o == IR_HIOP && !irt_isnil((ir + 1)->t)); ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. */ ++ if (hiop && ra_hasreg((ir + 1)->r)) ++ rset_clear(drop, (ir + 1)->r); /* Dest reg handled below. */ ++ ra_evictset(as, drop); /* Evictions must be performed first. */ ++ if (ra_used(ir)) { ++ lj_assertA(!irt_ispri(ir->t), "PRI dest"); ++ if (!LJ_SOFTFP && irt_isfp(ir->t)) { ++ if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64 | CCI_VARARG))) { ++ Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); ++ if (irt_isnum(ir->t)) ++ emit_tnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, dest); ++ else ++ emit_tn(as, ARMI_VMOV_S_R, RID_RET, dest); ++ } ++ else { ++ ra_destreg(as, ir, RID_FPRET); ++ } ++ } ++ else if (hiop) { ++ ra_destpair(as, ir); ++ } ++ else { ++ ra_destreg(as, ir, RID_RET); ++ } ++ } ++ UNUSED(ci); ++} ++ ++static void asm_callx(ASMState *as, IRIns *ir) ++{ ++ IRRef args[CCI_NARGS_MAX * 2]; ++ CCallInfo ci; ++ IRRef func; ++ IRIns *irf; ++ ci.flags = asm_callx_flags(as, ir); ++ asm_collectargs(as, ir, &ci, args); ++ asm_setupresult(as, ir, &ci); ++ func = ir->op2; irf = IR(func); ++ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } ++ if (irref_isk(func)) { ++ /* Call to constant address. */ ++ ci.func = (ASMFunction)(void *)(irf->i); ++ } ++ else { ++ /* Need a non-argument register for indirect calls. */ ++ Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R4, RID_R12 + 1)); ++ emit_m(as, ARMI_BLXr, freg); ++ ci.func = (ASMFunction)(void *)0; ++ } ++ asm_gencall(as, &ci, args); ++} ++ ++/* -- Returns ------------------------------------------------------------- */ ++ ++/* Return to lower frame. Guard that it goes to the right spot. */ ++static void asm_retf(ASMState *as, IRIns *ir) ++{ ++ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); ++ void *pc = ir_kptr(IR(ir->op2)); ++ int32_t delta = 1 + LJ_FR2 + bc_a(*((const BCIns *)pc - 1)); ++ as->topslot -= (BCReg)delta; ++ if ((int32_t)as->topslot < 0) as->topslot = 0; ++ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ ++ /* Need to force a spill on REF_BASE now to update the stack slot. */ ++ emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE))); ++ emit_setgl(as, base, jit_base); ++ emit_addptr(as, base, -8*delta); ++ asm_guardcc(as, CC_NE); ++ emit_nm(as, ++ ARMI_CMP, ++ RID_TMP, ++ ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); ++ emit_lso(as, ARMI_LDR, RID_TMP, base, -4); ++} ++ ++/* -- Buffer operations --------------------------------------------------- */ ++ ++#if LJ_HASBUFFER ++static void asm_bufhdr_write(ASMState *as, Reg sb) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); ++ IRIns irgc; ++ int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L); ++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */ ++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); ++ //if ((as->flags & JIT_F_ARMV6T2)) { //jturnsek!!! 
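/*
** The single-instruction BFI (bitfield insert) variant below is left
** disabled; the live fallback achieves the same merge with two
** instructions. Since the assembler emits machine code backwards, at run
** time the AND #255 first isolates the SBuf flag byte from the old sb->L
** word and the ORR then folds it into the fresh cur_L pointer before the
** store.
*/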
++ // emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp); ++ //} else { ++ emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp); ++ emit_dn(as, ARMC_K12(ARMI_AND, 255), tmp, tmp); ++ //} ++ emit_lso(as, ARMI_LDR, RID_TMP, ++ ra_allock(as, (addr & ~4095), ++ rset_exclude(rset_exclude(RSET_GPR, sb), tmp)), ++ (addr & 4095)); ++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); ++} ++#endif ++ ++/* -- Type conversions ---------------------------------------------------- */ ++ ++#if !LJ_SOFTFP ++static void asm_tointg(ASMState *as, IRIns *ir, Reg left) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ asm_guardcc(as, CC_NE); ++ emit_t(as, ARMI_VMRS, 0); ++ emit_tm(as, ARMI_VCMP_D, (tmp & 15), (left & 15)); ++ emit_tm(as, ARMI_VCVT_F64_S32, (tmp & 15), (tmp & 15)); ++ emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); ++ emit_tm(as, ARMI_VCVT_S32_F64, (tmp & 15), (left & 15)); ++} ++ ++static void asm_tobit(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_FPR; ++ Reg left = ra_alloc1(as, ir->op1, allow); ++ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); ++ Reg tmp = ra_scratch(as, rset_clear(allow, right)); ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); ++ emit_tnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); ++} ++#endif ++ ++static void asm_conv(ASMState *as, IRIns *ir) ++{ ++ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); ++#if !LJ_SOFTFP ++ int stfp = (st == IRT_NUM || st == IRT_FLOAT); ++#endif ++ IRRef lref = ir->op1; ++ /* 64 bit integer conversions are handled by SPLIT. */ ++ lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64), ++ "IR %04d has unsplit 64 bit type", ++ (int)(ir - as->ir) - REF_BIAS); ++#if LJ_SOFTFP ++ /* FP conversions are handled by SPLIT. */ ++ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), ++ "IR %04d has FP type", ++ (int)(ir - as->ir) - REF_BIAS); ++ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ ++#else ++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); ++ if (irt_isfp(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ if (stfp) { ++ /* FP to FP conversion. */ ++ emit_tm(as, ++ st == IRT_NUM ? ARMI_VCVT_F32_F64 : ARMI_VCVT_F64_F32, ++ (dest & 15), ++ (ra_alloc1(as, lref, RSET_FPR) & 15)); ++ } ++ else { ++ /* Integer to FP conversion. */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ ARMIns ai = irt_isfloat(ir->t) ? ++ (st == IRT_INT ? ARMI_VCVT_F32_S32 : ARMI_VCVT_F32_U32) : ++ (st == IRT_INT ? ARMI_VCVT_F64_S32 : ARMI_VCVT_F64_U32); ++ emit_tm(as, ai, (dest & 15), (dest & 15)); ++ emit_tn(as, ARMI_VMOV_S_R, left, (dest & 15)); ++ } ++ } ++ else if (stfp) { ++ /* FP to integer conversion. */ ++ if (irt_isguard(ir->t)) { ++ /* Checked conversions are only supported from number to int. */ ++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM, ++ "bad type for checked CONV"); ++ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); ++ } ++ else { ++ Reg left = ra_alloc1(as, lref, RSET_FPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ ARMIns ai; ++ emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); ++ ai = irt_isint(ir->t) ? ++ (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) : ++ (st == IRT_NUM ? 
ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32); ++ emit_tm(as, ai, (tmp & 15), (left & 15)); ++ } ++ } ++ else ++#endif ++ { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (st >= IRT_I8 && st <= IRT_U16) { ++ /* Extend to 32 bit integer. */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); ++ if ((as->flags & JIT_F_ARMV6)) { ++ ARMIns ai = st == IRT_I8 ? ARMI_SXTB : ++ st == IRT_U8 ? ARMI_UXTB : ++ st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH; ++ emit_dm(as, ai, dest, left); ++ } ++ else if (st == IRT_U8) { ++ emit_dn(as, ARMC_K12(ARMI_AND, 255), dest, left); ++ } ++ else { ++ uint32_t shift = st == IRT_I8 ? 24 : 16; ++ ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR; ++ emit_dm(as, ARMI_MOV | ARMF_SH(sh, shift), dest, RID_TMP); ++ emit_dm(as, ARMI_MOV | ARMF_SH(ARMSH_LSL, shift), RID_TMP, left); ++ } ++ } ++ else { ++ /* Handle 32/32 bit no-op (cast). */ ++ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ ++ } ++ } ++} ++ ++static void asm_strto(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; ++ IRRef args[2]; ++ Reg rlo = 0, rhi = 0, tmp; ++ int destused = ra_used(ir); ++ int32_t ofs = 0; ++ ra_evictset(as, RSET_SCRATCH); ++#if LJ_SOFTFP ++ if (destused) { ++ if (ra_hasspill(ir->s) && ra_hasspill((ir + 1)->s) && ++ (ir->s & 1) == 0 && ir->s + 1 == (ir + 1)->s) { ++ int i; ++ for (i = 0; i < 2; i++) { ++ Reg r = (ir + i)->r; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ ra_modified(as, r); ++ emit_spload(as, ir + i, r, sps_scale((ir + i)->s)); ++ } ++ } ++ ofs = sps_scale(ir->s); ++ destused = 0; ++ } ++ else { ++ rhi = ra_dest(as, ir + 1, RSET_GPR); ++ rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); ++ } ++ } ++ asm_guardcc(as, CC_EQ); ++ if (destused) { ++ emit_lso(as, ARMI_LDR, rhi, RID_SP, 4); ++ emit_lso(as, ARMI_LDR, rlo, RID_SP, 0); ++ } ++#else ++ UNUSED(rhi); ++ if (destused) { ++ if (ra_hasspill(ir->s)) { ++ ofs = sps_scale(ir->s); ++ destused = 0; ++ if (ra_hasreg(ir->r)) { ++ ra_free(as, ir->r); ++ ra_modified(as, ir->r); ++ emit_spload(as, ir, ir->r, ofs); ++ } ++ } ++ else { ++ rlo = ra_dest(as, ir, RSET_FPR); ++ } ++ } ++ asm_guardcc(as, CC_EQ); ++ if (destused) ++ emit_vlso(as, ARMI_VLDR_D, rlo, RID_SP, 0); ++#endif ++ emit_n(as, ARMC_K12(ARMI_CMP, 0), RID_RET); /* Test return status. */ ++ args[0] = ir->op1; /* GCstr *str */ ++ args[1] = ASMREF_TMP1; /* TValue *n */ ++ asm_gencall(as, ci, args); ++ tmp = ra_releasetmp(as, ASMREF_TMP1); ++ if (ofs == 0) ++ emit_dm(as, ARMI_MOV, tmp, RID_SP); ++ else ++ emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); ++} ++ ++/* -- Memory references --------------------------------------------------- */ ++ ++/* Get pointer to TValue. */ ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) ++{ ++ if ((mode & IRTMPREF_IN1)) { ++ IRIns *ir = IR(ref); ++ if (irt_isnum(ir->t)) { ++ if ((mode & IRTMPREF_OUT1)) { ++#if LJ_SOFTFP ++ lj_assertA(irref_isk(ref), "unsplit FP op"); ++ emit_dm(as, ARMI_MOV, dest, RID_SP); ++ emit_lso(as, ARMI_STR, ++ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), ++ RID_SP, 0); ++ emit_lso(as, ARMI_STR, ++ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), ++ RID_SP, 4); ++#else ++ Reg src = ra_alloc1(as, ref, RSET_FPR); ++ emit_dm(as, ARMI_MOV, dest, RID_SP); ++ emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0); ++#endif ++ } else if (irref_isk(ref)) { ++ /* Use the number constant itself as a TValue. 
*/ ++ ra_allockreg(as, i32ptr(ir_knum(ir)), dest); ++ } else { ++#if LJ_SOFTFP ++ lj_assertA(0, "unsplit FP op"); ++#else ++ /* Otherwise force a spill and use the spill slot. */ ++ emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); ++#endif ++ } ++ } else { ++ /* Otherwise use [sp] and [sp+4] to hold the TValue. ++ ** This assumes the following call has max. 4 args. ++ */ ++ Reg type; ++ emit_dm(as, ARMI_MOV, dest, RID_SP); ++ if (!irt_ispri(ir->t)) { ++ Reg src = ra_alloc1(as, ref, RSET_GPR); ++ emit_lso(as, ARMI_STR, src, RID_SP, 0); ++ } ++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) ++ type = ra_alloc1(as, ref+1, RSET_GPR); ++ else ++ type = ra_allock(as, irt_toitype(ir->t), RSET_GPR); ++ emit_lso(as, ARMI_STR, type, RID_SP, 4); ++ } ++ } ++ else { ++ emit_dm(as, ARMI_MOV, dest, RID_SP); ++ } ++} ++ ++static void asm_aref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg idx, base; ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; ++ uint32_t k = emit_isk12(ARMI_ADD, ofs + 8*IR(ir->op2)->i); ++ if (k) { ++ base = ra_alloc1(as, refa, RSET_GPR); ++ emit_dn(as, ARMI_ADD ^ k, dest, base); ++ return; ++ } ++ } ++ base = ra_alloc1(as, ir->op1, RSET_GPR); ++ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); ++ emit_dnm(as, ARMI_ADD | ARMF_SH(ARMSH_LSL, 3), dest, base, idx); ++} ++ ++/* Inlined hash lookup. Specialized for key type and for const keys. ++** The equivalent C code is: ++** Node *n = hashkey(t, key); ++** do { ++** if (lj_obj_equal(&n->key, key)) return &n->val; ++** } while ((n = nextnode(n))); ++** return niltv(L); ++*/ ++static void asm_href(ASMState *as, IRIns *ir, IROp merge) ++{ ++ RegSet allow = RSET_GPR; ++ int destused = ra_used(ir); ++ Reg dest = ra_dest(as, ir, allow); ++ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); ++ Reg key = 0, keyhi = 0, keynumhi = RID_NONE, tmp = RID_TMP; ++ IRRef refkey = ir->op2; ++ IRIns *irkey = IR(refkey); ++ IRType1 kt = irkey->t; ++ int32_t k = 0, khi = emit_isk12(ARMI_CMP, irt_toitype(kt)); ++ uint32_t khash; ++ MCLabel l_end, l_loop; ++ rset_clear(allow, tab); ++ if (!irref_isk(refkey) || irt_isstr(kt)) { ++#if LJ_SOFTFP ++ key = ra_alloc1(as, refkey, allow); ++ rset_clear(allow, key); ++ if (irkey[1].o == IR_HIOP) { ++ if (ra_hasreg((irkey + 1)->r)) { ++ keynumhi = (irkey + 1)->r; ++ keyhi = RID_TMP; ++ ra_noweak(as, keynumhi); ++ } ++ else { ++ keyhi = keynumhi = ra_allocref(as, refkey + 1, allow); ++ } ++ rset_clear(allow, keynumhi); ++ khi = 0; ++ } ++#else ++ if (irt_isnum(kt)) { ++ key = ra_scratch(as, allow); ++ rset_clear(allow, key); ++ keyhi = keynumhi = ra_scratch(as, allow); ++ rset_clear(allow, keyhi); ++ khi = 0; ++ } ++ else { ++ key = ra_alloc1(as, refkey, allow); ++ rset_clear(allow, key); ++ } ++#endif ++ } ++ else if (irt_isnum(kt)) { ++ int32_t val = (int32_t)ir_knum(irkey)->u32.lo; ++ k = emit_isk12(ARMI_CMP, val); ++ if (!k) { ++ key = ra_allock(as, val, allow); ++ rset_clear(allow, key); ++ } ++ val = (int32_t)ir_knum(irkey)->u32.hi; ++ khi = emit_isk12(ARMI_CMP, val); ++ if (!khi) { ++ keyhi = ra_allock(as, val, allow); ++ rset_clear(allow, keyhi); ++ } ++ } ++ else if (!irt_ispri(kt)) { ++ k = emit_isk12(ARMI_CMP, irkey->i); ++ if (!k) { ++ key = ra_alloc1(as, refkey, allow); ++ rset_clear(allow, key); ++ } ++ } ++ if (!irt_ispri(kt)) ++ tmp = ra_scratchpair(as, allow); ++ ++ /* Key not found in chain: jump to exit (if merged) or load 
niltv. */ ++ l_end = emit_label(as); ++ as->invmcp = NULL; ++ if (merge == IR_NE) ++ asm_guardcc(as, CC_AL); ++ else if (destused) ++ emit_loada(as, dest, niltvg(J2G(as->J))); ++ ++ /* Follow hash chain until the end. */ ++ l_loop = --as->mcp; ++ emit_n(as, ARMC_K12(ARMI_CMP, 0), dest); ++ emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(Node, next)); ++ ++ /* Type and value comparison. */ ++ if (merge == IR_EQ) ++ asm_guardcc(as, CC_EQ); ++ else { ++ emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); ++ } ++ if (!irt_ispri(kt)) { ++ emit_nm(as, ARMI_CMP ^ k, tmp, key); ++ ARMI_IT(CC_EQ); ++ emit_nm(as, ARMI_CMP ^ khi, tmp + 1, keyhi); ++ emit_lsox(as, ARMI_LDRD, tmp, dest, (int32_t)offsetof(Node, key)); ++ } ++ else { ++ emit_n(as, ARMI_CMP ^ khi, tmp); ++ emit_lso(as, ARMI_LDR, tmp, dest, (int32_t)offsetof(Node, key.it)); ++ } ++ *l_loop = ARMF_CC(ARMI_B, CC_NE) | ARMC_B((as->mcp - l_loop - 1) << 1); ++ ++ /* Load main position relative to tab->node into dest. */ ++ khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1; ++ if (khash == 0) { ++ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); ++ } ++ else { ++ emit_dnm(as, ARMI_ADD | ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); ++ emit_dnm(as, ARMI_ADD | ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); ++ if (irt_isstr(kt)) { ++ /* Fetch of str->sid is cheaper than ra_allock. */ ++ emit_dnm(as, ARMI_AND, tmp, tmp + 1, RID_TMP); ++ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); ++ emit_lso(as, ARMI_LDR, tmp + 1, key, (int32_t)offsetof(GCstr, sid)); ++ emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); ++ } ++ else if (irref_isk(refkey)) { ++ emit_opk(as, ++ ARMI_AND, ++ tmp, ++ RID_TMP, ++ (int32_t)khash, ++ rset_exclude(rset_exclude(RSET_GPR, tab), dest)); ++ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); ++ emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); ++ } ++ else { ++ /* Must match with hash*() in lj_tab.c. */ ++ if (ra_hasreg(keynumhi)) { ++ /* Canonicalize +-0.0 to 0.0. */ ++ if (keyhi == RID_TMP) { ++ emit_dm(as, ARMI_MOV, keyhi, keynumhi); ++ ARMI_IT(CC_NE); ++ } ++ emit_d(as, ARMC_K12(ARMI_MOV, 0), keyhi); ++ ARMI_IT(CC_EQ); ++ } ++ emit_dnm(as, ARMI_AND, tmp, tmp, RID_TMP); ++ emit_dnm(as, ARMI_SUB | ARMF_SH(ARMSH_ROR, 32 - HASH_ROT3), tmp, tmp, tmp + 1); ++ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); ++ emit_dnm(as, ++ ARMI_EOR|ARMF_SH(ARMSH_ROR, 32 - ((HASH_ROT2 + HASH_ROT1) & 31)), ++ tmp, ++ tmp+1, ++ tmp); ++ emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); ++ emit_dnm(as, ARMI_SUB | ARMF_SH(ARMSH_ROR, 32 - HASH_ROT1), tmp + 1, tmp + 1, tmp); ++ if (ra_hasreg(keynumhi)) { ++ emit_dnm(as, ARMI_EOR, tmp + 1, tmp, key); ++ emit_dnm(as, ARMI_ORR | ARMI_S, RID_TMP, tmp, key); /* Test for +-0.0. */ ++ emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi); ++#if !LJ_SOFTFP ++ emit_tnm(as, ++ ARMI_VMOV_RR_D, ++ key, ++ keynumhi, ++ (ra_alloc1(as, refkey, RSET_FPR) & 15)); ++#endif ++ } ++ else { ++ emit_dnm(as, ARMI_EOR, tmp + 1, tmp, key); ++ emit_opk(as, ++ ARMI_ADD, ++ tmp, ++ key, ++ (int32_t)HASH_BIAS, ++ rset_exclude(rset_exclude(RSET_GPR, tab), key)); ++ } ++ } ++ } ++} ++ ++static void asm_hrefk(ASMState *as, IRIns *ir) ++{ ++ IRIns *kslot = IR(ir->op2); ++ IRIns *irkey = IR(kslot->op1); ++ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); ++ int32_t kofs = ofs + (int32_t)offsetof(Node, key); ++ Reg dest = (ra_used(ir) || ofs > 4095) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; ++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR); ++ Reg key = RID_NONE, type = RID_TMP, idx = node; ++ RegSet allow = rset_exclude(RSET_GPR, node); ++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); ++ if (ofs > 4095) { ++ idx = dest; ++ rset_clear(allow, dest); ++ kofs = (int32_t)offsetof(Node, key); ++ } ++ else if (ra_hasreg(dest)) { ++ emit_opk(as, ARMI_ADD, dest, node, ofs, allow); ++ } ++ asm_guardcc(as, CC_NE); ++ if (!irt_ispri(irkey->t)) { ++ RegSet even = (as->freeset & allow); ++ even = even & (even >> 1) & RSET_GPREVEN; ++ if (even) { ++ key = ra_scratch(as, even); ++ if (rset_test(as->freeset, key + 1)) { ++ type = key + 1; ++ ra_modified(as, type); ++ } ++ } ++ else { ++ key = ra_scratch(as, allow); ++ } ++ rset_clear(allow, key); ++ } ++ rset_clear(allow, type); ++ if (irt_isnum(irkey->t)) { ++ emit_opk(as, ++ ARMI_CMP, ++ 0, ++ type, ++ (int32_t)ir_knum(irkey)->u32.hi, ++ allow); ++ ARMI_IT(CC_EQ); ++ emit_opk(as, ++ ARMI_CMP, ++ 0, ++ key, ++ (int32_t)ir_knum(irkey)->u32.lo, ++ allow); ++ } ++ else { ++ if (ra_hasreg(key)) { ++ emit_opk(as, ARMI_CMP, 0, key, irkey->i, allow); ++ ARMI_IT(CC_EQ); ++ } ++ emit_n(as, ARMC_K12(ARMI_CMN, -irt_toitype(irkey->t)), type); ++ } ++ emit_lso(as, ARMI_LDR, type, idx, kofs + 4); ++ if (ra_hasreg(key)) emit_lso(as, ARMI_LDR, key, idx, kofs); ++ if (ofs > 4095) ++ emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); ++} ++ ++static void asm_uref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; ++ emit_lsptr(as, ARMI_LDR, dest, v); ++ } ++ else { ++ Reg uv = ra_scratch(as, RSET_GPR); ++ Reg func = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (ir->o == IR_UREFC) { ++ asm_guardcc(as, CC_NE); ++ emit_n(as, ARMC_K12(ARMI_CMP, 1), RID_TMP); ++ emit_opk(as, ++ ARMI_ADD, ++ dest, ++ uv, ++ (int32_t)offsetof(GCupval, tv), ++ RSET_GPR); ++ emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); ++ } ++ else { ++ emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v)); ++ } ++ emit_lso(as, ++ ARMI_LDR, ++ uv, ++ func, ++ (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); ++ } ++} ++ ++static void asm_fref(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); UNUSED(ir); ++ lj_assertA(!ra_used(ir), "unfused FREF"); ++} ++ ++static void asm_strref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ IRRef ref = ir->op2, refk = ir->op1; ++ Reg r; ++ if (irref_isk(ref)) { ++ IRRef tmp = refk; refk = ref; ref = tmp; ++ } ++ else if (!irref_isk(refk)) { ++ uint32_t k, m = ARMC_K12(0, sizeof(GCstr)); ++ Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); ++ IRIns *irr = IR(ir->op2); ++ if (ra_hasreg(irr->r)) { ++ ra_noweak(as, irr->r); ++ right = irr->r; ++ } ++ else if (mayfuse(as, irr->op2) && ++ irr->o == IR_ADD && irref_isk(irr->op2) && ++ (k = emit_isk12(ARMI_ADD, ++ (int32_t)sizeof(GCstr) + IR(irr->op2)->i))) { ++ m = k; ++ right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); ++ } ++ else { ++ right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ } ++ emit_dn(as, ARMI_ADD ^ m, dest, dest); ++ emit_dnm(as, ARMI_ADD, dest, left, right); ++ return; ++ } ++ r = ra_alloc1(as, ref, RSET_GPR); ++ emit_opk(as, ++ ARMI_ADD, ++ dest, ++ r, ++ sizeof(GCstr) + IR(refk)->i, ++ rset_exclude(RSET_GPR, r)); ++} ++ ++/* -- Loads and stores ---------------------------------------------------- */ ++ ++static 
ARMIns asm_fxloadins(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); ++ switch (irt_type(ir->t)) { ++ case IRT_I8: return ARMI_LDRSB; ++ case IRT_U8: return ARMI_LDRB; ++ case IRT_I16: return ARMI_LDRSH; ++ case IRT_U16: return ARMI_LDRH; ++ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D; ++ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */ ++ default: return ARMI_LDR; ++ } ++} ++ ++static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); ++ switch (irt_type(ir->t)) { ++ case IRT_I8: case IRT_U8: return ARMI_STRB; ++ case IRT_I16: case IRT_U16: return ARMI_STRH; ++ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D; ++ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */ ++ default: return ARMI_STR; ++ } ++} ++ ++static void asm_fload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ ARMIns ai = asm_fxloadins(as, ir); ++ Reg idx; ++ int32_t ofs; ++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ ++ idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR); ++ ofs = 0; ++ } else { ++ idx = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (ir->op2 == IRFL_TAB_ARRAY) { ++ ofs = asm_fuseabase(as, ir->op1); ++ if (ofs) { ++ /* Turn the t->array load into an add for colocated arrays. */ ++ emit_dn(as, ARMC_K12(ARMI_ADD, ofs), dest, idx); ++ return; ++ } ++ } ++ ofs = field_ofs[ir->op2]; ++ } ++ emit_lso(as, ai, dest, idx, ofs); ++} ++ ++static void asm_fstore(ASMState *as, IRIns *ir) ++{ ++ if (ir->r != RID_SINK) { ++ Reg src = ra_alloc1(as, ir->op2, RSET_GPR); ++ IRIns *irf = IR(ir->op1); ++ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); ++ int32_t ofs = field_ofs[irf->op2]; ++ ARMIns ai = asm_fxstoreins(as, ir); ++ emit_lso(as, ai, src, idx, ofs); ++ } ++} ++ ++static void asm_xload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ++ ir, ++ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); ++ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); ++ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); ++} ++ ++static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) ++{ ++ if (ir->r != RID_SINK) { ++ Reg src = ra_alloc1(as, ++ ir->op2, ++ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); ++ asm_fusexref(as, ++ asm_fxstoreins(as, ir), ++ src, ++ ir->op1, ++ rset_exclude(RSET_GPR, src), ++ ofs); ++ } ++} ++ ++#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) ++ ++static void asm_ahuvload(ASMState *as, IRIns *ir) ++{ ++ int hiop = (LJ_SOFTFP && (ir + 1)->o == IR_HIOP); ++ IRType t = hiop ? IRT_NUM : irt_type(ir->t); ++ Reg dest = RID_NONE, type = RID_NONE, idx; ++ RegSet allow = RSET_GPR; ++ int32_t ofs = 0; ++ if (hiop && ra_used(ir + 1)) { ++ type = ra_dest(as, ir + 1, allow); ++ rset_clear(allow, type); ++ } ++ if (ra_used(ir)) { ++ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t), ++ "bad load type %d", irt_type(ir->t)); ++ dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); ++ rset_clear(allow, dest); ++ } ++ idx = asm_fuseahuref(as, ++ ir->op1, ++ &ofs, ++ allow, ++ (!LJ_SOFTFP && t == IRT_NUM) ? 
1024 : 4096); ++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; ++ if (!hiop || type == RID_NONE) { ++ rset_clear(allow, idx); ++ if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && ++ rset_test((as->freeset & allow), dest + 1)) { ++ type = dest + 1; ++ ra_modified(as, type); ++ } ++ else { ++ type = RID_TMP; ++ } ++ } ++ asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); ++ emit_n(as, ARMC_K12(ARMI_CMN, -irt_toitype_(t)), type); ++ if (ra_hasreg(dest)) { ++#if !LJ_SOFTFP ++ if (t == IRT_NUM) ++ emit_vlso(as, ARMI_VLDR_D, dest, idx, ofs); ++ else ++#endif ++ emit_lso(as, ARMI_LDR, dest, idx, ofs); ++ } ++ emit_lso(as, ARMI_LDR, type, idx, ofs + 4); ++} ++ ++static void asm_ahustore(ASMState *as, IRIns *ir) ++{ ++ if (ir->r != RID_SINK) { ++ RegSet allow = RSET_GPR; ++ Reg idx, src = RID_NONE, type = RID_NONE; ++ int32_t ofs = 0; ++#if !LJ_SOFTFP ++ if (irt_isnum(ir->t)) { ++ src = ra_alloc1(as, ir->op2, RSET_FPR); ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024); ++ emit_vlso(as, ARMI_VSTR_D, src, idx, ofs); ++ } ++ else ++#endif ++ { ++ int hiop = (LJ_SOFTFP && (ir + 1)->o == IR_HIOP); ++ if (!irt_ispri(ir->t)) { ++ src = ra_alloc1(as, ir->op2, allow); ++ rset_clear(allow, src); ++ } ++ if (hiop) ++ type = ra_alloc1(as, (ir + 1)->op2, allow); ++ else ++ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); ++ idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096); ++ if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs); ++ emit_lso(as, ARMI_STR, type, idx, ofs + 4); ++ } ++ } ++} ++ ++static void asm_sload(ASMState *as, IRIns *ir) ++{ ++ int32_t ofs = 8*((int32_t)ir->op1 - 1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); ++ int hiop = (LJ_SOFTFP && (ir + 1)->o == IR_HIOP); ++ IRType t = hiop ? IRT_NUM : irt_type(ir->t); ++ Reg dest = RID_NONE, type = RID_NONE, base; ++ RegSet allow = RSET_GPR; ++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT), ++ "bad parent SLOAD"); /* Handled by asm_head_side(). */ ++ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), ++ "inconsistent SLOAD variant"); ++#if LJ_SOFTFP ++ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), ++ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ ++ if (hiop && ra_used(ir + 1)) { ++ type = ra_dest(as, ir + 1, allow); ++ rset_clear(allow, type); ++ } ++#else ++ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) { ++ dest = ra_scratch(as, RSET_FPR); ++ asm_tointg(as, ir, dest); ++ t = IRT_NUM; /* Continue with a regular number type check. */ ++ } ++ else ++#endif ++ if (ra_used(ir)) { ++ Reg tmp = RID_NONE; ++ if ((ir->op2 & IRSLOAD_CONVERT)) ++ tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); ++ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t), ++ "bad SLOAD type %d", irt_type(ir->t)); ++ dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); ++ rset_clear(allow, dest); ++ base = ra_alloc1(as, REF_BASE, allow); ++ if ((ir->op2 & IRSLOAD_CONVERT)) { ++ if (t == IRT_INT) { ++ emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); ++ emit_tm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15)); ++ t = IRT_NUM; /* Check for original type. */ ++ } ++ else { ++ emit_tm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15)); ++ emit_tn(as, ARMI_VMOV_S_R, tmp, (dest & 15)); ++ t = IRT_INT; /* Check for original type. 
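*/
/*
** For IRSLOAD_CONVERT the stack slot is loaded into the scratch reg and
** converted from there, while t is switched back to the slot's on-stack
** type so the guard below checks the tag of the *unconverted* value;
** redirecting dest to the scratch makes the load target it.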
*/ ++ } ++ dest = tmp; ++ } ++ goto dotypecheck; ++ } ++ base = ra_alloc1(as, REF_BASE, allow); ++dotypecheck: ++ rset_clear(allow, base); ++ if ((ir->op2 & IRSLOAD_TYPECHECK)) { ++ if (ra_noreg(type)) { ++ if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && ++ rset_test((as->freeset & allow), dest + 1)) { ++ type = dest + 1; ++ ra_modified(as, type); ++ } ++ else { ++ type = RID_TMP; ++ } ++ } ++ asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); ++ if ((ir->op2 & IRSLOAD_KEYINDEX)) { ++ emit_n(as, ARMC_K12(ARMI_CMN, 1), type); ++ emit_dn(as, ARMI_EOR^emit_isk12(ARMI_EOR, ~LJ_KEYINDEX), type, type); ++ } else { ++ emit_n(as, ARMC_K12(ARMI_CMN, -irt_toitype_(t)), type); ++ } ++ } ++ if (ra_hasreg(dest)) { ++#if !LJ_SOFTFP ++ if (t == IRT_NUM) { ++ if (ofs < 1024) { ++ emit_vlso(as, ARMI_VLDR_D, dest, base, ofs); ++ } ++ else { ++ if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs + 4); ++ emit_vlso(as, ARMI_VLDR_D, dest, RID_TMP, 0); ++ emit_opk(as, ARMI_ADD, RID_TMP, base, ofs, allow); ++ return; ++ } ++ } ++ else ++#endif ++ emit_lso(as, ARMI_LDR, dest, base, ofs); ++ } ++ if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs + 4); ++} ++ ++/* -- Allocations --------------------------------------------------------- */ ++ ++#if LJ_HASFFI ++static void asm_cnew(ASMState *as, IRIns *ir) ++{ ++ CTState *cts = ctype_ctsG(J2G(as->J)); ++ CTypeID id = (CTypeID)IR(ir->op1)->i; ++ CTSize sz; ++ CTInfo info = lj_ctype_info(cts, id, &sz); ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; ++ IRRef args[4]; ++ RegSet allow = (RSET_GPR & ~RSET_SCRATCH); ++ RegSet drop = RSET_SCRATCH; ++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), ++ "bad CNEW/CNEWI operands"); ++ ++ as->gcsteps++; ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. */ ++ ra_evictset(as, drop); ++ if (ra_used(ir)) ++ ra_destreg(as, ir, RID_RET); /* GCcdata * */ ++ ++ /* Initialize immutable cdata object. */ ++ if (ir->o == IR_CNEWI) { ++ int32_t ofs = sizeof(GCcdata); ++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); ++ if (sz == 8) { ++ ofs += 4; ir++; ++ lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI"); ++ } ++ for (;;) { ++ Reg r = ra_alloc1(as, ir->op2, allow); ++ emit_lso(as, ARMI_STR, r, RID_RET, ofs); ++ rset_clear(allow, r); ++ if (ofs == sizeof(GCcdata)) break; ++ ofs -= 4; ir--; ++ } ++ } ++ else if (ir->op2 != REF_NIL) { ++ /* Create VLA/VLS/aligned cdata. */ ++ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ir->op1; /* CTypeID id */ ++ args[2] = ir->op2; /* CTSize sz */ ++ args[3] = ASMREF_TMP1; /* CTSize align */ ++ asm_gencall(as, ci, args); ++ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); ++ return; ++ } ++ ++ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ ++ { ++ uint32_t k = emit_isk12(ARMI_MOV, id); ++ Reg r = k ? 
RID_R1 : ra_allock(as, id, allow); ++ emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); ++ emit_lso(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); ++ emit_d(as, ARMC_K12(ARMI_MOV, ~LJ_TCDATA), RID_TMP); ++ if (k) emit_d(as, ARMI_MOV ^ k, RID_R1); ++ } ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ASMREF_TMP1; /* MSize size */ ++ asm_gencall(as, ci, args); ++ ra_allockreg(as, ++ (int32_t)(sz + sizeof(GCcdata)), ++ ra_releasetmp(as, ASMREF_TMP1)); ++} ++#endif ++ ++/* -- Write barriers ------------------------------------------------------ */ ++ ++static void asm_tbar(ASMState *as, IRIns *ir) ++{ ++ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); ++ Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); ++ Reg gr = ra_allock(as, ++ i32ptr(J2G(as->J)), ++ rset_exclude(rset_exclude(RSET_GPR, tab), link)); ++ Reg mark = RID_TMP; ++ MCLabel l_end = emit_label(as); ++ emit_lso(as, ARMI_STR, link, tab, (int32_t)offsetof(GCtab, gclist)); ++ emit_lso(as, ARMI_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); ++ emit_lso(as, ++ ARMI_STR, ++ tab, ++ gr, ++ (int32_t)offsetof(global_State, gc.grayagain)); ++ emit_dn(as, ARMC_K12(ARMI_BIC, LJ_GC_BLACK), mark, mark); ++ emit_lso(as, ++ ARMI_LDR, ++ link, ++ gr, ++ (int32_t)offsetof(global_State, gc.grayagain)); ++ emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); ++ emit_n(as, ARMC_K12(ARMI_TST, LJ_GC_BLACK), mark); ++ emit_lso(as, ARMI_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); ++} ++ ++static void asm_obar(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg obj, val, tmp; ++ /* No need for other object barriers (yet). */ ++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ir->op1; /* TValue *tv */ ++ asm_gencall(as, ci, args); ++ //if ((l_end[-1] >> 28) == CC_AL) jturnsek!!! ++ // l_end[-1] = ARMF_CC(l_end[-1], CC_NE); ++ //else { ++ emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); ++ //} ++ ra_allockreg(as, i32ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1)); ++ obj = IR(ir->op1)->r; ++ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); ++ emit_n(as, ARMC_K12(ARMI_TST, LJ_GC_BLACK), tmp); ++ ARMI_IT(CC_NE); ++ emit_n(as, ARMC_K12(ARMI_TST, LJ_GC_WHITES), RID_TMP); ++ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); ++ emit_lso(as, ++ ARMI_LDRB, ++ tmp, ++ obj, ++ (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); ++ emit_lso(as, ARMI_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); ++} ++ ++/* -- Arithmetic and logic operations ------------------------------------- */ ++ ++#if !LJ_SOFTFP ++static void asm_fparith(ASMState *as, IRIns *ir, ARMIns ai) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ emit_tnm(as, ai, (dest & 15), (left & 15), (right & 15)); ++} ++ ++static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); ++ emit_tm(as, ai, (dest & 15), (left & 15)); ++} ++ ++static void asm_callround(ASMState *as, IRIns *ir, int id) ++{ ++ /* The modified regs must match with the *.dasc implementation. 
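*/
/*
** lj_vm_floor_sf/lj_vm_ceil_sf/lj_vm_trunc_sf take the double in R0/R1
** and return it there (soft-float convention), so the operand is shuffled
** FPR -> R0/R1 before the call and R0/R1 -> FPR after it, and every
** register the helper may clobber is evicted up front. Machine code is
** emitted backwards: the last emit below is the first instruction to run.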
*/ ++ RegSet drop = RID2RSET(RID_R0) | RID2RSET(RID_R1) | RID2RSET(RID_R2) | ++ RID2RSET(RID_R3) | RID2RSET(RID_R12); ++ RegSet of; ++ Reg dest, src; ++ ra_evictset(as, drop); ++ dest = ra_dest(as, ir, RSET_FPR); ++ emit_tnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); ++ emit_call(as, ++ id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : ++ id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : ++ (void *)lj_vm_trunc_sf); ++ /* Workaround to protect argument GPRs from being used for remat. */ ++ of = as->freeset; ++ as->freeset &= ~RSET_RANGE(RID_R0, RID_R1 + 1); ++ as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); ++ src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ ++ as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1 + 1)); ++ emit_tnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); ++} ++ ++static void asm_fpmath(ASMState *as, IRIns *ir) ++{ ++ if (ir->op2 <= IRFPM_TRUNC) ++ asm_callround(as, ir, ir->op2); ++ else if (ir->op2 == IRFPM_SQRT) ++ asm_fpunary(as, ir, ARMI_VSQRT_D); ++ else ++ asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); ++} ++#endif ++ ++static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) ++{ ++ IRIns *ir; ++ if (irref_isk(rref)) ++ return 0; /* Don't swap constants to the left. */ ++ if (irref_isk(lref)) ++ return 1; /* But swap constants to the right. */ ++ ir = IR(rref); ++ if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || ++ (ir->o == IR_ADD && ir->op1 == ir->op2)) ++ return 0; /* Don't swap fusable operands to the left. */ ++ ir = IR(lref); ++ if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || ++ (ir->o == IR_ADD && ir->op1 == ir->op2)) ++ return 1; /* But swap fusable operands to the right. */ ++ return 0; /* Otherwise don't swap. */ ++} ++ ++static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai) ++{ ++ IRRef lref = ir->op1, rref = ir->op2; ++ Reg left, dest = ra_dest(as, ir, RSET_GPR); ++ uint32_t m, rs = 0; ++ if (asm_swapops(as, lref, rref)) { ++ IRRef tmp = lref; lref = rref; rref = tmp; ++ if ((ai & ~ARMI_S) == ARMI_SUB || (ai & ~ARMI_S) == ARMI_SBC) ++ ai ^= (ARMI_SUB ^ ARMI_RSB); ++ } ++ left = ra_hintalloc(as, lref, dest, RSET_GPR); ++ m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left), &rs); ++ if (irt_isguard(ir->t)) { ++ /* For IR_ADDOV etc. */ ++ asm_guardcc(as, CC_VS); ++ ai |= ARMI_S; ++ } ++ emit_dn(as, ai ^ m, dest, left); ++ if (rs) ++ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16)); ++} ++ ++/* Try to drop cmp r, #0. */ ++static ARMIns asm_drop_cmp0(ASMState *as, ARMIns ai) //jturnsek!!! ++{ ++ if (as->flagmcp == as->mcp) { ++ uint32_t cc = ((as->mcp[1] >> 20) & 0xf); ++ as->flagmcp = NULL; ++ if (cc <= CC_NE) { ++ as->mcp++; ++ ai |= ARMI_S; ++ } ++ else if (cc == CC_GE) { ++ * ++as->mcp ^= ((CC_GE ^ CC_PL) << 20); ++ ai |= ARMI_S; ++ } ++ else if (cc == CC_LT) { ++ * ++as->mcp ^= ((CC_LT ^ CC_MI) << 20); ++ ai |= ARMI_S; ++ } /* else: other conds don't work in general. */ ++ } ++ return ai; ++} ++ ++static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) ++{ ++ asm_intop(as, ir, asm_drop_cmp0(as, ai)); ++} ++ ++static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_dn(as, ARMC_K12(ai, 0), dest, left); ++} ++ ++/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. 
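*/

/*
** What that NYI note would buy: a multiply by a small constant can be
** strength-reduced to shift/add pairs instead of a MUL. A minimal sketch
** of the idea (illustrative only; this port still emits MUL, and FOLD
** already turns exact powers of two into shifts):
*/
static uint32_t mul_by_10(uint32_t x)
{
  uint32_t t = x + (x << 2);  /* add t, x, x, lsl #2  => x*5  */
  return t << 1;              /* lsl t, t, #1         => x*10 */
}

/*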
*/ ++static void asm_intmul(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); ++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ Reg tmp = RID_NONE; ++ /* ARMv5 restriction: dest != left and dest_hi != left. */ ++ if (dest == left && left != right) { left = right; right = dest; } ++ if (irt_isguard(ir->t)) { ++ /* IR_MULOV */ ++ if (!(as->flags & JIT_F_ARMV6) && dest == left) ++ tmp = left = ra_scratch(as, rset_exclude(RSET_GPR, left)); ++ asm_guardcc(as, CC_NE); ++ emit_nm(as, ARMI_TEQ | ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest); ++ emit_dnm(as, ARMI_SMULL | ARMF_T(dest), RID_TMP, left, right); ++ } ++ else { ++ if (!(as->flags & JIT_F_ARMV6) && dest == left) tmp = left = RID_TMP; ++ emit_dnm(as, ARMI_MUL, dest, left, right); ++ } ++ /* Only need this for the dest == left == right case. */ ++ if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right); ++} ++ ++static void asm_add(ASMState *as, IRIns *ir) ++{ ++#if !LJ_SOFTFP ++ if (irt_isnum(ir->t)) { ++ if (!asm_fusemadd(as, ir, ARMI_VMLA_D, ARMI_VMLA_D)) ++ asm_fparith(as, ir, ARMI_VADD_D); ++ return; ++ } ++#endif ++ asm_intop_s(as, ir, ARMI_ADD); ++} ++ ++static void asm_sub(ASMState *as, IRIns *ir) ++{ ++#if !LJ_SOFTFP ++ if (irt_isnum(ir->t)) { ++ if (!asm_fusemadd(as, ir, ARMI_VNMLS_D, ARMI_VMLS_D)) ++ asm_fparith(as, ir, ARMI_VSUB_D); ++ return; ++ } ++#endif ++ asm_intop_s(as, ir, ARMI_SUB); ++} ++ ++static void asm_mul(ASMState *as, IRIns *ir) ++{ ++#if !LJ_SOFTFP ++ if (irt_isnum(ir->t)) { ++ asm_fparith(as, ir, ARMI_VMUL_D); ++ return; ++ } ++#endif ++ asm_intmul(as, ir); ++} ++ ++#define asm_addov(as, ir) asm_add(as, ir) ++#define asm_subov(as, ir) asm_sub(as, ir) ++#define asm_mulov(as, ir) asm_mul(as, ir) ++ ++#if !LJ_SOFTFP ++#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) ++#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) ++#endif ++ ++static void asm_neg(ASMState *as, IRIns *ir) ++{ ++#if !LJ_SOFTFP ++ if (irt_isnum(ir->t)) { ++ asm_fpunary(as, ir, ARMI_VNEG_D); ++ return; ++ } ++#endif ++ asm_intneg(as, ir, ARMI_RSB); ++} ++ ++static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) ++{ ++ ai = asm_drop_cmp0(as, ai); ++ if (ir->op2 == 0) { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ uint32_t rs = 0; ++ uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR, &rs); ++ emit_d(as, ai ^ m, dest); ++ if (rs) ++ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16)); ++ } ++ else { ++ /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. 
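*/
/*
** "!k12" means a mask that does not encode as a Thumb-2 modified
** immediate. Masks like 0xff/0xffff could use UXTB/UXTH, and a mask of
** contiguous low bits could use a shift pair ((x << n) >> n clears the
** top n bits); until then such masks are simply materialized into a
** register.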
*/ ++ asm_intop(as, ir, ai); ++ } ++} ++ ++#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN) ++ ++static void asm_bswap(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ if ((as->flags & JIT_F_ARMV6)) { ++ emit_dnm(as, ARMI_REV, dest, left, left); ++ } ++ else { ++ Reg tmp2 = dest; ++ if (tmp2 == left) ++ tmp2 = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), left)); ++ emit_dnm(as, ARMI_EOR | ARMF_SH(ARMSH_LSR, 8), dest, tmp2, RID_TMP); ++ emit_dm(as, ARMI_MOV | ARMF_SH(ARMSH_ROR, 8), tmp2, left); ++ emit_dn(as, ARMC_K12(ARMI_BIC, 256 * 8 | 127), RID_TMP, RID_TMP); ++ emit_dnm(as, ARMI_EOR | ARMF_SH(ARMSH_ROR, 16), RID_TMP, left, left); ++ } ++} ++ ++#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND) ++#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR) ++#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR) ++ ++static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) ++{ ++ if (irref_isk(ir->op2)) { ++ /* Constant shifts. */ ++ /* NYI: Turn SHL+SHR or BAND+SHR into uxtb, uxth or ubfx. */ ++ /* NYI: Turn SHL+ASR into sxtb, sxth or sbfx. */ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ int32_t shift = (IR(ir->op2)->i & 31); ++ emit_dm(as, ARMI_MOV | ARMF_SH(sh, shift), dest, left); ++ } ++ else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | ARMF_RSH(sh, right), dest, left); ++ } ++} ++ ++#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL) ++#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) ++#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) ++#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) ++#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") ++ ++static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) ++{ ++ uint32_t kcmp = 0, kmov = 0; ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ Reg right = 0; ++ if (irref_isk(ir->op2)) { ++ kcmp = emit_isk12(ARMI_CMP, IR(ir->op2)->i); ++ if (kcmp) kmov = emit_isk12(ARMI_MOV, IR(ir->op2)->i); ++ } ++ if (!kmov) { ++ kcmp = 0; ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ } ++ if (kmov || dest != right) { ++ emit_dm(as, ARMI_MOV ^ kmov, dest, right); ++ ARMI_IT(cc); ++ cc ^= 1; /* Must use opposite conditions for paired moves. */ ++ } ++ else { ++ cc ^= (CC_LT ^ CC_GT); /* Otherwise may swap CC_LT <-> CC_GT. */ ++ } ++ if (dest != left) { ++ emit_dm(as, ARMI_MOV, dest, left); ++ ARMI_IT(cc); ++ } ++ emit_nm(as, ARMI_CMP ^ kcmp, left, right); ++} ++ ++#if LJ_SOFTFP ++static void asm_sfpmin_max(ASMState *as, IRIns *ir, int cc) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; ++ RegSet drop = RSET_SCRATCH; ++ Reg r; ++ IRRef args[4]; ++ args[0] = ir->op1; args[1] = (ir + 1)->op1; ++ args[2] = ir->op2; args[3] = (ir + 1)->op2; ++ /* __aeabi_cdcmple preserves r0-r3. 
*/ ++ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ++ if (ra_hasreg((ir + 1)->r)) rset_clear(drop, (ir + 1)->r); ++ if (!rset_test(as->freeset, RID_R2) && ++ regcost_ref(as->cost[RID_R2]) == args[2]) rset_clear(drop, RID_R2); ++ if (!rset_test(as->freeset, RID_R3) && ++ regcost_ref(as->cost[RID_R3]) == args[3]) rset_clear(drop, RID_R3); ++ ra_evictset(as, drop); ++ ra_destpair(as, ir); ++ emit_dm(as, ARMI_MOV, RID_RETHI, RID_R3); ++ ARMI_IT(cc); ++ emit_dm(as, ARMI_MOV, RID_RETLO, RID_R2); ++ ARMI_IT(cc); ++ emit_call(as, (void *)ci->func); ++ for (r = RID_R0; r <= RID_R3; r++) ++ ra_leftov(as, r, args[r - RID_R0]); ++} ++#else ++static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc) ++{ ++ Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = ((left >> 8) & 15); left &= 15; ++ if (dest != left) { ++ emit_tm(as, ARMI_VMOV_D, dest, left); ++ ARMI_IT(cc ^ 1); ++ } ++ if (dest != right) { ++ emit_tm(as, ARMI_VMOV_D, dest, right); ++ ARMI_IT(cc); ++ } ++ emit_t(as, ARMI_VMRS, 0); ++ emit_tm(as, ARMI_VCMP_D, left, right); ++} ++#endif ++ ++static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) ++{ ++#if LJ_SOFTFP ++ UNUSED(fcc); ++#else ++ if (irt_isnum(ir->t)) ++ asm_fpmin_max(as, ir, fcc); ++ else ++#endif ++ asm_intmin_max(as, ir, cc); ++} ++ ++#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL) ++#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE) ++ ++/* -- Comparisons --------------------------------------------------------- */ ++ ++/* Map of comparisons to flags. ORDER IR. */ ++static const uint8_t asm_compmap[IR_ABC + 1] = { ++ /* op FP swp int cc FP cc */ ++ /* LT */ CC_GE + (CC_HS << 4), ++ /* GE x */ CC_LT + (CC_HI << 4), ++ /* LE */ CC_GT + (CC_HI << 4), ++ /* GT x */ CC_LE + (CC_HS << 4), ++ /* ULT x */ CC_HS + (CC_LS << 4), ++ /* UGE */ CC_LO + (CC_LO << 4), ++ /* ULE x */ CC_HI + (CC_LO << 4), ++ /* UGT */ CC_LS + (CC_LS << 4), ++ /* EQ */ CC_NE + (CC_NE << 4), ++ /* NE */ CC_EQ + (CC_EQ << 4), ++ /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ ++}; ++ ++#if LJ_SOFTFP ++/* FP comparisons. */ ++static void asm_sfpcomp(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; ++ RegSet drop = RSET_SCRATCH; ++ Reg r; ++ IRRef args[4]; ++ int swp = (((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1) << 1); ++ args[swp ^ 0] = ir->op1; args[swp ^ 1] = (ir + 1)->op1; ++ args[swp ^ 2] = ir->op2; args[swp ^ 3] = (ir + 1)->op2; ++ /* __aeabi_cdcmple preserves r0-r3. This helps to reduce spills. */ ++ for (r = RID_R0; r <= RID_R3; r++) ++ if (!rset_test(as->freeset, r) && ++ regcost_ref(as->cost[r]) == args[r - RID_R0]) rset_clear(drop, r); ++ ra_evictset(as, drop); ++ asm_guardcc(as, (asm_compmap[ir->o] >> 4)); ++ emit_call(as, (void *)ci->func); ++ for (r = RID_R0; r <= RID_R3; r++) ++ ra_leftov(as, r, args[r - RID_R0]); ++} ++#else ++/* FP comparisons. 
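*/

/*
** asm_compmap above packs, per comparison op, the *inverted* condition the
** guard exits on: low nibble for integer flags, high nibble for FP flags
** read back via VMRS. The two differ because FP compares can be unordered:
** e.g. IR_LT exits on CC_GE for integers but on CC_HS for doubles, since
** HS is also taken when an operand is NaN, so NaN correctly leaves the
** trace. Decoding sketch (helper name is illustrative):
*/
static int guard_exit_cc(int irop, int isnum)
{
  int cc = asm_compmap[irop];
  return isnum ? (cc >> 4) : (cc & 15);
}

/*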
*/ ++static void asm_fpcomp(ASMState *as, IRIns *ir) ++{ ++ Reg left, right; ++ ARMIns ai; ++ int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); ++ if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { ++ left = (ra_alloc1(as, ir->op1, RSET_FPR) & 15); ++ right = 0; ++ ai = ARMI_VCMPZ_D; ++ } ++ else { ++ left = ra_alloc2(as, ir, RSET_FPR); ++ if (swp) { ++ right = (left & 15); left = ((left >> 8) & 15); ++ } ++ else { ++ right = ((left >> 8) & 15); left &= 15; ++ } ++ ai = ARMI_VCMP_D; ++ } ++ asm_guardcc(as, (asm_compmap[ir->o] >> 4)); ++ emit_t(as, ARMI_VMRS, 0); ++ emit_tm(as, ai, left, right); ++} ++#endif ++ ++/* Integer comparisons. */ ++static void asm_intcomp(ASMState *as, IRIns *ir) ++{ ++ ARMCC cc = (asm_compmap[ir->o] & 15); ++ IRRef lref = ir->op1, rref = ir->op2; ++ Reg left; ++ uint32_t m, rs = 0; ++ int cmpprev0 = 0; ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), ++ "bad comparison data type %d", irt_type(ir->t)); ++ if (asm_swapops(as, lref, rref)) { ++ Reg tmp = lref; lref = rref; rref = tmp; ++ if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ ++ else if(cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ ++ } ++ if (irref_isk(rref) && IR(rref)->i == 0) { ++ IRIns *irl = IR(lref); ++ cmpprev0 = (irl + 1 == ir); ++ /* Combine comp(BAND(left, right), 0) into tst left, right. */ ++ if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { ++ IRRef blref = irl->op1, brref = irl->op2; ++ uint32_t m2 = 0; ++ Reg bleft; ++ if (asm_swapops(as, blref, brref)) { ++ Reg tmp = blref; blref = brref; brref = tmp; ++ } ++ if (irref_isk(brref)) { ++ m2 = emit_isk12(ARMI_AND, IR(brref)->i); ++ if ((m2 & (ARMI_AND ^ ARMI_BIC))) ++ goto notst; /* Not beneficial if we miss a constant operand. */ ++ } ++ if (cc == CC_GE) cc = CC_PL; ++ else if (cc == CC_LT) cc = CC_MI; ++ else if (cc > CC_NE) goto notst; /* Other conds don't work with tst. */ ++ bleft = ra_alloc1(as, blref, RSET_GPR); ++ if (!m2) m2 = asm_fuseopm(as, 0, brref, rset_exclude(RSET_GPR, bleft), &rs); ++ asm_guardcc(as, cc); ++ emit_n(as, ARMI_TST ^ m2, bleft); ++ if (rs) ++ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m2 >> 16), (m2 >> 16)); ++ return; ++ } ++ } ++notst: ++ left = ra_alloc1(as, lref, RSET_GPR); ++ m = asm_fuseopm(as, ARMI_CMP, rref, rset_exclude(RSET_GPR, left), &rs); ++ asm_guardcc(as, cc); ++ emit_n(as, ARMI_CMP ^ m, left); ++ if (rs) ++ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16)); ++ /* Signed comparison with zero and referencing previous ins? */ ++ if (cmpprev0 && (cc <= CC_NE || cc >= CC_GE)) ++ as->flagmcp = as->mcp; /* Allow elimination of the compare. */ ++} ++ ++static void asm_comp(ASMState *as, IRIns *ir) ++{ ++#if !LJ_SOFTFP ++ if (irt_isnum(ir->t)) ++ asm_fpcomp(as, ir); ++ else ++#endif ++ asm_intcomp(as, ir); ++} ++ ++#define asm_equal(as, ir) asm_comp(as, ir) ++ ++#if LJ_HASFFI ++/* 64 bit integer comparisons. */ ++static void asm_int64comp(ASMState *as, IRIns *ir) ++{ ++ int signedcomp = (ir->o <= IR_GT); ++ ARMCC cclo, cchi; ++ Reg leftlo, lefthi; ++ uint32_t mlo, mhi, rslo = 0, rshi = 0; ++ RegSet allow = RSET_GPR, oldfree; ++ ++ /* Always use unsigned comparison for loword. */ ++ cclo = asm_compmap[ir->o + (signedcomp ? 4 : 0)] & 15; ++ leftlo = ra_alloc1(as, ir->op1, allow); ++ oldfree = as->freeset; ++ mlo = asm_fuseopm(as, ARMI_CMP, ir->op2, rset_clear(allow, leftlo), &rslo); ++ allow &= ~(oldfree & ~as->freeset); /* Update for allocs of asm_fuseopm. */ ++ ++ /* Use signed or unsigned comparison for hiword. 
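++  ** Sketch of the signed case in forward execution order, e.g. for a
++  ** 64-bit IR_LT (labels are illustrative only):
++  **
++  **   cmp  hi(a), hi(b)
++  **   bgt  ->exit       ; hiword strictly greater: comparison failed
++  **   bne  1f           ; hiwords differ (thus less): comparison holds
++  **   cmp  lo(a), lo(b)
++  **   bhs  ->exit       ; hiwords equal: decide on the unsigned loword
++  ** 1: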
*/ ++ cchi = asm_compmap[ir->o] & 15; ++ lefthi = ra_alloc1(as, (ir + 1)->op1, allow); ++ mhi = asm_fuseopm(as, ARMI_CMP, (ir + 1)->op2, rset_clear(allow, lefthi), &rshi); ++ ++ /* All register allocations must be performed _before_ this point. */ ++ if (signedcomp) { ++ MCLabel l_around = emit_label(as); ++ asm_guardcc(as, cclo); ++ emit_n(as, ARMI_CMP ^ mlo, leftlo); ++ if (rslo) ++ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rslo, (mlo >> 16), (mlo >> 16)); ++ emit_branch(as, ARMF_CC(ARMI_B, CC_NE), l_around); ++ if (cchi == CC_GE || cchi == CC_LE) cchi ^= 6; /* GE -> GT, LE -> LT */ ++ asm_guardcc(as, cchi); ++ } ++ else { ++ asm_guardcc(as, cclo); ++ emit_n(as, ARMI_CMP ^ mlo, leftlo); ++ if (rslo) ++ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rslo, (mlo >> 16), (mlo >> 16)); ++ ARMI_IT(CC_EQ); ++ } ++ emit_n(as, ARMI_CMP ^ mhi, lefthi); ++ if (rshi) ++ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rshi, (mhi >> 16), (mhi >> 16)); ++} ++#endif ++ ++/* -- Split register ops -------------------------------------------------- */ ++ ++/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */ ++static void asm_hiop(ASMState *as, IRIns *ir) ++{ ++ /* HIOP is marked as a store because it needs its own DCE logic. */ ++ int uselo = ra_used(ir - 1), usehi = ra_used(ir); /* Loword/hiword used? */ ++ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; ++#if LJ_HASFFI || LJ_SOFTFP ++ if ((ir - 1)->o <= IR_NE) { ++ /* 64 bit integer or FP comparisons. ORDER IR. */ ++ as->curins--; /* Always skip the loword comparison. */ ++#if LJ_SOFTFP ++ if (!irt_isint(ir->t)) { ++ asm_sfpcomp(as, ir - 1); ++ return; ++ } ++#endif ++#if LJ_HASFFI ++ asm_int64comp(as, ir - 1); ++#endif ++ return; ++#if LJ_SOFTFP ++ } ++ else if ((ir - 1)->o == IR_MIN || (ir - 1)->o == IR_MAX) { ++ as->curins--; /* Always skip the loword min/max. */ ++ if (uselo || usehi) ++ asm_sfpmin_max(as, ir - 1, (ir - 1)->o == IR_MIN ? CC_PL : CC_LE); ++ return; ++#elif LJ_HASFFI ++ } ++ else if ((ir - 1)->o == IR_CONV) { ++ as->curins--; /* Always skip the CONV. */ ++ if (usehi || uselo) ++ asm_conv64(as, ir); ++ return; ++#endif ++ } ++ else if ((ir - 1)->o == IR_XSTORE) { ++ if ((ir - 1)->r != RID_SINK) ++ asm_xstore_(as, ir, 4); ++ return; ++ } ++#endif ++ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ ++ switch ((ir - 1)->o) { ++#if LJ_HASFFI ++ case IR_ADD: ++ as->curins--; ++ asm_intop(as, ir, ARMI_ADC); ++ asm_intop(as, ir - 1, ARMI_ADD | ARMI_S); ++ break; ++ case IR_SUB: ++ as->curins--; ++ asm_intop(as, ir, ARMI_SBC); ++ asm_intop(as, ir - 1, ARMI_SUB | ARMI_S); ++ break; ++ case IR_NEG: ++ as->curins--; ++ { ++ /* asm_intnegr */ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_dn(as, ARMC_K12(ARMI_SBC, 0), left, dest); ++ } ++ asm_intneg(as, ir - 1, ARMI_RSB | ARMI_S); ++ break; ++ case IR_CNEWI: ++ /* Nothing to do here. Handled by lo op itself. */ ++ break; ++#endif ++#if LJ_SOFTFP ++ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: ++ case IR_STRTO: ++ if (!uselo) ++ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ ++ break; ++ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: ++ /* Nothing to do here. Handled by lo op itself. */ ++ break; ++#endif ++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: ++ if (!uselo) ++ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. 
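++  ** The carry-chained cases above are the core of the split ops: the
++  ** loword instruction gets the S suffix so the hiword instruction can
++  ** consume the carry, e.g. for a 64-bit IR_ADD (forward order):
++  **
++  **   adds  lo(dest), lo(left), lo(right)
++  **   adc   hi(dest), hi(left), hi(right)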
*/ ++ break; ++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; ++ } ++} ++ ++/* -- Profiling ----------------------------------------------------------- */ ++ ++static void asm_prof(ASMState *as, IRIns *ir) ++{ ++ UNUSED(ir); ++ asm_guardcc(as, CC_NE); ++ emit_n(as, ARMC_K12(ARMI_TST, HOOK_PROFILE), RID_TMP); ++ emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); ++} ++ ++/* -- Stack handling ------------------------------------------------------ */ ++ ++/* Check Lua stack size for overflow. Use exit handler as fallback. */ ++static void asm_stack_check(ASMState *as, ++ BCReg topslot, ++ IRIns *irp, ++ RegSet allow, ++ ExitNo exitno) ++{ ++ Reg pbase; ++ uint32_t k; ++ if (irp) { ++ if (!ra_hasspill(irp->s)) { ++ pbase = irp->r; ++ lj_assertA(ra_hasreg(pbase), "base reg lost"); ++ } ++ else if (allow) { ++ pbase = rset_pickbot(allow); ++ } ++ else { ++ pbase = RID_RET; ++ emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */ ++ } ++ } ++ else { ++ pbase = RID_BASE; ++ } ++ emit_branchlink(as, ARMI_BL, exitstub_addr(as->J, exitno)); ++ ARMI_IT(CC_LS); ++ k = emit_isk12(0, (int32_t)(8*topslot)); ++ lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); ++ emit_n(as, ARMI_CMP ^ k, RID_TMP); ++ emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); ++ emit_lso(as, ++ ARMI_LDR, ++ RID_TMP, ++ RID_TMP, ++ (int32_t)offsetof(lua_State, maxstack)); ++ if (irp) { ++ /* Must not spill arbitrary registers in head of side trace. */ ++ int32_t i = i32ptr(&J2G(as->J)->cur_L); ++ if (ra_hasspill(irp->s)) ++ emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); ++ emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); ++ if (ra_hasspill(irp->s) && !allow) ++ emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ ++ emit_loadi(as, RID_TMP, (i & ~4095)); ++ } ++ else { ++ emit_getgl(as, RID_TMP, cur_L); ++ } ++} ++ ++/* Restore Lua stack from on-trace state. */ ++static void asm_stack_restore(ASMState *as, SnapShot *snap) ++{ ++ SnapEntry *map = &as->T->snapmap[snap->mapofs]; ++ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap) - 1]; ++ MSize n, nent = snap->nent; ++ /* Store the value of all modified slots to the Lua stack. */ ++ for (n = 0; n < nent; n++) { ++ SnapEntry sn = map[n]; ++ BCReg s = snap_slot(sn); ++ int32_t ofs = 8*((int32_t)s - 1); ++ IRRef ref = snap_ref(sn); ++ IRIns *ir = IR(ref); ++ if ((sn & SNAP_NORESTORE)) ++ continue; ++ if (irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); ++ Reg tmp; ++ /* LJ_SOFTFP: must be a number constant. 
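++  ** For reference, asm_stack_check above boils down to (forward order,
++  ** after cur_L has been loaded into RID_TMP):
++  **
++  **   ldr   tmp, [tmp, #offsetof(lua_State, maxstack)]
++  **   sub   tmp, tmp, pbase
++  **   cmp   tmp, #8*topslot
++  **   it    ls
++  **   blls  ->exitstub    ; maxstack - base <= 8*topslot: overflow exit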
*/ ++ lj_assertA(irref_isk(ref), "unsplit FP op"); ++ tmp = ra_allock(as, ++ (int32_t)ir_knum(ir)->u32.lo, ++ rset_exclude(RSET_GPREVEN, RID_BASE)); ++ emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); ++ if (rset_test(as->freeset, tmp + 1)) odd = RID2RSET(tmp + 1); ++ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd); ++ emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs + 4); ++#else ++ Reg src = ra_alloc1(as, ref, RSET_FPR); ++ emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs); ++#endif ++ } ++ else { ++ RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); ++ Reg type; ++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), ++ "restore of IR type %d", irt_type(ir->t)); ++ if (!irt_ispri(ir->t)) { ++ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); ++ emit_lso(as, ARMI_STR, src, RID_BASE, ofs); ++ if (rset_test(as->freeset, src + 1)) odd = RID2RSET(src + 1); ++ } ++ if ((sn & (SNAP_CONT | SNAP_FRAME))) { ++ if (s == 0) continue; /* Do not overwrite link to previous frame. */ ++ type = ra_allock(as, (int32_t)(*flinks--), odd); ++#if LJ_SOFTFP ++ } ++ else if ((sn & SNAP_SOFTFPNUM)) { ++ type = ra_alloc1(as, ref + 1, rset_exclude(RSET_GPRODD, RID_BASE)); ++#endif ++ } else if ((sn & SNAP_KEYINDEX)) { ++ type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd); ++ } ++ else { ++ type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); ++ } ++ emit_lso(as, ARMI_STR, type, RID_BASE, ofs + 4); ++ } ++ checkmclim(as); ++ } ++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); ++} ++ ++/* -- GC handling --------------------------------------------------------- */ ++ ++/* Marker to prevent patching the GC check exit. */ ++#define ARM_NOPATCH_GC_CHECK (ARMC_K12(ARMI_BIC, 0)) ++ ++/* Check GC threshold and do one or more GC steps. */ ++static void asm_gc_check(ASMState *as) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg tmp1, tmp2; ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ ++ asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ ++ * --as->mcp = ARM_NOPATCH_GC_CHECK; ++ emit_n(as, ARMC_K12(ARMI_CMP, 0), RID_RET); ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ASMREF_TMP2; /* MSize steps */ ++ asm_gencall(as, ci, args); ++ tmp1 = ra_releasetmp(as, ASMREF_TMP1); ++ tmp2 = ra_releasetmp(as, ASMREF_TMP2); ++ emit_loadi(as, tmp2, as->gcsteps); ++ /* Jump around GC step if GC total < GC threshold. */ ++ emit_branch(as, ARMF_CC(ARMI_B, CC_LS), l_end); ++ emit_nm(as, ARMI_CMP, RID_TMP, tmp2); ++ emit_lso(as, ++ ARMI_LDR, ++ tmp2, ++ tmp1, ++ (int32_t)offsetof(global_State, gc.threshold)); ++ emit_lso(as, ++ ARMI_LDR, ++ RID_TMP, ++ tmp1, ++ (int32_t)offsetof(global_State, gc.total)); ++ ra_allockreg(as, i32ptr(J2G(as->J)), tmp1); ++ as->gcsteps = 0; ++ checkmclim(as); ++} ++ ++/* -- Loop handling ------------------------------------------------------- */ ++ ++/* Fixup the loop branch. */ ++static void asm_loop_fixup(ASMState *as) ++{ ++ MCode *p = as->mctop; ++ MCode *target = as->mcp; ++ if (as->loopinv) { ++ /* Inverted loop branch? */ ++ /* asm_guardcc already inverted the bcc and patched the final bl. */ ++ p[-2] |= ARMC_B((uint32_t)((target - p + 1) << 1)); ++ } ++ else { ++ p[-1] = ARMI_B_T4 | ARMC_BL((uint32_t)((target - p) << 1)); ++ } ++} ++ ++/* Fixup the tail of the loop. */ ++static void asm_loop_tail_fixup(ASMState *as) ++{ ++ UNUSED(as); /* Nothing to do. 
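++  ** (The loop branch itself is patched in asm_loop_fixup above. Branch
++  ** offsets count halfwords, hence the << 1 when the 32-bit MCode index
++  ** delta is converted, e.g. for the non-inverted case:
++  **
++  **   p[-1] = ARMI_B_T4 | ARMC_BL((uint32_t)((target - p) << 1));
++  **
++  ** where ARMC_BL scatters the offset into the T4 immediate fields.)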
*/ ++} ++ ++/* -- Head of trace ------------------------------------------------------- */ ++ ++/* Reload L register from g->cur_L. */ ++static void asm_head_lreg(ASMState *as) ++{ ++ IRIns *ir = IR(ASMREF_L); ++ if (ra_used(ir)) { ++ Reg r = ra_dest(as, ir, RSET_GPR); ++ emit_getgl(as, r, cur_L); ++ ra_evictk(as); ++ } ++} ++ ++/* Coalesce BASE register for a root trace. */ ++static void asm_head_root_base(ASMState *as) ++{ ++ IRIns *ir; ++ asm_head_lreg(as); ++ ir = IR(REF_BASE); ++ if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) ++ ra_spill(as, ir); ++ ra_destreg(as, ir, RID_BASE); ++} ++ ++/* Coalesce BASE register for a side trace. */ ++static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) ++{ ++ IRIns *ir; ++ asm_head_lreg(as); ++ ir = IR(REF_BASE); ++ if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) ++ ra_spill(as, ir); ++ if (ra_hasspill(irp->s)) { ++ rset_clear(allow, ra_dest(as, ir, allow)); ++ } ++ else { ++ Reg r = irp->r; ++ lj_assertA(ra_hasreg(r), "base reg lost"); ++ rset_clear(allow, r); ++ if (r != ir->r && !rset_test(as->freeset, r)) ++ ra_restore(as, regcost_ref(as->cost[r])); ++ ra_destreg(as, ir, r); ++ } ++ return allow; ++} ++ ++/* -- Tail of trace ------------------------------------------------------- */ ++ ++/* Fixup the tail code. */ ++static void asm_tail_fixup(ASMState *as, TraceNo lnk) ++{ ++ MCode *p = as->mctop; ++ MCode *target; ++ int32_t spadj = as->T->spadjust; ++ if (spadj == 0) { ++ as->mctop = --p; ++ } ++ else { ++ /* Patch stack adjustment. */ ++ uint32_t k = emit_isk12(ARMI_ADD, spadj); ++ lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); ++ p[-2] = (ARMI_ADD ^ k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); ++ } ++ /* Patch exit branch. */ ++ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)as->J->exitstubgroup[0] - 8;//lj_vm_exit_interp; ++ p[-1] = ARMI_B_T4 | ARMC_BL((target - p) << 1); ++} ++ ++/* Prepare tail of code. */ ++static void asm_tail_prep(ASMState *as) ++{ ++ MCode *p = as->mctop - 1; /* Leave room for exit branch. */ ++ if (as->loopref) { ++ as->invmcp = as->mcp = p; ++ } ++ else { ++ as->mcp = p - 1; /* Leave room for stack pointer adjustment. */ ++ as->invmcp = NULL; ++ } ++ *p = 0; /* Prevent load/store merging. */ ++} ++ ++/* -- Trace setup --------------------------------------------------------- */ ++ ++/* Ensure there are enough stack slots for call arguments. */ ++static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ IRRef args[CCI_NARGS_MAX * 2]; ++ uint32_t i, nargs = CCI_XNARGS(ci); ++ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; ++ asm_collectargs(as, ir, ci, args); ++ for (i = 0; i < nargs; i++) { ++ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { ++ if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { ++ if (irt_isnum(IR(args[i])->t)) { ++ if (nfpr > 0) nfpr--; ++ else fprodd = 0, nslots = (nslots + 3) & ~1; ++ } ++ else { ++ if (fprodd) fprodd--; ++ else if (nfpr > 0) fprodd = 1, nfpr--; ++ else nslots++; ++ } ++ } ++ else if (irt_isnum(IR(args[i])->t)) { ++ ngpr &= ~1; ++ if (ngpr > 0) ngpr -= 2; ++ else nslots += 2; ++ } ++ else { ++ if (ngpr > 0) ngpr--; ++ else nslots++; ++ } ++ } ++ else { ++ if (ngpr > 0) ngpr--; ++ else nslots++; ++ } ++ } ++ if (nslots > as->evenspill) /* Leave room for args in stack slots. 
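++  ** (EABI accounting used above, in brief: a double passed in core
++  ** registers occupies an aligned even/odd pair, hence ngpr &= ~1 before
++  ** taking two; passed on the stack it takes two 32-bit slots with
++  ** 8-byte alignment, which is what (nslots + 3) & ~1 provides.)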
*/ ++ as->evenspill = nslots; ++ return REGSP_HINT(RID_RET); ++} ++ ++static void asm_setup_target(ASMState *as) ++{ ++ /* May need extra exit for asm_stack_check on side traces. */ ++ asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); ++} ++ ++/* -- Trace patching ------------------------------------------------------ */ ++ ++/* Patch exit jumps of existing machine code to a new target. */ ++void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) ++{ ++ MCode *p = T->mcode; ++ MCode *pe = (MCode *)((char *)p + T->szmcode); ++ MCode *cstart = NULL, *cend = p; ++ MCode *mcarea = lj_mcode_patch(J, p, 0); ++ MCode *px = exitstub_addr(J, exitno) - 1; ++ for (; p < pe; p++) { ++ /* Look for bl_cc exitstub, replace with b_cc target. */ ++ uint32_t ins = *p; ++ if ((ins & 0xd000f800u) == 0xd000f000u && ++ (((ARMC_BL_READ(ins) >> 1) ^ (px - p)) & 0x007fffffu) == 0 && ++ p[-1] != ARM_NOPATCH_GC_CHECK) { ++ *p = ARMI_B_T4 | ARMC_BL((uint32_t)(((target - p) - 1) << 1)); ++ cend = p + 1; ++ if (!cstart) cstart = p; ++ } ++ } ++ lj_assertJ(cstart != NULL, "exit stub %d not found", exitno); ++ lj_mcode_sync(cstart, cend); ++ lj_mcode_patch(J, mcarea, 1); ++} ++ +diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c +index 43e44305..2e6b6fd4 100644 +--- a/src/lj_ccallback.c ++++ b/src/lj_ccallback.c +@@ -151,7 +151,7 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) + uint32_t *p = page; + void *target = (void *)lj_vm_ffi_callback; + MSize slot; +- /* This must match with the saveregs macro in buildvm_arm.dasc. */ ++ /* This must match with the saveregs macro in buildvm_arm.dasc. */ //jturnsek!!! + *p++ = ARMI_SUB|ARMF_D(RID_R12)|ARMF_N(RID_R12)|ARMF_M(RID_PC); + *p++ = ARMI_PUSH|ARMF_N(RID_SP)|RSET_RANGE(RID_R4,RID_R11+1)|RID2RSET(RID_LR); + *p++ = ARMI_SUB|ARMI_K12|ARMF_D(RID_R12)|ARMF_N(RID_R12)|CALLBACK_MCODE_HEAD; +@@ -295,7 +295,7 @@ static void callback_mcode_new(CTState *cts) + DWORD oprot; + LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot); + } +-#elif LJ_TARGET_POSIX ++#elif LJ_TARGET_POSIX && !LJ_TARGET_NUTTX + mprotect(p, sz, (PROT_READ|PROT_EXEC)); + #endif + } +diff --git a/src/lj_clib.c b/src/lj_clib.c +index f0ef6edd..0fc6419b 100644 +--- a/src/lj_clib.c ++++ b/src/lj_clib.c +@@ -50,6 +50,8 @@ LJ_NORET LJ_NOINLINE static void clib_error_(lua_State *L) + #define CLIB_SOEXT "%s.dylib" + #elif LJ_TARGET_CYGWIN + #define CLIB_SOEXT "%s.dll" ++#elif LJ_TARGET_NUTTX ++#define CLIB_SOEXT "%s" + #else + #define CLIB_SOEXT "%s.so" + #endif +@@ -428,7 +430,11 @@ void lj_clib_unload(CLibrary *cl) + void lj_clib_default(lua_State *L, GCtab *mt) + { + CLibrary *cl = clib_new(L, mt); ++#if LJ_TARGET_NUTTX ++ cl->handle = clib_loadlib(L, "c", 0); ++#else + cl->handle = CLIB_DEFHANDLE; ++#endif + } + + #endif +diff --git a/src/lj_def.h b/src/lj_def.h +index b61297aa..03f60c3f 100644 +--- a/src/lj_def.h ++++ b/src/lj_def.h +@@ -87,6 +87,7 @@ typedef unsigned int uintptr_t; + #define LJ_MAX_EXITSTUBGR 16 /* Max. # of exit stub groups. */ + + /* Various macros. */ ++#undef UNUSED /* NuttX UNUSED macro is giving us problems. Use our own. */ + #ifndef UNUSED + #define UNUSED(x) ((void)(x)) /* to avoid warnings */ + #endif +diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h +index 52762eea..909d4d5a 100644 +--- a/src/lj_dispatch.h ++++ b/src/lj_dispatch.h +@@ -87,25 +87,23 @@ typedef uint16_t HotCount; + + /* Global state, main thread and extra fields are allocated together. */ + typedef struct GG_State { +- lua_State L; /* Main thread. 
*/ +- global_State g; /* Global state. */ +-#if LJ_TARGET_ARM && !LJ_TARGET_NX +- /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ +- uint8_t align1[(16-sizeof(global_State))&15]; +-#endif ++ lua_State L; /* Main thread. */ + #if LJ_TARGET_MIPS +- ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ ++ ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ + #endif + #if LJ_HASJIT +- jit_State J; /* JIT state. */ +- HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */ ++ jit_State J; /* JIT state. */ ++ HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */ + #if LJ_TARGET_ARM && !LJ_TARGET_NX + /* Ditto for J. */ +- uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15]; ++ uint8_t align1[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15]; + #endif + #endif +- ASMFunction dispatch[GG_LEN_DISP]; /* Instruction dispatch tables. */ +- BCIns bcff[GG_NUM_ASMFF]; /* Bytecode for ASM fast functions. */ ++ global_State g; /* Global state. */ /* jturnsek: moved here in order to avoid excessive negative offsets when LJ_HASJIT */ ++ /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ ++ uint8_t align2[(16-sizeof(global_State))&15]; ++ ASMFunction dispatch[GG_LEN_DISP]; /* Instruction dispatch tables. */ ++ BCIns bcff[GG_NUM_ASMFF]; /* Bytecode for ASM fast functions. */ + } GG_State; + + #define GG_OFS(field) ((int)offsetof(GG_State, field)) +diff --git a/src/lj_emit_armv7m.h b/src/lj_emit_armv7m.h +new file mode 100644 +index 00000000..5381df8b +--- /dev/null ++++ b/src/lj_emit_armv7m.h +@@ -0,0 +1,474 @@ ++/* ++** ARMv7-M instruction emitter. ++** Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++*/ ++ ++/* -- Constant encoding --------------------------------------------------- */ ++ ++#define INVAI_MASK 0xfbe0 ++ ++static uint32_t emit_invai[16] = { ++ /* AND, TST */ ((ARMI_AND ^ 0x1a00) ^ (ARMI_BIC ^ 0x1a00)) & INVAI_MASK, ++ /* BIC */ ((ARMI_BIC ^ 0x1a00) ^ (ARMI_AND ^ 0x1a00)) & INVAI_MASK, ++ /* MOV, ORR */ ((ARMI_MOV ^ 0x1a00) ^ (ARMI_MVN ^ 0x1a00)) & INVAI_MASK, ++ /* MVN, ORN */ ((ARMI_MVN ^ 0x1a00) ^ (ARMI_MOV ^ 0x1a00)) & INVAI_MASK, ++ /* EOR, TEQ */ 0, ++ 0, ++ 0, ++ 0, ++ /* ADD, CMN */ ((ARMI_ADD ^ 0x1a00) ^ (ARMI_SUB ^ 0x1a00)) & INVAI_MASK, ++ 0, ++ /* ADC */ ((ARMI_ADC ^ 0x1a00) ^ (ARMI_SBC ^ 0x1a00)) & INVAI_MASK, ++ /* SBC */ ((ARMI_SBC ^ 0x1a00) ^ (ARMI_ADC ^ 0x1a00)) & INVAI_MASK, ++ 0, ++ /* SUB, CMP */ ((ARMI_SUB ^ 0x1a00) ^ (ARMI_ADD ^ 0x1a00)) & INVAI_MASK, ++ /* RSB */ 0, ++ 0 ++}; ++ ++/* Encode constant in K12 format for data processing instructions. */ ++static unsigned int emit_isk12(ARMIns ai, signed int n) ++{ ++ unsigned int invai, i, m = (unsigned int)n; ++ /* K12: 1bcdefgh value, rotated in steps of one bit. 
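++  ** Worked examples of what the pattern checks below accept (Thumb-2
++  ** modified immediates):
++  **
++  **   0x000000ab  ->  i:imm3 = 0000, imm8 = 0xab
++  **   0x00ab00ab  ->  i:imm3 = 0001  (00XY00XY)
++  **   0xab00ab00  ->  i:imm3 = 0010  (XY00XY00)
++  **   0xabababab  ->  i:imm3 = 0011  (XYXYXYXY)
++  **   0x0000ff00  ->  imm8 = 0xff rotated right by 24; the rotate loop
++  **                   requires the top bit of imm8 set and a rotation >= 8.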
*/ ++ if (m <= 255) { ++ /* i:imm3 = 0000 */ ++ return ARMC_K12(0, m); ++ } ++ else if (!(m & 0xff00ff00) && !(((m >> 16 & 0xff) ^ m) & 0xff)) { ++ /* i:imm3 = 0001 */ ++ return ARMC_K12(0, 0x100 | (m & 0xff)); ++ } ++ else if (!(m & 0x00ff00ff) && !(((m >> 16 & 0xff00) ^ m) & 0xff00)) { ++ /* i:imm3 = 0010 */ ++ return ARMC_K12(0, 0x200 | (m >> 8 & 0xff)); ++ } ++ else if (!(((m >> 16 & 0xffff) ^ m) & 0xffff) && !(((m >> 8 & 0xff) ^ m) & 0xff)) { ++ /* i:imm3 = 0011 */ ++ return ARMC_K12(0, 0x300 | (m & 0xff)); ++ } ++ else { ++ for (i = 0; i < 4096; i += 128, m = lj_rol(m, 1)) { ++ if (m <= 255) { ++ if ((m & 0x80) && (i >= 128 * 8)) ++ return ARMC_K12(0, i | (m & 0x7f)); ++ else ++ continue; ++ } ++ } ++ } ++ ++ /* Otherwise try negation/complement with the inverse instruction. */ ++ invai = emit_invai[(ai >> 5) & 0xf]; ++ if (!invai) return 0; /* Failed. No inverse instruction. */ ++ m = ~(unsigned int)n; ++ if (invai == (((ARMI_SUB ^ 0x1a00) ^ (ARMI_ADD ^ 0x1a00)) & INVAI_MASK) || ++ invai == (((ARMI_CMP ^ 0x1a00) ^ (ARMI_CMN ^ 0x1a00)) & INVAI_MASK)) m++; ++ if (m <= 255) { ++ /* i:imm3 = 0000 */ ++ return ARMC_K12(invai, m); ++ } ++ else if (!(m & 0xff00ff00) && !(((m >> 16 & 0xff) ^ m) & 0xff)) { ++ /* i:imm3 = 0001 */ ++ return ARMC_K12(invai, 0x100 | (m & 0xff)); ++ } ++ else if (!(m & 0x00ff00ff) && !(((m >> 16 & 0xff00) ^ m) & 0xff00)) { ++ /* i:imm3 = 0010 */ ++ return ARMC_K12(invai, 0x200 | (m >> 8 & 0xff)); ++ } ++ else if (!(((m >> 16 & 0xffff) ^ m) & 0xffff) && !(((m >> 8 & 0xff) ^ m) & 0xff)) { ++ /* i:imm3 = 0011 */ ++ return ARMC_K12(invai, 0x300 | (m & 0xff)); ++ } ++ else { ++ for (i = 0; i < 4096; i += 128, m = lj_rol(m, 1)) { ++ if (m <= 255) { ++ if ((m & 0x80) && (i >= 128 * 8)) ++ return ARMC_K12(invai, i | (m & 0x7f)); ++ else ++ continue; ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++/* -- Emit basic instructions --------------------------------------------- */ ++ ++static void emit_dnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm) ++{ ++ * --as->mcp = ai | ARMF_D(rd) | ARMF_N(rn) | ARMF_M(rm); ++} ++ ++static void emit_tnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm) ++{ ++ * --as->mcp = ai | ARMF_T(rd) | ARMF_N(rn) | ARMF_M(rm); ++} ++ ++static void emit_dm(ASMState *as, ARMIns ai, Reg rd, Reg rm) ++{ ++ * --as->mcp = ai | ARMF_D(rd) | ARMF_M(rm); ++} ++ ++static void emit_tm(ASMState *as, ARMIns ai, Reg rd, Reg rm) ++{ ++ * --as->mcp = ai | ARMF_T(rd) | ARMF_M(rm); ++} ++ ++static void emit_dn(ASMState *as, ARMIns ai, Reg rd, Reg rn) ++{ ++ * --as->mcp = ai | ARMF_D(rd) | ARMF_N(rn); ++} ++ ++static void emit_tn(ASMState *as, ARMIns ai, Reg rd, Reg rn) ++{ ++ * --as->mcp = ai | ARMF_T(rd) | ARMF_N(rn); ++} ++ ++static void emit_nm(ASMState *as, ARMIns ai, Reg rn, Reg rm) ++{ ++ * --as->mcp = ai | ARMF_N(rn) | ARMF_M(rm); ++} ++ ++static void emit_d(ASMState *as, ARMIns ai, Reg rd) ++{ ++ * --as->mcp = ai | ARMF_D(rd); ++} ++ ++static void emit_t(ASMState *as, ARMIns ai, Reg rd) ++{ ++ * --as->mcp = ai | ARMF_T(rd); ++} ++ ++static void emit_n(ASMState *as, ARMIns ai, Reg rn) ++{ ++ * --as->mcp = ai | ARMF_N(rn); ++} ++ ++static void emit_m(ASMState *as, ARMIns ai, Reg rm) ++{ ++ * --as->mcp = ai | ARMF_M(rm); ++} ++ ++static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) ++{ ++ lj_assertA(ofs >= -1020 && ofs <= 1020, ++ "load/store offset %d out of range", ofs); ++ if (ofs < 0) ofs = -ofs; ++ else ai |= ARMI_LSX_U; ++ * --as->mcp = ai | ARMI_LSX_P | ARMF_T(rd) | ARMF_D(rd + 1) | ARMF_N(rn) | ++ (((ofs >> 2) & 0xff) << 16); /* imm 
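++  ** 8, that is, the LDRD/STRD (T1) offset field holds the byte offset
++  ** divided by four. A quick sketch for ofs = 16:
++  **
++  **   (((16 >> 2) & 0xff) << 16)    ; imm8 = 4, address = rn +/- 4*4
++  **
++  ** so the encoded field counts in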
multiples of 4 */ ++} ++ ++static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) ++{ ++ lj_assertA(ofs >= -255 && ofs <= 4095, ++ "load/store offset %d out of range", ofs); ++ /* Combine LDR/STR pairs to LDRD/STRD. */ ++ if (*as->mcp == (ai | ARMI_LS_1 | ARMI_LS_P | ARMI_LS_U | ARMF_T(rd ^ 1) | ARMF_N(rn) | (ofs ^ 4)) && ++ (ai & ~(ARMI_LDR ^ ARMI_STR)) == ARMI_STR && rd != rn && ++ (uint32_t)ofs <= 252 && !(ofs & 3) && !((rd ^ (ofs >> 2)) & 1) && ++ as->mcp != as->mcloop) { ++ as->mcp++; ++ emit_lsox(as, ai == ARMI_LDR ? ARMI_LDRD : ARMI_STRD, rd & ~1, rn, ofs & ~4); ++ return; ++ } ++ if (ofs > 255) { ++ * --as->mcp = ai | ARMI_LS_I | ARMF_T(rd) | ARMF_N(rn) | ((ofs & 0xfff) << 16); ++ return; ++ } ++ if (ofs < 0) ofs = -ofs; ++ else ai |= ARMI_LS_U; ++ * --as->mcp = ai | ARMI_LS_1 | ARMI_LS_P | ARMF_T(rd) | ARMF_N(rn) | ((ofs & 0xff) << 16); ++} ++ ++#if !LJ_SOFTFP ++static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) ++{ ++ lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs & 3) == 0, ++ "load/store offset %d out of range", ofs); ++ if (ofs < 0) ofs = -ofs; ++ else ai |= ARMI_LSX_U; ++ * --as->mcp = ai | ARMF_T(rd & 15) | ARMF_N(rn) | ((ofs >> 2) << 16); ++} ++#endif ++ ++/* -- Emit loads/stores --------------------------------------------------- */ ++ ++/* Prefer spills of BASE/L. */ ++#define emit_canremat(ref) ((ref) < ASMREF_L) ++ ++/* Try to find a one step delta relative to another constant. */ ++static int emit_kdelta1(ASMState *as, Reg d, int32_t i) ++{ ++ RegSet work = ~as->freeset & RSET_GPR; ++ while (work) { ++ Reg r = rset_picktop(work); ++ IRRef ref = regcost_ref(as->cost[r]); ++ lj_assertA(r != d, "dest reg not free"); ++ if (emit_canremat(ref)) { ++ int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); ++ uint32_t k = emit_isk12(ARMI_ADD, delta); ++ if (k) { ++ if (k == ARMI_K12) ++ emit_dm(as, ARMI_MOV, d, r); ++ else ++ emit_dn(as, ARMI_ADD ^ k, d, r); ++ return 1; ++ } ++ } ++ rset_clear(work, r); ++ } ++ return 0; /* Failed. */ ++} ++ ++/* Try to find a two step delta relative to another constant. */ ++static int emit_kdelta2(ASMState *as, Reg rd, int32_t i) ++{ ++ RegSet work = ~as->freeset & RSET_GPR; ++ while (work) { ++ Reg r = rset_picktop(work); ++ IRRef ref = regcost_ref(as->cost[r]); ++ lj_assertA(r != rd, "dest reg %d not free", rd); ++ if (emit_canremat(ref)) { ++ int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i; ++ if (other) { ++ int32_t delta = i - other; ++ uint32_t sh, inv = 0, k2, k; ++ if (delta < 0) { delta = -delta; inv = (ARMI_ADD ^ 0x1a00) ^ (ARMI_SUB ^ 0x1a00); } ++ sh = lj_ffs(delta) & ~1; ++ k2 = emit_isk12(0, delta & (255 << sh)); ++ k = emit_isk12(0, delta & ~(255 << sh)); ++ if (k) { ++ emit_dn(as, ARMI_ADD ^ k2 ^ inv, rd, rd); ++ emit_dn(as, ARMI_ADD ^ k ^ inv, rd, r); ++ return 1; ++ } ++ } ++ } ++ rset_clear(work, r); ++ } ++ return 0; /* Failed. */ ++} ++ ++/* Load a 32 bit constant into a GPR. */ ++static void emit_loadi(ASMState *as, Reg rd, int32_t i) ++{ ++ uint32_t k = emit_isk12(ARMI_MOV, i); ++ lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP, ++ "dest reg %d not free", rd); ++ if (k) { ++ /* Standard K12 constant. */ ++ emit_d(as, ARMI_MOV ^ k, rd); ++ } ++ else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { ++ /* 16 bit loword constant for ARMv6T2. 
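++  ** MOVW's 16-bit immediate is scattered over the imm8/imm3/i/imm4
++  ** fields of the (halfword-swapped) encoding below. E.g. for i = 0x1234:
++  **
++  **   imm8 = 0x34 -> bits 16..23    imm3 = 0x2 -> bits 28..30
++  **   i    = 0x1  -> bit  10        imm4 = 0x1 -> bits 0..3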
*/ ++ emit_d(as, ARMI_MOVW | ((i & 0xff) << 16) | ((i & 0x700) << 20) | ((i & 0x800) >> 1) | ((i & 0xf000) >> 12), rd); ++ } ++ else if (emit_kdelta1(as, rd, i)) { ++ /* One step delta relative to another constant. */ ++ } ++ else if ((as->flags & JIT_F_ARMV6T2)) { ++ /* 32 bit hiword/loword constant for ARMv6T2. */ ++ emit_d(as, ARMI_MOVT | (((i >> 16) & 0xff) << 16) | (((i >> 16) & 0x700) << 20) | (((i >> 16) & 0x800) >> 1) | (((i >> 16) & 0xf000) >> 12), rd); ++ emit_d(as, ARMI_MOVW | ((i & 0xff) << 16) | ((i & 0x700) << 20) | ((i & 0x800) >> 1) | ((i & 0xf000) >> 12), rd); ++ } ++ else if (emit_kdelta2(as, rd, i)) { ++ /* Two step delta relative to another constant. */ ++ } ++ else { ++ /* Otherwise construct the constant with up to 4 instructions. */ ++ /* NYI: use mvn+bic, use pc-relative loads. */ ++ for (;;) { ++ uint32_t sh = lj_ffs(i) & ~1; ++ int32_t m = i & (255 << sh); ++ i &= ~(255 << sh); ++ if (i == 0) { ++ emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd); ++ break; ++ } ++ emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd); ++ } ++ } ++} ++ ++#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr))) ++ ++static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); ++ ++/* Get/set from constant pointer. */ ++static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) ++{ ++ int32_t i = i32ptr(p); ++ emit_lso(as, ++ ai, ++ r, ++ ra_allock(as, (i & ~4095), rset_exclude(RSET_GPR, r)), ++ (i & 4095)); ++} ++ ++#if !LJ_SOFTFP ++/* Load a number constant into an FPR. */ ++static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) ++{ ++ cTValue *tv = ir_knum(ir); ++ int32_t i; ++ if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { ++ uint32_t hi = tv->u32.hi; ++ uint32_t b = ((hi >> 22) & 0x1ff); ++ if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) { ++ * --as->mcp = ARMI_VMOVI_D | ARMF_T(r & 15) | ++ ((((tv->u32.hi >> 12) & 0x00080000) | ++ ((tv->u32.hi >> 4) & 0x00070000)) >> 16) | ++ (((tv->u32.hi >> 16) & 0x0000000f) << 16); ++ return; ++ } ++ } ++ i = i32ptr(tv); ++ emit_vlso(as, ++ ARMI_VLDR_D, ++ r, ++ ra_allock(as, (i & ~1020), RSET_GPR), ++ (i & 1020)); ++} ++#endif ++ ++/* Get/set global_State fields. */ ++#define emit_getgl(as, r, field) \ ++ emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field) ++#define emit_setgl(as, r, field) \ ++ emit_lsptr(as, ARMI_STR, (r), (void *)&J2G(as->J)->field) ++ ++/* Trace number is determined from pc of exit instruction. */ ++#define emit_setvmstate(as, i) UNUSED(i) ++ ++/* -- Emit control-flow instructions -------------------------------------- */ ++ ++/* Label for internal jumps. */ ++typedef MCode *MCLabel; ++ ++/* Return label pointing to current PC. 
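++** Note that code is emitted backwards, so a label captured here denotes
++** an instruction that executes *after* anything emitted later. Typical
++** use (cf. asm_gc_check in the assembler backend):
++**
++**   MCLabel l_end = emit_label(as);
++**   ... emit more code, which will run before l_end ...
++**   emit_branch(as, ARMF_CC(ARMI_B, CC_LS), l_end);  <- forward branch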
*/ ++#define emit_label(as) ((as)->mcp) ++ ++static void emit_branch(ASMState *as, ARMIns ai, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = (target - p) << 1; ++ lj_assertA(((delta + 0x0080000) >> 20) == 0, "branch target out of range"); ++ * --p = ai | ARMC_B((uint32_t)delta & 0x00fffffu); ++ as->mcp = p; ++} ++ ++static void emit_branchlink(ASMState *as, ARMIns ai, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = (target - p) << 1; ++ * --p = ai | ARMC_BL((uint32_t)delta & 0x0ffffffu); ++ as->mcp = p; ++} ++ ++static void emit_jmp(ASMState *as, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = (target - p) << 1; ++ lj_assertA(((delta + 0x0800000) >> 24) == 0, "jump target out of range"); ++ * --p = ARMI_B_T4 | ARMC_BL((uint32_t)delta & 0x00ffffffu); ++ as->mcp = p; ++} ++ ++static void emit_call(ASMState *as, void *target) ++{ ++ MCode *p = --as->mcp; ++ ptrdiff_t delta = ((char *)target - (char *)p) - 4; ++ if ((((delta >> 1) + 0x00100000) >> 21) == 0) { ++ /* Only Thumb code is allowed */ ++ *p = ARMI_BL | ARMC_BL((uint32_t)(delta >> 1)); ++ } ++ else { ++ /* Target out of range: need indirect call. But don't use R0-R3. */ ++ Reg r = ra_allock(as, i32ptr(target), RSET_RANGE(RID_R4, RID_R12 + 1)); ++ *p = ARMI_BLXr | ARMF_M2(r); ++ } ++} ++ ++/* -- Emit generic operations --------------------------------------------- */ ++ ++/* Generic move between two regs. */ ++static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) ++{ ++#if LJ_SOFTFP ++ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); ++#else ++ if (dst >= RID_MAX_GPR) { ++ emit_tm(as, ++ irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, ++ (dst & 15), ++ (src & 15)); ++ return; ++ } ++#endif ++ if (as->mcp != as->mcloop) { ++ /* Swap early registers for loads/stores. */ ++ MCode ins = *as->mcp, swp = (src ^ dst); ++ if ((ins & 0x0fc0ff80) == 0x0000f800) { ++ if (!((ins ^ dst) & 0x0000000f)) ++ *as->mcp = ins ^ swp; /* Swap N in load/store. */ ++ if (!(ins & 0x00000010) && !((ins ^ (dst << 28)) & 0xf0000000)) ++ *as->mcp = ins ^ (swp << 28); /* Swap D in store. */ ++ } ++ } ++ emit_dm(as, ARMI_MOV, dst, src); ++} ++ ++/* Generic load of register with base and (small) offset address. */ ++static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++#if LJ_SOFTFP ++ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); ++#else ++ if (r >= RID_MAX_GPR) ++ emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs); ++ else ++#endif ++ emit_lso(as, ARMI_LDR, r, base, ofs); ++} ++ ++/* Generic store of register with base and (small) offset address. */ ++static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++#if LJ_SOFTFP ++ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); ++#else ++ if (r >= RID_MAX_GPR) ++ emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs); ++ else ++#endif ++ emit_lso(as, ARMI_STR, r, base, ofs); ++} ++ ++/* Emit an arithmetic/logic operation with a constant operand. */ ++static void emit_opk(ASMState *as, ++ ARMIns ai, ++ Reg dest, ++ Reg src, ++ int32_t i, ++ RegSet allow) ++{ ++ uint32_t k = emit_isk12(ai, i); ++ if (k) ++ emit_dn(as, ai ^ k, dest, src); ++ else ++ emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); ++} ++ ++/* Add offset to pointer. 
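++** The generic pattern for constant operands is emit_opk above: try a K12
++** immediate first, else materialize the constant in a scratch register.
++** emit_addptr below thus ends up as one of (sketch):
++**
++**   add  r, r, #ofs     ; if emit_isk12(ARMI_ADD, ofs) succeeds
++**   add  r, r, rK       ; otherwise, with rK = ra_allock(as, ofs, ...)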
*/ ++static void emit_addptr(ASMState *as, Reg r, int32_t ofs) ++{ ++ if (ofs) ++ emit_opk(as, ARMI_ADD, r, r, ofs, rset_exclude(RSET_GPR, r)); ++} ++ ++#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) ++ +diff --git a/src/lj_jit.h b/src/lj_jit.h +index 7f081730..a95f9972 100644 +--- a/src/lj_jit.h ++++ b/src/lj_jit.h +@@ -109,7 +109,7 @@ + #define JIT_P_sizemcode_DEFAULT 64 + #else + /* Could go as low as 4K, but the mmap() overhead would be rather high. */ +-#define JIT_P_sizemcode_DEFAULT 32 ++#define JIT_P_sizemcode_DEFAULT 8 + #endif + + /* Optimization parameters and their defaults. Length is a char in octal! */ +diff --git a/src/lj_mcode.c b/src/lj_mcode.c +index 163aada4..a9d44cd4 100644 +--- a/src/lj_mcode.c ++++ b/src/lj_mcode.c +@@ -45,6 +45,8 @@ void lj_mcode_sync(void *start, void *end) + sys_icache_invalidate(start, (char *)end-(char *)start); + #elif LJ_TARGET_PPC + lj_vm_cachesync(start, end); ++#elif LJ_TARGET_NUTTX ++ up_invalidate_icache_all(); + #elif defined(__GNUC__) || defined(__clang__) + __clear_cache(start, end); + #else +@@ -86,6 +88,48 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) + return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); + } + ++#elif LJ_TARGET_NUTTX ++ ++#include ++#include ++ ++static bool initialized = false; ++static struct mm_heap_s *g_mcode_heap; ++ ++#define MCPROT_RW 0 ++#define MCPROT_RX 0 ++#define MCPROT_RWX 0 ++ ++static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) ++{ ++ UNUSED(J); UNUSED(prot); ++ ++ if (!initialized) { ++ g_mcode_heap = mm_initialize("mcode", ++ (void *)CONFIG_LUAJIT_MCODE_START, ++ CONFIG_LUAJIT_MCODE_SIZE); ++ initialized = true; ++ } ++ ++ void *p = mm_malloc(g_mcode_heap, sz); ++ if (p == NULL) { ++ if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); ++ } ++ return p; ++} ++ ++static void mcode_free(jit_State *J, void *p, size_t sz) ++{ ++ UNUSED(J); UNUSED(sz); ++ mm_free(g_mcode_heap, p); ++} ++ ++static int mcode_setprot(void *p, size_t sz, int prot) ++{ ++ UNUSED(p); UNUSED(sz); UNUSED(prot); ++ return 0; ++} ++ + #elif LJ_TARGET_POSIX + + #include +diff --git a/src/lj_target.h b/src/lj_target.h +index 19716928..8cee29ea 100644 +--- a/src/lj_target.h ++++ b/src/lj_target.h +@@ -137,7 +137,11 @@ typedef uint32_t RegCost; + #if LJ_TARGET_X86ORX64 + #include "lj_target_x86.h" + #elif LJ_TARGET_ARM ++#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) ++#include "lj_target_armv7m.h" ++#else + #include "lj_target_arm.h" ++#endif + #elif LJ_TARGET_ARM64 + #include "lj_target_arm64.h" + #elif LJ_TARGET_PPC +diff --git a/src/lj_target_armv7m.h b/src/lj_target_armv7m.h +new file mode 100755 +index 00000000..5dc6d488 +--- /dev/null ++++ b/src/lj_target_armv7m.h +@@ -0,0 +1,315 @@ ++/* ++** Definitions for ARMv7-M CPUs. ++** Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++*/ ++ ++#ifndef _LJ_TARGET_ARMV7M_H ++#define _LJ_TARGET_ARMV7M_H ++ ++/* -- Registers IDs ------------------------------------------------------- */ ++ ++#define GPRDEF(_) \ ++ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ ++ _(R8) _(R9) _(R10) _(R11) _(R12) _(SP) _(LR) _(PC) ++#if LJ_SOFTFP ++#define FPRDEF(_) ++#else ++#define FPRDEF(_) \ ++ _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ ++ _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) ++#endif ++#define VRIDDEF(_) ++ ++#define RIDENUM(name) RID_##name, ++ ++enum { ++ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). 
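++  ** The X-macros above expand to RID_R0 = 0 ... RID_PC = 15, so register
++  ** IDs coincide with hardware register numbers (RID_SP = 13, RID_LR = 14,
++  ** RID_PC = 15), with the FPRs numbered right after:
++  **
++  **   enum { RID_R0, RID_R1, ..., RID_PC, RID_D0, ..., RID_D15, RID_MAX, ... };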
*/ ++ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ ++ RID_MAX, ++ RID_TMP = RID_LR, ++ ++ /* Calling conventions. */ ++ RID_RET = RID_R0, ++ RID_RETLO = RID_R0, ++ RID_RETHI = RID_R1, ++#if LJ_SOFTFP ++ RID_FPRET = RID_R0, ++#else ++ RID_FPRET = RID_D0, ++#endif ++ ++ /* These definitions must match with the *.dasc file(s): */ ++ RID_BASE = RID_R9, /* Interpreter BASE. */ ++ RID_LPC = RID_R6, /* Interpreter PC. */ ++ RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */ ++ RID_LREG = RID_R8, /* Interpreter L. */ ++ ++ /* Register ranges [min, max) and number of registers. */ ++ RID_MIN_GPR = RID_R0, ++ RID_MAX_GPR = RID_PC + 1, ++ RID_MIN_FPR = RID_MAX_GPR, ++#if LJ_SOFTFP ++ RID_MAX_FPR = RID_MIN_FPR, ++#else ++ RID_MAX_FPR = RID_D15 + 1, ++#endif ++ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, ++ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR ++}; ++ ++#define RID_NUM_KREF RID_NUM_GPR ++#define RID_MIN_KREF RID_R0 ++ ++/* -- Register sets ------------------------------------------------------- */ ++ ++/* Make use of all registers, except sp, lr and pc. */ ++#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1)) ++#define RSET_GPREVEN \ ++ (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \ ++ RID2RSET(RID_R8)|RID2RSET(RID_R10)) ++#define RSET_GPRODD \ ++ (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \ ++ RID2RSET(RID_R9)|RID2RSET(RID_R11)) ++#if LJ_SOFTFP ++#define RSET_FPR 0 ++#else ++#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) ++#endif ++#define RSET_ALL (RSET_GPR|RSET_FPR) ++#define RSET_INIT RSET_ALL ++ ++/* ABI-specific register sets. lr is an implicit scratch register. */ ++#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12)) ++#ifdef __APPLE__ ++#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9)) ++#else ++#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_ ++#endif ++#if LJ_SOFTFP ++#define RSET_SCRATCH_FPR 0 ++#else ++#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1)) ++#endif ++#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) ++#define REGARG_FIRSTGPR RID_R0 ++#define REGARG_LASTGPR RID_R3 ++#define REGARG_NUMGPR 4 ++#if LJ_ABI_SOFTFP ++#define REGARG_FIRSTFPR 0 ++#define REGARG_LASTFPR 0 ++#define REGARG_NUMFPR 0 ++#else ++#define REGARG_FIRSTFPR RID_D0 ++#define REGARG_LASTFPR RID_D7 ++#define REGARG_NUMFPR 8 ++#endif ++ ++/* -- Spill slots --------------------------------------------------------- */ ++ ++/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. ++** ++** SPS_FIXED: Available fixed spill slots in interpreter frame. ++** This definition must match with the *.dasc file(s). ++** ++** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. ++*/ ++#define SPS_FIXED 2 ++#define SPS_FIRST 2 ++ ++#define SPOFS_TMP 0 ++ ++#define sps_scale(slot) (4 * (int32_t)(slot)) ++#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) ++ ++/* -- Exit state ---------------------------------------------------------- */ ++ ++/* This definition must match with the *.dasc file(s). */ ++typedef struct { ++#if !LJ_SOFTFP ++ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ ++#endif ++ int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ ++ int32_t spill[256]; /* Spill slots. */ ++} ExitState; ++ ++/* PC after instruction that caused an exit. Used to find the trace number. */ ++#define EXITSTATE_PCREG RID_PC ++/* Highest exit + 1 indicates stack check. 
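++** Exit numbering note: snapshots use exit numbers 0..nsnap-1, and one
++** extra stub is reserved so asm_stack_check in the head of a side trace
++** has an exit of its own (asm_setup_target sets up nsnap + 1 stubs when
++** there is a parent trace).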
*/ ++#define EXITSTATE_CHECKEXIT 1 ++ ++#define EXITSTUB_SPACING 4 ++#define EXITSTUBS_PER_GROUP 32 ++ ++/* -- Instructions -------------------------------------------------------- */ ++ ++/* Instruction fields. */ ++#define ARMF_CC(ai, cc) ((ai) | ((cc) << 6)) ++#define ARMF_N(r) ((r)<<0) ++#define ARMF_T(r) ((r)<<28) ++#define ARMF_D(r) ((r)<<24) ++#define ARMF_M(r) ((r)<<16) ++#define ARMF_M2(r) ((r)<<19) // BLXr ++#define ARMF_SH(sh, n) (((sh)<<20)|(((n)&0x3)<<22)|((((n)>>2)&0x7)<<28)) ++#define ARMF_LSL(n) ((n&0x3)<<20) ++#define ARMF_RSH(sh, r) (0xf0000000|((sh)<<5)|ARMF_M(r)) ++ ++/* Instruction compositing */ ++#define ARMC_K12(arg1, arg2) (((arg1)^ARMI_K12)| \ ++ (((arg2)&0xff)<<16)| \ ++ (((arg2)&0x700)<<20)| \ ++ (((arg2)&0x800)>>1)) ++#define ARMC_B(arg) ((((arg)&0x7ff)<<16)| \ ++ (((arg)&0x1f800)>>11)| \ ++ (((arg)&0x20000)<<12)| \ ++ (((arg)&0x40000)<<9)| \ ++ (((arg)&0x80000)>>9)) ++#define ARMC_BL(arg) ((((arg)&0x7ff)<<16)| \ ++ (((arg)&0x1ff800)>>11)| \ ++ (((~(((arg)&0x200000)>>21)&0x1)^((((arg)&0x800000)>>23)&0x1))<<27)| \ ++ (((~(((arg)&0x400000)>>22)&0x1)^((((arg)&0x800000)>>23)&0x1))<<29)| \ ++ (((((arg)&0x800000)>>23)&0x1)<<10)) ++#define ARMC_BL_READ(ins) (((((ins)&0x07ff0000u)>>16))| \ ++ (((ins)&0x000003ffu)<<11)| \ ++ (((~((((ins)&0x08000000u)>>27)^(((ins)&0x00000400u)>>10)))&0x1)<<21)| \ ++ (((~((((ins)&0x20000000u)>>29)^(((ins)&0x00000400u)>>10)))&0x1)<<22)| \ ++ ((((ins)&0x00000400u)>>10)<<23)) ++#define ARMI_IT(cc) *--as->mcp = (0xbf08bf00u|(((cc)&0xf)<<20)) ++ ++ ++typedef enum ARMIns { ++ ARMI_CCAL = 0x000003c0, ++ ARMI_S = 0x00000010, ++ ARMI_K12 = 0x00001a00, ++ ++ ARMI_LS_W = 0x01000000, ++ ARMI_LS_U = 0x02000000, ++ ARMI_LS_P = 0x04000000, ++ ARMI_LS_1 = 0x08000000, ++ ARMI_LS_I = 0x00000080, ++ ARMI_LSX_W = 0x00000020, ++ ARMI_LSX_U = 0x00000080, ++ ARMI_LSX_P = 0x00000100, ++ ++ ARMI_AND = 0x0000ea00, ++ ARMI_EOR = 0x0000ea80, ++ ARMI_SUB = 0x0000eba0, ++ ARMI_RSB = 0x0000ebc0, ++ ARMI_ADD = 0x0000eb00, ++ ARMI_ADC = 0x0000eb40, ++ ARMI_SBC = 0x0000eb60, ++ // ARMI_RSC = 0xe0e00000, ++ ARMI_TST = 0x0f00ea10, ++ ARMI_TEQ = 0x0f00ea90, ++ ARMI_CMP = 0x0f00ebb0, ++ ARMI_CMN = 0x0f00eb10, ++ ARMI_ORR = 0x0000ea40, ++ ARMI_MOV = 0x0000ea4f, ++ ARMI_BIC = 0x0000ea20, ++ ARMI_MVN = 0x0000ea6f, ++ ARMI_NOP = 0xbf00bf00, ++ ++ ARMI_MUL = 0xf000fb00, ++ ARMI_SMULL = 0x0000fb80, ++ ++ ARMI_LDR = 0x0000f850, ++ ARMI_LDRB = 0x0000f810, ++ ARMI_LDRH = 0x0000f830, ++ ARMI_LDRSB = 0x0000f910, ++ ARMI_LDRSH = 0x0000f930, ++ ARMI_LDRD = 0x0000e850, ++ ARMI_STR = 0x0000f840, ++ ARMI_STRB = 0x0000f800, ++ ARMI_STRH = 0x0000f820, ++ ARMI_STRD = 0x0000e840, ++ ARMI_PUSH = 0x0000e92d, ++ ++ ARMI_B = 0x8000f000, ++ ARMI_B_T4 = 0x9000f000, ++ ARMI_BL = 0xd000f000, ++ ARMI_BLXr = 0x4780bf00, ++ ++ /* ARMv6 */ ++ ARMI_REV = 0xf080fa90, ++ ARMI_SXTB = 0xf080fa4f, ++ ARMI_SXTH = 0xf080fa0f, ++ ARMI_UXTB = 0xf080fa5f, ++ ARMI_UXTH = 0xf080fa1f, ++ ++ /* ARMv6T2 */ ++ ARMI_MOVW = 0x0000f240, ++ ARMI_MOVT = 0x0000f2c0, ++ ++ /* VFP */ ++ ARMI_VMOV_D = 0x0b40eeb0, ++ ARMI_VMOV_S = 0x0a40eeb0, ++ ARMI_VMOVI_D = 0x0b00eeb0, ++ ++ ARMI_VMOV_R_S = 0x0a10ee10, ++ ARMI_VMOV_S_R = 0x0a10ee00, ++ ARMI_VMOV_RR_D = 0x0b10ec50, ++ ARMI_VMOV_D_RR = 0x0b10ec40, ++ ++ ARMI_VADD_D = 0x0b00ee30, ++ ARMI_VSUB_D = 0x0b40ee30, ++ ARMI_VMUL_D = 0x0b00ee20, ++ ARMI_VMLA_D = 0x0b00ee00, ++ ARMI_VMLS_D = 0x0b40ee00, ++ ARMI_VNMLS_D = 0x0b00ee10, ++ ARMI_VDIV_D = 0x0b00ee80, ++ ++ ARMI_VABS_D = 0x0bc0eeb0, ++ ARMI_VNEG_D = 0x0b40eeb1, ++ ARMI_VSQRT_D = 0x0bc0eeb1, ++ ++ ARMI_VCMP_D = 0x0b40eeb4, ++ 
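++  /* Note: the 32-bit Thumb-2 opcodes in this enum are stored
++  ** halfword-swapped, first halfword in the low 16 bits, matching the
++  ** single 32-bit MCode unit the emitter writes per instruction. E.g. the
++  ** ARM ARM lists VCMP.F64 as 0xeeb4 0x0b40, which is 0x0b40eeb4 here.
++  */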
ARMI_VCMPZ_D = 0x0b40eeb5, ++ ++ ARMI_VMRS = 0xfa10eef1, ++ ++ ARMI_VCVT_S32_F32 = 0x0ac0eebd, ++ ARMI_VCVT_S32_F64 = 0x0bc0eebd, ++ ARMI_VCVT_U32_F32 = 0x0ac0eebc, ++ ARMI_VCVT_U32_F64 = 0x0bc0eebc, ++ ARMI_VCVT_F32_S32 = 0x0ac0eeb8, ++ ARMI_VCVT_F64_S32 = 0x0bc0eeb8, ++ ARMI_VCVT_F32_U32 = 0x0a40eeb8, ++ ARMI_VCVT_F64_U32 = 0x0b40eeb8, ++ ARMI_VCVT_F32_F64 = 0x0bc0eeb7, ++ ARMI_VCVT_F64_F32 = 0x0ac0eeb7, ++ ++ ARMI_VLDR_S = 0x0a00ed10, ++ ARMI_VLDR_D = 0x0b00ed10, ++ ARMI_VSTR_S = 0x0a00ed00, ++ ARMI_VSTR_D = 0x0b00ed00, ++} ARMIns; ++ ++typedef enum ARMShift { ++ ARMSH_LSL, ++ ARMSH_LSR, ++ ARMSH_ASR, ++ ARMSH_ROR ++} ARMShift; ++ ++/* ARM condition codes. */ ++typedef enum ARMCC { ++ CC_EQ, ++ CC_NE, ++ CC_CS, ++ CC_CC, ++ CC_MI, ++ CC_PL, ++ CC_VS, ++ CC_VC, ++ CC_HI, ++ CC_LS, ++ CC_GE, ++ CC_LT, ++ CC_GT, ++ CC_LE, ++ CC_AL, ++ CC_HS = CC_CS, ++ CC_LO = CC_CC ++} ARMCC; ++ ++#endif +diff --git a/src/vm_armv7m.dasc b/src/vm_armv7m.dasc +new file mode 100755 +index 00000000..13266007 +--- /dev/null ++++ b/src/vm_armv7m.dasc +@@ -0,0 +1,4901 @@ ++|// Low-level VM code for ARMV7M CPUs. ++|// Bytecode interpreter, fast functions and helper functions. ++|// Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h ++|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++| ++|.arch armv7m ++|.section code_op, code_sub ++| ++|.actionlist build_actionlist ++|.globals GLOB_ ++|.globalnames globnames ++|.externnames extnames ++| ++|// Note: The ragged indentation of the instructions is intentional. ++|// The starting columns indicate data dependencies. ++| ++|//----------------------------------------------------------------------- ++| ++|.macro ldrd_i, rt, rt2, rn, rm ++| add rt, rn, rm ++| ldm rt, {rt, rt2} ++|.endmacro ++|.macro ldrd_iw, rt, rt2, rn, rm ++| add rn, rn, rm ++| ldrd rt, rt2, [rn] ++|.endmacro ++| ++|.macro ldrdlo_i, rt, rt2, rn, rm ++| itt lo ++| addlo rt, rn, rm ++| ldmlo rt, {rt, rt2} ++|.endmacro ++|.macro ldrdlo_iw, rt, rt2, rn, rm ++| itt lo ++| addlo rn, rn, rm ++| ldrdlo rt, rt2, [rn] ++|.endmacro ++| ++|.macro strd_i, rt, rt2, rn, rm ++| add rn, rn, rm ++| strd rt, rt2, [rn] ++| sub rn, rn, rm ++|.endmacro ++| ++|.macro strdne_i, rt, rt2, rn, rm ++| ittt ne ++| addne rn, rn, rm ++| strdne rt, rt2, [rn] ++| subne rn, rn, rm ++|.endmacro ++|.macro strdls_i, rt, rt2, rn, rm ++| ittt ls ++| addls rn, rn, rm ++| strdls rt, rt2, [rn] ++| subls rn, rn, rm ++|.endmacro ++|.macro strdhi_i, rt, rt2, rn, rm ++| ittt hi ++| addhi rn, rn, rm ++| strdhi rt, rt2, [rn] ++| subhi rn, rn, rm ++|.endmacro ++| ++|// Fixed register assignments for the interpreter. ++| ++|// The following must be C callee-save. ++|.define MASKR8, r4 // 255*8 constant for fast bytecode decoding. ++|.define KBASE, r5 // Constants of current Lua function. ++|.define PC, r6 // Next PC. ++|.define DISPATCH,r7 // Opcode dispatch table. ++|.define LREG, r8 // Register holding lua_State (also in SAVE_L). ++| ++|// C callee-save in EABI, but often refetched. Temporary in iOS 3.0+. ++|.define BASE, r9 // Base of current Lua stack frame. ++| ++|// The following temporaries are not saved across C calls, except for RA/RC. ++|.define RA, r10 // Callee-save. ++|.define RC, r11 // Callee-save. ++|.define RB, r12 ++|.define OP, r12 // Overlaps RB, must not be lr. ++|.define INS, lr ++| ++|// Calling conventions. Also used as temporaries. 
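++|// (AAPCS recap: r0-r3 carry arguments and results and are caller-saved,
++|// which is why CARG1-CARG4 and CRET1/CRET2 below double as temporaries.
++|// Example C call from this file: lj_state_growstack(L, n) is invoked
++|// with CARG1 = L and CARG2 = n.)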
++|.define CARG1, r0 ++|.define CARG2, r1 ++|.define CARG3, r2 ++|.define CARG4, r3 ++| ++|.define CRET1, r0 ++|.define CRET2, r1 ++| ++|// Stack layout while in interpreter. Must match with lj_frame.h. ++|.define SAVE_R4, [sp, #28] ++|.define CFRAME_SPACE, #28 ++|.define SAVE_ERRF, [sp, #24] ++|.define SAVE_NRES, [sp, #20] ++|.define SAVE_CFRAME, [sp, #16] ++|.define SAVE_L, [sp, #12] ++|.define SAVE_PC, [sp, #8] ++|.define SAVE_MULTRES, [sp, #4] ++|.define ARG5, [sp] ++| ++|.define TMPDhi, [sp, #4] ++|.define TMPDlo, [sp] ++|.define TMPD, [sp] ++|.define TMPDp, sp ++| ++|.if FPU ++|.macro saveregs ++| push {r5, r6, r7, r8, r9, r10, r11, lr} ++| vpush {d8-d15} ++| sub sp, sp, CFRAME_SPACE+4 ++| str r4, SAVE_R4 ++|.endmacro ++|.macro restoreregs_ret ++| ldr r4, SAVE_R4 ++| add sp, sp, CFRAME_SPACE+4 ++| vpop {d8-d15} ++| pop {r5, r6, r7, r8, r9, r10, r11, pc} ++|.endmacro ++|.else ++|.macro saveregs ++| push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ++| sub sp, sp, CFRAME_SPACE ++|.endmacro ++|.macro restoreregs_ret ++| add sp, sp, CFRAME_SPACE ++| pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ++|.endmacro ++|.endif ++| ++|// Type definitions. Some of these are only used for documentation. ++|.type L, lua_State, LREG ++|.type GL, global_State ++|.type TVALUE, TValue ++|.type GCOBJ, GCobj ++|.type STR, GCstr ++|.type TAB, GCtab ++|.type LFUNC, GCfuncL ++|.type CFUNC, GCfuncC ++|.type PROTO, GCproto ++|.type UPVAL, GCupval ++|.type NODE, Node ++|.type NARGS8, int ++|.type TRACE, GCtrace ++|.type SBUF, SBuf ++| ++|//----------------------------------------------------------------------- ++| ++|// Trap for not-yet-implemented parts. ++|.macro NYI; bkpt #0; .endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|// Access to frame relative to BASE. ++|.define FRAME_FUNC, #-8 ++|.define FRAME_PC, #-4 ++| ++|.macro decode_RA8, dst, ins; and dst, MASKR8, ins, lsr #5; .endmacro ++|.macro decode_RB8, dst, ins; and dst, MASKR8, ins, lsr #21; .endmacro ++|.macro decode_RC8, dst, ins; and dst, MASKR8, ins, lsr #13; .endmacro ++|.macro decode_RD, dst, ins; lsr dst, ins, #16; .endmacro ++|.macro decode_OP, dst, ins; and dst, ins, #255; .endmacro ++| ++|// Instruction fetch. ++|.macro ins_NEXT1 ++| ldrb OP, [PC] ++|.endmacro ++|.macro ins_NEXT2 ++| ldr INS, [PC], #4 ++|.endmacro ++|// Instruction decode+dispatch. ++|.macro ins_NEXT3 ++| ldr OP, [DISPATCH, OP, lsl #2] ++| decode_RA8 RA, INS ++| decode_RD RC, INS ++| bx OP ++|.endmacro ++|.macro ins_NEXT ++| ins_NEXT1 ++| ins_NEXT2 ++| ins_NEXT3 ++|.endmacro ++| ++|// Instruction footer. ++|.if 1 ++| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. ++| .define ins_next, ins_NEXT ++| .define ins_next_, ins_NEXT ++| .define ins_next1, ins_NEXT1 ++| .define ins_next2, ins_NEXT2 ++| .define ins_next3, ins_NEXT3 ++|.else ++| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. ++| // Affects only certain kinds of benchmarks (and only with -j off). ++| .macro ins_next ++| b ->ins_next ++| .endmacro ++| .macro ins_next1 ++| .endmacro ++| .macro ins_next2 ++| .endmacro ++| .macro ins_next3 ++| b ->ins_next ++| .endmacro ++| .macro ins_next_ ++| ->ins_next: ++| ins_NEXT ++| .endmacro ++|.endif ++| ++|// Avoid register name substitution for field name. ++#define field_pc pc ++| ++|// Call decode and dispatch. ++|.macro ins_callt ++| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC ++| ldr PC, LFUNC:CARG3->field_pc ++| ldrb OP, [PC] // STALL: load PC. 
early PC. ++| ldr INS, [PC], #4 ++| ldr OP, [DISPATCH, OP, lsl #2] // STALL: load OP. early OP. ++| decode_RA8 RA, INS ++| add RA, RA, BASE ++| bx OP ++|.endmacro ++| ++|.macro ins_call ++| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC ++| str PC, [BASE, FRAME_PC] ++| ins_callt // STALL: locked PC. ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|// Macros to test operand types. ++|.macro checktp, reg, tp; cmn reg, #-tp; .endmacro ++|.macro checktpeq, reg, tp; it eq; cmneq reg, #-tp; .endmacro ++|.macro checktpne, reg, tp; it ne; cmnne reg, #-tp; .endmacro ++|.macro checkstr, reg, target; checktp reg, LJ_TSTR; bne target; .endmacro ++|.macro checktab, reg, target; checktp reg, LJ_TTAB; bne target; .endmacro ++|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC; bne target; .endmacro ++| ++|// Assumes DISPATCH is relative to GL. ++#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) ++#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) ++| ++#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) ++| ++|.macro hotcheck, delta ++| lsr CARG1, PC, #1 ++| and CARG1, CARG1, #126 ++| sub CARG1, CARG1, #-GG_DISP2HOT ++| ldrh CARG2, [DISPATCH, CARG1] ++| subs CARG2, CARG2, #delta ++| strh CARG2, [DISPATCH, CARG1] ++|.endmacro ++| ++|.macro hotloop ++| hotcheck HOTCOUNT_LOOP ++| blo ->vm_hotloop ++|.endmacro ++| ++|.macro hotcall ++| hotcheck HOTCOUNT_CALL ++| blo ->vm_hotcall ++|.endmacro ++| ++|// Set current VM state. ++|.macro mv_vmstate, reg, st; mvn reg, #LJ_VMST_..st; .endmacro ++|.macro st_vmstate, reg; push {r12}; sub r12, DISPATCH, #-DISPATCH_GL(vmstate); str reg, [r12]; pop {r12}; .endmacro ++| ++|// Move table write barrier back. Overwrites mark and tmp. ++|.macro barrierback, tab, mark, tmp ++| sub tmp, DISPATCH, #-DISPATCH_GL(gc.grayagain) ++| ldr tmp, [tmp] ++| str tmp, tab->gclist ++| sub tmp, DISPATCH, #-DISPATCH_GL(gc.grayagain) ++| bic mark, mark, #LJ_GC_BLACK // black2gray(tab) ++| str tab, [tmp] ++| strb mark, tab->marked ++|.endmacro ++| ++|.macro .IOS, a, b ++|.if IOS ++| a, b ++|.endif ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++ ++#if !LJ_DUALNUM ++#error "Only dual-number mode supported for ARM target" ++#endif ++ ++/* Generate subroutines used by opcodes and other parts of the VM. */ ++/* The .code_sub section should be last to help static branch prediction. */ ++static void build_subroutines(BuildCtx *ctx) ++{ ++ |.code_sub ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Return handling ---------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_returnp: ++ | // See vm_return. Also: RB = previous base. ++ | tst PC, #FRAME_P ++ | beq ->cont_dispatch ++ | ++ | // Return from pcall or xpcall fast func. ++ | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame. ++ | mvn CARG2, #~LJ_TTRUE ++ | mov BASE, RB ++ | // Prepending may overwrite the pcall frame, so do it at the end. ++ | str CARG2, [RA, FRAME_PC] // Prepend true to results. ++ | sub RA, RA, #8 ++ | ++ |->vm_returnc: ++ | adds RC, RC, #8 // RC = (nresults+1)*8. ++ | mov CRET1, #LUA_YIELD ++ | beq ->vm_unwind_c_eh ++ | str RC, SAVE_MULTRES ++ | ands CARG1, PC, #FRAME_TYPE ++ | beq ->BC_RET_Z // Handle regular return to Lua. 
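++  | // (Dispatch on the frame-type bits just tested: zero means a plain
++  | // Lua frame, handled by BC_RET_Z above; vm_return below handles
++  | // FRAME_C directly and forwards everything else to vm_returnp.)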
++ | ++ |->vm_return: ++ | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return ++ | // CARG1 = PC & FRAME_TYPE ++ | bic RB, PC, #FRAME_TYPEP ++ | cmp CARG1, #FRAME_C ++ | sub RB, BASE, RB // RB = previous base. ++ | bne ->vm_returnp ++ | ++ | str RB, L->base ++ | ldr KBASE, SAVE_NRES ++ | mv_vmstate CARG4, C ++ | sub BASE, BASE, #8 ++ | subs CARG3, RC, #8 ++ | lsl KBASE, KBASE, #3 // KBASE = (nresults_wanted+1)*8 ++ | st_vmstate CARG4 ++ | beq >2 ++ |1: ++ | subs CARG3, CARG3, #8 ++ | ldrd CARG1, CARG2, [RA], #8 ++ | strd CARG1, CARG2, [BASE], #8 ++ | bne <1 ++ |2: ++ | cmp KBASE, RC // More/less results wanted? ++ | bne >6 ++ |3: ++ | str BASE, L->top // Store new top. ++ | ++ |->vm_leave_cp: ++ | ldr RC, SAVE_CFRAME // Restore previous C frame. ++ | mov CRET1, #0 // Ok return status for vm_pcall. ++ | str RC, L->cframe ++ | ++ |->vm_leave_unw: ++ | restoreregs_ret ++ | ++ |6: ++ | blt >7 // Less results wanted? ++ | // More results wanted. Check stack size and fill up results with nil. ++ | ldr CARG3, L->maxstack ++ | mvn CARG2, #~LJ_TNIL ++ | cmp BASE, CARG3 ++ | bhs >8 ++ | str CARG2, [BASE, #4] ++ | add RC, RC, #8 ++ | add BASE, BASE, #8 ++ | b <2 ++ | ++ |7: // Less results wanted. ++ | sub CARG1, RC, KBASE ++ | cmp KBASE, #0 // LUA_MULTRET+1 case? ++ | it ne ++ | subne BASE, BASE, CARG1 // Either keep top or shrink it. ++ | b <3 ++ | ++ |8: // Corner case: need to grow stack for filling up results. ++ | // This can happen if: ++ | // - A C function grows the stack (a lot). ++ | // - The GC shrinks the stack in between. ++ | // - A return back from a lua_call() with (high) nresults adjustment. ++ | str BASE, L->top // Save current top held in BASE (yes). ++ | lsr CARG2, KBASE, #3 ++ | mov CARG1, L ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ldr BASE, L->top // Need the (realloced) L->top in BASE. ++ | b <2 ++ | ++ |->vm_unwind_c: // Unwind C stack, return from vm_pcall. ++ | // (void *cframe, int errcode) ++ | mov sp, CARG1 ++ | mov CRET1, CARG2 ++ |->vm_unwind_c_eh: // Landing pad for external unwinder. ++ | ldr L, SAVE_L ++ | mv_vmstate CARG4, C ++ | ldr GL:CARG3, L->glref ++ | str CARG4, GL:CARG3->vmstate ++ | b ->vm_leave_unw ++ | ++ |->vm_unwind_ff: // Unwind C stack, return from ff pcall. ++ | // (void *cframe) ++ | bic CARG1, CARG1, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated. ++ | mov sp, CARG1 ++ |->vm_unwind_ff_eh: // Landing pad for external unwinder. ++ | ldr L, SAVE_L ++ | mov MASKR8, #255 ++ | mov RC, #16 // 2 results: false + error message. ++ | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. ++ | ldr BASE, L->base ++ | ldr DISPATCH, L->glref // Setup pointer to dispatch table. ++ | mvn CARG1, #~LJ_TFALSE ++ | sub RA, BASE, #8 // Results start at BASE-8. ++ | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. ++ | add DISPATCH, DISPATCH, #GG_G2DISP ++ | mv_vmstate CARG2, INTERP ++ | str CARG1, [BASE, #-4] // Prepend false to error message. ++ | st_vmstate CARG2 ++ | b ->vm_returnc ++ | ++ |->vm_unwind_ext: // Complete external unwind. 
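++  | // (Sketch of the intent below, assuming the usual EH ABI: r0 holds
++  | // the _Unwind_Exception to delete, while r1/r2, restored by the pop,
++  | // apparently carry the value to return in r0 and the continuation
++  | // address for the final bx.)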
++#if !LJ_NO_UNWIND ++ | push {r0, r1, r2, lr} ++ | bl extern _Unwind_Complete ++ | ldr r0, [sp] ++ | bl extern _Unwind_DeleteException ++ | pop {r0, r1, r2, lr} ++ | mov r0, r1 ++ | bx r2 ++#endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Grow stack for calls ----------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_growstack_c: // Grow stack for C function. ++ | // CARG1 = L ++ | mov CARG2, #LUA_MINSTACK ++ | b >2 ++ | ++ |->vm_growstack_l: // Grow stack for Lua function. ++ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC ++ | add RC, BASE, RC ++ | sub RA, RA, BASE ++ | mov CARG1, L ++ | str BASE, L->base ++ | add PC, PC, #4 // Must point after first instruction. ++ | str RC, L->top ++ | lsr CARG2, RA, #3 ++ |2: ++ | // L->base = new base, L->top = top ++ | str PC, SAVE_PC ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ldr BASE, L->base ++ | ldr RC, L->top ++ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] ++ | sub NARGS8:RC, RC, BASE ++ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC ++ | ins_callt // Just retry the call. ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Entry points into the assembler VM --------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_resume: // Setup C frame and resume thread. ++ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) ++ | saveregs ++ | mov L, CARG1 ++ | ldr DISPATCH, L:CARG1->glref // Setup pointer to dispatch table. ++ | mov BASE, CARG2 ++ | add DISPATCH, DISPATCH, #GG_G2DISP ++ | str L, SAVE_L ++ | mov PC, #FRAME_CP ++ | str CARG3, SAVE_NRES ++ | add CARG2, sp, #CFRAME_RESUME ++ | ldrb CARG1, L->status ++ | str CARG3, SAVE_ERRF ++ | str L, SAVE_PC // Any value outside of bytecode is ok. ++ | str CARG3, SAVE_CFRAME ++ | cmp CARG1, #0 ++ | str CARG2, L->cframe ++ | beq >3 ++ | ++ | // Resume after yield (like a return). ++ | str L, [DISPATCH, #DISPATCH_GL(cur_L)] ++ | mov RA, BASE ++ | ldr BASE, L->base ++ | ldr CARG1, L->top ++ | mov MASKR8, #255 ++ | strb CARG3, L->status ++ | sub RC, CARG1, BASE ++ | ldr PC, [BASE, FRAME_PC] ++ | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. ++ | mv_vmstate CARG2, INTERP ++ | add RC, RC, #8 ++ | ands CARG1, PC, #FRAME_TYPE ++ | st_vmstate CARG2 ++ | str RC, SAVE_MULTRES ++ | beq ->BC_RET_Z ++ | b ->vm_return ++ | ++ |->vm_pcall: // Setup protected C frame and enter VM. ++ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) ++ | saveregs ++ | mov PC, #FRAME_CP ++ | str CARG4, SAVE_ERRF ++ | b >1 ++ | ++ |->vm_call: // Setup C frame and enter VM. ++ | // (lua_State *L, TValue *base, int nres1) ++ | saveregs ++ | mov PC, #FRAME_C ++ | ++ |1: // Entry point for vm_pcall above (PC = ftype). ++ | ldr RC, L:CARG1->cframe ++ | str CARG3, SAVE_NRES ++ | mov L, CARG1 ++ | str CARG1, SAVE_L ++ | ldr DISPATCH, L->glref // Setup pointer to dispatch table. ++ | mov BASE, CARG2 ++ | str CARG1, SAVE_PC // Any value outside of bytecode is ok. ++ | str RC, SAVE_CFRAME ++ | add DISPATCH, DISPATCH, #GG_G2DISP ++ | str sp, L->cframe // Add our C frame to cframe chain. ++ | ++ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). ++ | str L, [DISPATCH, #DISPATCH_GL(cur_L)] ++ | ldr RB, L->base // RB = old base (for vmeta_call). 
++ | ldr CARG1, L->top ++ | mov MASKR8, #255 ++ | add PC, PC, BASE ++ | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. ++ | sub PC, PC, RB // PC = frame delta + frame type ++ | mv_vmstate CARG2, INTERP ++ | sub NARGS8:RC, CARG1, BASE ++ | st_vmstate CARG2 ++ | ++ |->vm_call_dispatch: ++ | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC ++ | ldrd CARG3, CARG4, [BASE, FRAME_FUNC] ++ | checkfunc CARG4, ->vmeta_call ++ | ++ |->vm_call_dispatch_f: ++ | ins_call ++ | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC ++ | ++ |->vm_cpcall: // Setup protected C frame, call C. ++ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) ++ | saveregs ++ | mov L, CARG1 ++ | ldr RA, L:CARG1->stack ++ | str CARG1, SAVE_L ++ | ldr DISPATCH, L->glref // Setup pointer to dispatch table. ++ | ldr RB, L->top ++ | str CARG1, SAVE_PC // Any value outside of bytecode is ok. ++ | ldr RC, L->cframe ++ | add DISPATCH, DISPATCH, #GG_G2DISP ++ | sub RA, RA, RB // Compute -savestack(L, L->top). ++ | mov RB, #0 ++ | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. ++ | str RB, SAVE_ERRF // No error function. ++ | str RC, SAVE_CFRAME ++ | str sp, L->cframe // Add our C frame to cframe chain. ++ | str L, [DISPATCH, #DISPATCH_GL(cur_L)] ++ | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) ++ | movs BASE, CRET1 ++ | mov PC, #FRAME_CP ++ | bne <3 // Else continue with the call. ++ | b ->vm_leave_cp // No base? Just remove C frame. ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Metamethod handling ------------------------------------------------ ++ |//----------------------------------------------------------------------- ++ | ++ |//-- Continuation dispatch ---------------------------------------------- ++ | ++ |->cont_dispatch: ++ | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8 ++ | ldr LFUNC:CARG3, [RB, FRAME_FUNC] ++ | ldr CARG1, [BASE, #-16] // Get continuation. ++ | mov CARG4, BASE ++ | mov BASE, RB // Restore caller BASE. ++ |.if FFI ++ | cmp CARG1, #1 ++ |.endif ++ | ldr PC, [CARG4, #-12] // Restore PC from [cont|PC]. ++ | mvn INS, #~LJ_TNIL ++ | add CARG2, RA, RC ++ | str INS, [CARG2, #-4] // Ensure one valid arg. ++ |.if FFI ++ | bls >1 ++ |.endif ++ | ldr CARG3, LFUNC:CARG3->field_pc ++ | ldr KBASE, [CARG3, #PC2PROTO(k)] ++ | // BASE = base, RA = resultptr, CARG4 = meta base ++ | bx CARG1 ++ | ++ |.if FFI ++ |1: ++ | beq ->cont_ffi_callback // cont = 1: return from FFI callback. ++ | // cont = 0: tailcall from C function. 
++ | sub CARG4, CARG4, #16 ++ | sub RC, CARG4, BASE ++ | b ->vm_call_tail ++ |.endif ++ | ++ |->cont_cat: // RA = resultptr, CARG4 = meta base ++ | ldr INS, [PC, #-4] ++ | sub CARG2, CARG4, #16 ++ | ldrd CARG3, CARG4, [RA] ++ | str BASE, L->base ++ | decode_RB8 RC, INS ++ | decode_RA8 RA, INS ++ | add CARG1, BASE, RC ++ | subs CARG1, CARG2, CARG1 ++ | itt ne ++ | strdne CARG3, CARG4, [CARG2] ++ | movne CARG3, CARG1 ++ | bne ->BC_CAT_Z ++ | strd_i CARG3, CARG4, BASE, RA ++ | b ->cont_nop ++ | ++ |//-- Table indexing metamethods ----------------------------------------- ++ | ++ |->vmeta_tgets1: ++ | add CARG2, BASE, RB ++ | b >2 ++ | ++ |->vmeta_tgets: ++ | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv) ++ | mvn CARG4, #~LJ_TTAB ++ | str TAB:RB, [CARG2] ++ | str CARG4, [CARG2, #4] ++ |2: ++ | mvn CARG4, #~LJ_TSTR ++ | str STR:RC, TMPDlo ++ | str CARG4, TMPDhi ++ | mov CARG3, TMPDp ++ | b >1 ++ | ++ |->vmeta_tgetb: // RC = index ++ | decode_RB8 RB, INS ++ | str RC, TMPDlo ++ | mvn CARG4, #~LJ_TISNUM ++ | add CARG2, BASE, RB ++ | str CARG4, TMPDhi ++ | mov CARG3, TMPDp ++ | b >1 ++ | ++ |->vmeta_tgetv: ++ | add CARG2, BASE, RB ++ | add CARG3, BASE, RC ++ |1: ++ | str BASE, L->base ++ | mov CARG1, L ++ | str PC, SAVE_PC ++ | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) ++ | // Returns TValue * (finished) or NULL (metamethod). ++ | .IOS ldr BASE, L->base ++ | cmp CRET1, #0 ++ | beq >3 ++ | ldrd CARG3, CARG4, [CRET1] ++ | ins_next1 ++ | ins_next2 ++ | strd_i CARG3, CARG4, BASE, RA ++ | ins_next3 ++ | ++ |3: // Call __index metamethod. ++ | // BASE = base, L->top = new base, stack = cont/func/t/k ++ | rsb CARG1, BASE, #FRAME_CONT ++ | ldr BASE, L->top ++ | mov NARGS8:RC, #16 // 2 args for func(t, k). ++ | str PC, [BASE, #-12] // [cont|PC] ++ | add PC, CARG1, BASE ++ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. ++ | b ->vm_call_dispatch_f ++ | ++ |->vmeta_tgetr: ++ | .IOS mov RC, BASE ++ | bl extern lj_tab_getinth // (GCtab *t, int32_t key) ++ | // Returns cTValue * or NULL. ++ | .IOS mov BASE, RC ++ | cmp CRET1, #0 ++ | ite ne ++ | ldrdne CARG1, CARG2, [CRET1] ++ | mvneq CARG2, #~LJ_TNIL ++ | b ->BC_TGETR_Z ++ | ++ |//----------------------------------------------------------------------- ++ | ++ |->vmeta_tsets1: ++ | add CARG2, BASE, RB ++ | b >2 ++ | ++ |->vmeta_tsets: ++ | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv) ++ | mvn CARG4, #~LJ_TTAB ++ | str TAB:RB, [CARG2] ++ | str CARG4, [CARG2, #4] ++ |2: ++ | mvn CARG4, #~LJ_TSTR ++ | str STR:RC, TMPDlo ++ | str CARG4, TMPDhi ++ | mov CARG3, TMPDp ++ | b >1 ++ | ++ |->vmeta_tsetb: // RC = index ++ | decode_RB8 RB, INS ++ | str RC, TMPDlo ++ | mvn CARG4, #~LJ_TISNUM ++ | add CARG2, BASE, RB ++ | str CARG4, TMPDhi ++ | mov CARG3, TMPDp ++ | b >1 ++ | ++ |->vmeta_tsetv: ++ | add CARG2, BASE, RB ++ | add CARG3, BASE, RC ++ |1: ++ | str BASE, L->base ++ | mov CARG1, L ++ | str PC, SAVE_PC ++ | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) ++ | // Returns TValue * (finished) or NULL (metamethod). ++ | .IOS ldr BASE, L->base ++ | cmp CRET1, #0 ++ | ldrd_i CARG3, CARG4, BASE, RA ++ | beq >3 ++ | ins_next1 ++ | // NOBARRIER: lj_meta_tset ensures the table is not black. ++ | strd CARG3, CARG4, [CRET1] ++ | ins_next2 ++ | ins_next3 ++ | ++ |3: // Call __newindex metamethod. ++ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) ++ | rsb CARG1, BASE, #FRAME_CONT ++ | ldr BASE, L->top ++ | mov NARGS8:RC, #24 // 3 args for func(t, k, v). 
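++ | // CARG1 = FRAME_CONT - old BASE, so the "add PC, CARG1, BASE" below
++ | // yields PC = frame delta + FRAME_CONT, the continuation frame link.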
++ | strd CARG3, CARG4, [BASE, #16] // Copy value to third argument. ++ | str PC, [BASE, #-12] // [cont|PC] ++ | add PC, CARG1, BASE ++ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. ++ | b ->vm_call_dispatch_f ++ | ++ |->vmeta_tsetr: ++ | str BASE, L->base ++ | .IOS mov RC, BASE ++ | mov CARG1, L ++ | str PC, SAVE_PC ++ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) ++ | // Returns TValue *. ++ | .IOS mov BASE, RC ++ | b ->BC_TSETR_Z ++ | ++ |//-- Comparison metamethods --------------------------------------------- ++ | ++ |->vmeta_comp: ++ | mov CARG1, L ++ | sub PC, PC, #4 ++ | mov CARG2, RA ++ | str BASE, L->base ++ | mov CARG3, RC ++ | str PC, SAVE_PC ++ | decode_OP CARG4, INS ++ | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) ++ | // Returns 0/1 or TValue * (metamethod). ++ |3: ++ | .IOS ldr BASE, L->base ++ | cmp CRET1, #1 ++ | bhi ->vmeta_binop ++ |4: ++ | ldrh RB, [PC, #2] ++ | add PC, PC, #4 ++ | add RB, PC, RB, lsl #2 ++ | it hs ++ | subhs PC, RB, #0x20000 ++ |->cont_nop: ++ | ins_next ++ | ++ |->cont_ra: // RA = resultptr ++ | ldr INS, [PC, #-4] ++ | ldrd CARG1, CARG2, [RA] ++ | decode_RA8 CARG3, INS ++ | strd_i CARG1, CARG2, BASE, CARG3 ++ | b ->cont_nop ++ | ++ |->cont_condt: // RA = resultptr ++ | ldr CARG2, [RA, #4] ++ | mvn CARG1, #~LJ_TTRUE ++ | cmp CARG1, CARG2 // Branch if result is true. ++ | b <4 ++ | ++ |->cont_condf: // RA = resultptr ++ | ldr CARG2, [RA, #4] ++ | checktp CARG2, LJ_TFALSE // Branch if result is false. ++ | b <4 ++ | ++ |->vmeta_equal: ++ | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. ++ | sub PC, PC, #4 ++ | str BASE, L->base ++ | mov CARG1, L ++ | str PC, SAVE_PC ++ | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) ++ | // Returns 0/1 or TValue * (metamethod). ++ | b <3 ++ | ++ |->vmeta_equal_cd: ++ |.if FFI ++ | sub PC, PC, #4 ++ | str BASE, L->base ++ | mov CARG1, L ++ | mov CARG2, INS ++ | str PC, SAVE_PC ++ | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) ++ | // Returns 0/1 or TValue * (metamethod). ++ | b <3 ++ |.endif ++ | ++ |->vmeta_istype: ++ | sub PC, PC, #4 ++ | str BASE, L->base ++ | mov CARG1, L ++ | lsr CARG2, RA, #3 ++ | mov CARG3, RC ++ | str PC, SAVE_PC ++ | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) ++ | .IOS ldr BASE, L->base ++ | b ->cont_nop ++ | ++ |//-- Arithmetic metamethods --------------------------------------------- ++ | ++ |->vmeta_arith_vn: ++ | decode_RB8 RB, INS ++ | decode_RC8 RC, INS ++ | add CARG3, BASE, RB ++ | add CARG4, KBASE, RC ++ | b >1 ++ | ++ |->vmeta_arith_nv: ++ | decode_RB8 RB, INS ++ | decode_RC8 RC, INS ++ | add CARG4, BASE, RB ++ | add CARG3, KBASE, RC ++ | b >1 ++ | ++ |->vmeta_unm: ++ | ldr INS, [PC, #-8] ++ | sub PC, PC, #4 ++ | add CARG3, BASE, RC ++ | add CARG4, BASE, RC ++ | b >1 ++ | ++ |->vmeta_arith_vv: ++ | decode_RB8 RB, INS ++ | decode_RC8 RC, INS ++ | add CARG3, BASE, RB ++ | add CARG4, BASE, RC ++ |1: ++ | decode_OP OP, INS ++ | add CARG2, BASE, RA ++ | str BASE, L->base ++ | mov CARG1, L ++ | str PC, SAVE_PC ++ | str OP, ARG5 ++ | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) ++ | // Returns NULL (finished) or TValue * (metamethod). ++ | .IOS ldr BASE, L->base ++ | cmp CRET1, #0 ++ | beq ->cont_nop ++ | ++ | // Call metamethod for binary op. 
++ |->vmeta_binop: ++ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 ++ | sub CARG2, CRET1, BASE ++ | str PC, [CRET1, #-12] // [cont|PC] ++ | add PC, CARG2, #FRAME_CONT ++ | mov BASE, CRET1 ++ | mov NARGS8:RC, #16 // 2 args for func(o1, o2). ++ | b ->vm_call_dispatch ++ | ++ |->vmeta_len: ++ | add CARG2, BASE, RC ++ | str BASE, L->base ++ | mov CARG1, L ++ | str PC, SAVE_PC ++ | bl extern lj_meta_len // (lua_State *L, TValue *o) ++ | // Returns NULL (retry) or TValue * (metamethod base). ++ | .IOS ldr BASE, L->base ++#if LJ_52 ++ | cmp CRET1, #0 ++ | bne ->vmeta_binop // Binop call for compatibility. ++ | ldr TAB:CARG1, [BASE, RC] ++ | b ->BC_LEN_Z ++#else ++ | b ->vmeta_binop // Binop call for compatibility. ++#endif ++ | ++ |//-- Call metamethod ---------------------------------------------------- ++ | ++ |->vmeta_call: // Resolve and call __call metamethod. ++ | // RB = old base, BASE = new base, RC = nargs*8 ++ | mov CARG1, L ++ | str RB, L->base // This is the callers base! ++ | sub CARG2, BASE, #8 ++ | str PC, SAVE_PC ++ | add CARG3, BASE, NARGS8:RC ++ | .IOS mov RA, BASE ++ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | .IOS mov BASE, RA ++ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. ++ | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. ++ | ins_call ++ | ++ |->vmeta_callt: // Resolve __call for BC_CALLT. ++ | // BASE = old base, RA = new base, RC = nargs*8 ++ | mov CARG1, L ++ | str BASE, L->base ++ | sub CARG2, RA, #8 ++ | str PC, SAVE_PC ++ | add CARG3, RA, NARGS8:RC ++ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | .IOS ldr BASE, L->base ++ | ldr LFUNC:CARG3, [RA, FRAME_FUNC] // Guaranteed to be a function here. ++ | ldr PC, [BASE, FRAME_PC] ++ | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. ++ | b ->BC_CALLT2_Z ++ | ++ |//-- Argument coercion for 'for' statement ------------------------------ ++ | ++ |->vmeta_for: ++ | mov CARG1, L ++ | str BASE, L->base ++ | mov CARG2, RA ++ | str PC, SAVE_PC ++ | bl extern lj_meta_for // (lua_State *L, TValue *base) ++ | .IOS ldr BASE, L->base ++ |.if JIT ++ | ldrb OP, [PC, #-4] ++ |.endif ++ | ldr INS, [PC, #-4] ++ |.if JIT ++ | cmp OP, #BC_JFORI ++ |.endif ++ | decode_RA8 RA, INS ++ | decode_RD RC, INS ++ |.if JIT ++ | beq =>BC_JFORI ++ |.endif ++ | b =>BC_FORI ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Fast functions ----------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.macro .ffunc, name ++ |->ff_ .. name: ++ |.endmacro ++ | ++ |.macro .ffunc_1, name ++ |->ff_ .. name: ++ | ldrd CARG1, CARG2, [BASE] ++ | cmp NARGS8:RC, #8 ++ | blo ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_2, name ++ |->ff_ .. 
name: ++ | ldrd CARG1, CARG2, [BASE] ++ | ldrd CARG3, CARG4, [BASE, #8] ++ | cmp NARGS8:RC, #16 ++ | blo ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_n, name ++ | .ffunc_1 name ++ | checktp CARG2, LJ_TISNUM ++ | bhs ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_nn, name ++ | .ffunc_2 name ++ | checktp CARG2, LJ_TISNUM ++ | it lo ++ | cmnlo CARG4, #-LJ_TISNUM ++ | bhs ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_d, name ++ | .ffunc name ++ | ldr CARG2, [BASE, #4] ++ | cmp NARGS8:RC, #8 ++ | vldr d0, [BASE] ++ | blo ->fff_fallback ++ | checktp CARG2, LJ_TISNUM ++ | bhs ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_dd, name ++ | .ffunc name ++ | ldr CARG2, [BASE, #4] ++ | ldr CARG4, [BASE, #12] ++ | cmp NARGS8:RC, #16 ++ | vldr d0, [BASE] ++ | vldr d1, [BASE, #8] ++ | blo ->fff_fallback ++ | checktp CARG2, LJ_TISNUM ++ | it lo ++ | cmnlo CARG4, #-LJ_TISNUM ++ | bhs ->fff_fallback ++ |.endmacro ++ | ++ |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. ++ |.macro ffgccheck ++ | sub CARG1, DISPATCH, #-DISPATCH_GL(gc.total) ++ | ldr CARG1, [CARG1] ++ | sub CARG2, DISPATCH, #-DISPATCH_GL(gc.threshold) ++ | ldr CARG2, [CARG2] ++ | cmp CARG1, CARG2 ++ | it ge ++ | blge ->fff_gcstep ++ |.endmacro ++ | ++ |//-- Base library: checks ----------------------------------------------- ++ | ++ |.ffunc_1 assert ++ | checktp CARG2, LJ_TTRUE ++ | bhi ->fff_fallback ++ | ldr PC, [BASE, FRAME_PC] ++ | strd CARG1, CARG2, [BASE, #-8] ++ | mov RB, BASE ++ | subs RA, NARGS8:RC, #8 ++ | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8. ++ | beq ->fff_res // Done if exactly 1 argument. ++ |1: ++ | ldrd CARG1, CARG2, [RB, #8] ++ | subs RA, RA, #8 ++ | strd CARG1, CARG2, [RB], #8 ++ | bne <1 ++ | b ->fff_res ++ | ++ |.ffunc type ++ | ldr CARG2, [BASE, #4] ++ | cmp NARGS8:RC, #8 ++ | blo ->fff_fallback ++ | checktp CARG2, LJ_TISNUM ++ | it lo ++ | mvnlo CARG2, #~LJ_TISNUM ++ | rsb CARG4, CARG2, #(int)(offsetof(GCfuncC, upvalue)>>3)-1 ++ | lsl CARG4, CARG4, #3 ++ | ldrd_i CARG1, CARG2, CFUNC:CARG3, CARG4 ++ | b ->fff_restv ++ | ++ |//-- Base library: getters and setters --------------------------------- ++ | ++ |.ffunc_1 getmetatable ++ | checktp CARG2, LJ_TTAB ++ | it ne ++ | cmnne CARG2, #-LJ_TUDATA ++ | bne >6 ++ |1: // Field metatable must be at same offset for GCtab and GCudata! ++ | ldr TAB:RB, TAB:CARG1->metatable ++ |2: ++ | mvn CARG2, #~LJ_TNIL ++ | ldr STR:RC, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])] ++ | cmp TAB:RB, #0 ++ | beq ->fff_restv ++ | ldr CARG3, TAB:RB->hmask ++ | ldr CARG4, STR:RC->sid ++ | ldr NODE:INS, TAB:RB->node ++ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask ++ | add CARG3, CARG3, CARG3, lsl #1 ++ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 ++ |3: // Rearranged logic, because we expect _not_ to find the key. ++ | ldrd CARG3, CARG4, NODE:INS->key // STALL: early NODE:INS. ++ | ldrd CARG1, CARG2, NODE:INS->val ++ | ldr NODE:INS, NODE:INS->next ++ | checktp CARG4, LJ_TSTR ++ | it eq ++ | cmpeq CARG3, STR:RC ++ | beq >5 ++ | cmp NODE:INS, #0 ++ | bne <3 ++ |4: ++ | mov CARG1, RB // Use metatable as default result. 
++ | mvn CARG2, #~LJ_TTAB ++ | b ->fff_restv ++ |5: ++ | checktp CARG2, LJ_TNIL ++ | bne ->fff_restv ++ | b <4 ++ | ++ |6: ++ | checktp CARG2, LJ_TISNUM ++ | ite hs ++ | mvnhs CARG2, CARG2 ++ | movlo CARG2, #~LJ_TISNUM ++ | add CARG4, DISPATCH, CARG2, lsl #2 ++ | ldr TAB:RB, [CARG4, #DISPATCH_GL(gcroot[GCROOT_BASEMT])] ++ | b <2 ++ | ++ |.ffunc_2 setmetatable ++ | // Fast path: no mt for table yet and not clearing the mt. ++ | checktp CARG2, LJ_TTAB ++ | it eq ++ | ldreq TAB:RB, TAB:CARG1->metatable ++ | checktpeq CARG4, LJ_TTAB ++ | it eq ++ | ldrbeq CARG4, TAB:CARG1->marked ++ | it eq ++ | cmpeq TAB:RB, #0 ++ | bne ->fff_fallback ++ | tst CARG4, #LJ_GC_BLACK // isblack(table) ++ | str TAB:CARG3, TAB:CARG1->metatable ++ | beq ->fff_restv ++ | barrierback TAB:CARG1, CARG4, CARG3 ++ | b ->fff_restv ++ | ++ |.ffunc rawget ++ | ldrd CARG3, CARG4, [BASE] ++ | cmp NARGS8:RC, #16 ++ | blo ->fff_fallback ++ | mov CARG2, CARG3 ++ | checktab CARG4, ->fff_fallback ++ | mov CARG1, L ++ | add CARG3, BASE, #8 ++ | .IOS mov RA, BASE ++ | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) ++ | // Returns cTValue *. ++ | .IOS mov BASE, RA ++ | ldrd CARG1, CARG2, [CRET1] ++ | b ->fff_restv ++ | ++ |//-- Base library: conversions ------------------------------------------ ++ | ++ |.ffunc tonumber ++ | // Only handles the number case inline (without a base argument). ++ | ldrd CARG1, CARG2, [BASE] ++ | cmp NARGS8:RC, #8 ++ | bne ->fff_fallback ++ | checktp CARG2, LJ_TISNUM ++ | bls ->fff_restv ++ | b ->fff_fallback ++ | ++ |.ffunc_1 tostring ++ | // Only handles the string or number case inline. ++ | checktp CARG2, LJ_TSTR ++ | // A __tostring method in the string base metatable is ignored. ++ | beq ->fff_restv ++ | // Handle numbers inline, unless a number base metatable is present. ++ | ldr CARG4, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])] ++ | str BASE, L->base ++ | checktp CARG2, LJ_TISNUM ++ | it ls ++ | cmpls CARG4, #0 ++ | str PC, SAVE_PC // Redundant (but a defined value). ++ | bhi ->fff_fallback ++ | ffgccheck ++ | mov CARG1, L ++ | mov CARG2, BASE ++ | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) ++ | // Returns GCstr *. ++ | ldr BASE, L->base ++ | mvn CARG2, #~LJ_TSTR ++ | b ->fff_restv ++ | ++ |//-- Base library: iterators ------------------------------------------- ++ | ++ |.ffunc_1 next ++ | mvn CARG4, #~LJ_TNIL ++ | checktab CARG2, ->fff_fallback ++ | strd_i CARG3, CARG4, BASE, NARGS8:RC // Set missing 2nd arg to nil. ++ | ldr PC, [BASE, FRAME_PC] ++ | add CARG2, BASE, #8 ++ | sub CARG3, BASE, #8 ++ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ | // Returns 1=found, 0=end, -1=error. ++ | .IOS ldr BASE, L->base ++ | cmp CRET1, #0 ++ | mov RC, #(2+1)*8 ++ | bgt ->fff_res // Found key/value. ++ | bmi ->fff_fallback // Invalid key. ++ | // End of traversal: return nil. 
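++ | // mvn reg, #~tag materializes a type tag: tags are small negative
++ | // numbers, so the complemented value is an encodable immediate.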
++ | mvn CRET2, #~LJ_TNIL ++ | b ->fff_restv ++ | ++ |.ffunc_1 pairs ++ | checktab CARG2, ->fff_fallback ++#if LJ_52 ++ | ldr TAB:RB, TAB:CARG1->metatable ++#endif ++ | ldrd CFUNC:CARG3, CFUNC:CARG4, CFUNC:CARG3->upvalue[0] ++ | ldr PC, [BASE, FRAME_PC] ++#if LJ_52 ++ | cmp TAB:RB, #0 ++ | bne ->fff_fallback ++#endif ++ | mvn CARG2, #~LJ_TNIL ++ | mov RC, #(3+1)*8 ++ | strd CFUNC:CARG3, CFUNC:CARG4, [BASE, #-8] ++ | str CARG2, [BASE, #12] ++ | b ->fff_res ++ | ++ |.ffunc_2 ipairs_aux ++ | checktp CARG2, LJ_TTAB ++ | checktpeq CARG4, LJ_TISNUM ++ | bne ->fff_fallback ++ | ldr RB, TAB:CARG1->asize ++ | ldr RC, TAB:CARG1->array ++ | add CARG3, CARG3, #1 ++ | ldr PC, [BASE, FRAME_PC] ++ | cmp CARG3, RB ++ | add RC, RC, CARG3, lsl #3 ++ | strd CARG3, CARG4, [BASE, #-8] ++ | it lo ++ | ldrdlo CARG1, CARG2, [RC] ++ | mov RC, #(0+1)*8 ++ | bhs >2 // Not in array part? ++ |1: ++ | checktp CARG2, LJ_TNIL ++ | itt ne ++ | movne RC, #(2+1)*8 ++ | strdne CARG1, CARG2, [BASE] ++ | b ->fff_res ++ |2: // Check for empty hash part first. Otherwise call C function. ++ | ldr RB, TAB:CARG1->hmask ++ | mov CARG2, CARG3 ++ | cmp RB, #0 ++ | beq ->fff_res ++ | .IOS mov RA, BASE ++ | bl extern lj_tab_getinth // (GCtab *t, int32_t key) ++ | // Returns cTValue * or NULL. ++ | .IOS mov BASE, RA ++ | cmp CRET1, #0 ++ | beq ->fff_res ++ | ldrd CARG1, CARG2, [CRET1] ++ | b <1 ++ | ++ |.ffunc_1 ipairs ++ | checktab CARG2, ->fff_fallback ++#if LJ_52 ++ | ldr TAB:RB, TAB:CARG1->metatable ++#endif ++ | ldrd CFUNC:CARG3, CFUNC:CARG4, CFUNC:CARG3->upvalue[0] ++ | ldr PC, [BASE, FRAME_PC] ++#if LJ_52 ++ | cmp TAB:RB, #0 ++ | bne ->fff_fallback ++#endif ++ | mov CARG1, #0 ++ | mvn CARG2, #~LJ_TISNUM ++ | mov RC, #(3+1)*8 ++ | strd CFUNC:CARG3, CFUNC:CARG4, [BASE, #-8] ++ | strd CARG1, CARG2, [BASE, #8] ++ | b ->fff_res ++ | ++ |//-- Base library: catch errors ---------------------------------------- ++ | ++ |.ffunc pcall ++ | sub RA, DISPATCH, #-DISPATCH_GL(hookmask) ++ | ldrb RA, [RA] ++ | cmp NARGS8:RC, #8 ++ | blo ->fff_fallback ++ | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. ++ | mov RB, BASE ++ | add BASE, BASE, #8 ++ | ite eq ++ | moveq PC, #8+FRAME_PCALL ++ | movne PC, #8+FRAME_PCALLH ++ | sub NARGS8:RC, NARGS8:RC, #8 ++ | b ->vm_call_dispatch ++ | ++ |.ffunc_2 xpcall ++ | sub RA, DISPATCH, #-DISPATCH_GL(hookmask) ++ | ldrb RA, [RA] ++ | checkfunc CARG4, ->fff_fallback // Traceback must be a function. ++ | mov RB, BASE ++ | strd CARG1, CARG2, [BASE, #8] // Swap function and traceback. ++ | strd CARG3, CARG4, [BASE] ++ | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. 
++ | add BASE, BASE, #16 ++ | ite eq ++ | moveq PC, #16+FRAME_PCALL ++ | movne PC, #16+FRAME_PCALLH ++ | sub NARGS8:RC, NARGS8:RC, #16 ++ | b ->vm_call_dispatch ++ | ++ |//-- Coroutine library -------------------------------------------------- ++ | ++ |.macro coroutine_resume_wrap, resume ++ |.if resume ++ |.ffunc_1 coroutine_resume ++ | checktp CARG2, LJ_TTHREAD ++ | bne ->fff_fallback ++ |.else ++ |.ffunc coroutine_wrap_aux ++ | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr ++ |.endif ++ | ldr PC, [BASE, FRAME_PC] ++ | str BASE, L->base ++ | ldr CARG2, L:CARG1->top ++ | ldrb RA, L:CARG1->status ++ | ldr RB, L:CARG1->base ++ | add CARG3, CARG2, NARGS8:RC ++ | add CARG4, CARG2, RA ++ | str PC, SAVE_PC ++ | cmp CARG4, RB ++ | beq ->fff_fallback ++ | ldr CARG4, L:CARG1->maxstack ++ | ldr RB, L:CARG1->cframe ++ | cmp RA, #LUA_YIELD ++ | it ls ++ | cmpls CARG3, CARG4 ++ | it ls ++ | cmpls RB, #0 ++ | bhi ->fff_fallback ++ |1: ++ |.if resume ++ | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC. ++ | add BASE, BASE, #8 ++ | sub NARGS8:RC, NARGS8:RC, #8 ++ |.endif ++ | str CARG3, L:CARG1->top ++ | str BASE, L->top ++ |2: // Move args to coroutine. ++ | ldrd_i CARG3, CARG4, BASE, RB ++ | cmp RB, NARGS8:RC ++ | strdne_i CARG3, CARG4, CARG2, RB ++ | add RB, RB, #8 ++ | bne <2 ++ | ++ | mov CARG3, #0 ++ | mov L:RA, L:CARG1 ++ | mov CARG4, #0 ++ | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) ++ | // Returns thread status. ++ |4: ++ | ldr CARG3, L:RA->base ++ | mv_vmstate CARG2, INTERP ++ | ldr CARG4, L:RA->top ++ | cmp CRET1, #LUA_YIELD ++ | ldr BASE, L->base ++ | str L, [DISPATCH, #DISPATCH_GL(cur_L)] ++ | st_vmstate CARG2 ++ | bhi >8 ++ | subs RC, CARG4, CARG3 ++ | ldr CARG1, L->maxstack ++ | add CARG2, BASE, RC ++ | beq >6 // No results? ++ | cmp CARG2, CARG1 ++ | mov RB, #0 ++ | bhi >9 // Need to grow stack? ++ | ++ | sub CARG4, RC, #8 ++ | str CARG3, L:RA->top // Clear coroutine stack. ++ |5: // Move results from coroutine. ++ | ldrd_i CARG1, CARG2, CARG3, RB ++ | cmp RB, CARG4 ++ | strd_i CARG1, CARG2, BASE, RB ++ | add RB, RB, #8 ++ | bne <5 ++ |6: ++ |.if resume ++ | mvn CARG3, #~LJ_TTRUE ++ | add RC, RC, #16 ++ |7: ++ | str CARG3, [BASE, #-4] // Prepend true/false to results. ++ | sub RA, BASE, #8 ++ |.else ++ | mov RA, BASE ++ | add RC, RC, #8 ++ |.endif ++ | ands CARG1, PC, #FRAME_TYPE ++ | str PC, SAVE_PC ++ | str RC, SAVE_MULTRES ++ | beq ->BC_RET_Z ++ | b ->vm_return ++ | ++ |8: // Coroutine returned with error (at co->top-1). ++ |.if resume ++ | ldrd CARG1, CARG2, [CARG4, #-8]! ++ | mvn CARG3, #~LJ_TFALSE ++ | mov RC, #(2+1)*8 ++ | str CARG4, L:RA->top // Remove error from coroutine stack. ++ | strd CARG1, CARG2, [BASE] // Copy error message. ++ | b <7 ++ |.else ++ | mov CARG1, L ++ | mov CARG2, L:RA ++ | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) ++ | // Never returns. ++ |.endif ++ | ++ |9: // Handle stack expansion on return from yield. 
++ | mov CARG1, L
++ | lsr CARG2, RC, #3
++ | bl extern lj_state_growstack // (lua_State *L, int n)
++ | mov CRET1, #0
++ | b <4
++ |.endmacro
++ |
++ | coroutine_resume_wrap 1 // coroutine.resume
++ | coroutine_resume_wrap 0 // coroutine.wrap
++ |
++ |.ffunc coroutine_yield
++ | ldr CARG1, L->cframe
++ | add CARG2, BASE, NARGS8:RC
++ | str BASE, L->base
++ | tst CARG1, #CFRAME_RESUME
++ | str CARG2, L->top
++ | mov CRET1, #LUA_YIELD
++ | mov CARG3, #0
++ | beq ->fff_fallback
++ | str CARG3, L->cframe
++ | strb CRET1, L->status
++ | b ->vm_leave_unw
++ |
++ |//-- Math library -------------------------------------------------------
++ |
++ |.macro math_round, func
++ | .ffunc_1 math_ .. func
++ | checktp CARG2, LJ_TISNUM
++ | beq ->fff_restv
++ | bhi ->fff_fallback
++ | // Round FP value and normalize result.
++ | lsl CARG3, CARG2, #1
++ | adds RB, CARG3, #0x00200000
++ | bpl >2 // |x| < 1?
++ | mvn CARG4, #0x3e0
++ | subs RB, CARG4, RB, asr #21
++ | lsl CARG4, CARG2, #11
++ | lsl CARG3, CARG1, #11
++ | orr CARG4, CARG4, #0x80000000
++ | rsb INS, RB, #32
++ | orr CARG4, CARG4, CARG1, lsr #21
++ | bls >3 // |x| >= 2^31?
++ | lsl CARG1, CARG4, INS
++ | orr CARG3, CARG3, CARG1
++ | lsr CARG1, CARG4, RB
++ |.if "func" == "floor"
++ | tst CARG3, CARG2, asr #31
++ | it ne
++ | addne CARG1, CARG1, #1
++ |.else
++ | bics CARG3, CARG3, CARG2, asr #31
++ | it ne
++ | addsne CARG1, CARG1, #1
++ | it vs
++ | ldrdvs CARG1, CARG2, >9
++ | bvs ->fff_restv
++ |.endif
++ | cmp CARG2, #0
++ | it lt
++ | rsblt CARG1, CARG1, #0
++ |1:
++ | mvn CARG2, #~LJ_TISNUM
++ | b ->fff_restv
++ |
++ |2: // |x| < 1
++ | bcs ->fff_restv // |x| is not finite.
++ | orr CARG3, CARG3, CARG1 // ztest = abs(hi) | lo
++ |.if "func" == "floor"
++ | tst CARG3, CARG2, asr #31 // return (ztest & sign) == 0 ? 0 : -1
++ | ite eq
++ | moveq CARG1, #0
++ | mvnne CARG1, #0
++ |.else
++ | bics CARG3, CARG3, CARG2, asr #31 // return (ztest & ~sign) == 0 ? 0 : 1
++ | ite eq
++ | moveq CARG1, #0
++ | movne CARG1, #1
++ |.endif
++ | mvn CARG2, #~LJ_TISNUM
++ | b ->fff_restv
++ |
++ |3: // |x| >= 2^31. Check for x == -(2^31).
++ | it eq
++ | cmpeq CARG4, #0x80000000
++ |.if "func" == "floor"
++ | it eq
++ | cmpeq CARG3, #0
++ |.endif
++ | bne >4
++ | cmp CARG2, #0
++ | it mi
++ | movmi CARG1, #0x80000000
++ | bmi <1
++ |4:
++ | bl ->vm_..func.._sf
++ | b ->fff_restv
++ |.endmacro
++ |
++ | math_round floor
++ | math_round ceil
++ |
++ |.align 8
++ |9:
++ | .long 0x00000000, 0x000041e0 // 2^31. jturnsek: swapped halfwords!!!
++ |
++ |.ffunc_1 math_abs
++ | checktp CARG2, LJ_TISNUM
++ | bhi ->fff_fallback
++ | it ne
++ | bicne CARG2, CARG2, #0x80000000
++ | bne ->fff_restv
++ | cmp CARG1, #0
++ | it lt
++ | rsbslt CARG1, CARG1, #0
++ | it vs
++ | ldrdvs CARG1, CARG2, <9
++ | // Fallthrough.
++ |
++ |->fff_restv:
++ | // CARG1, CARG2 = TValue result.
++ | ldr PC, [BASE, FRAME_PC]
++ | strd CARG1, CARG2, [BASE, #-8]
++ |->fff_res1:
++ | // PC = return.
++ | mov RC, #(1+1)*8
++ |->fff_res:
++ | // RC = (nresults+1)*8, PC = return.
++ | ands CARG1, PC, #FRAME_TYPE
++ | it eq
++ | ldreq INS, [PC, #-4]
++ | str RC, SAVE_MULTRES
++ | sub RA, BASE, #8
++ | bne ->vm_return
++ | decode_RB8 RB, INS
++ |5:
++ | cmp RB, RC // More results expected?
++ | bhi >6
++ | decode_RA8 CARG1, INS
++ | ins_next1
++ | ins_next2
++ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
++ | sub BASE, RA, CARG1
++ | ins_next3
++ |
++ |6: // Fill up results with nil.
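++ | // nil only needs its type tag; the value word may stay undefined.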
++ | add CARG2, RA, RC ++ | mvn CARG1, #~LJ_TNIL ++ | add RC, RC, #8 ++ | str CARG1, [CARG2, #-4] ++ | b <5 ++ | ++ |.macro math_extern, func ++ |.if HFABI ++ | .ffunc_d math_ .. func ++ |.else ++ | .ffunc_n math_ .. func ++ |.endif ++ | .IOS mov RA, BASE ++ | bl extern func ++ | .IOS mov BASE, RA ++ |.if HFABI ++ | b ->fff_resd ++ |.else ++ | b ->fff_restv ++ |.endif ++ |.endmacro ++ | ++ |.macro math_extern2, func ++ |.if HFABI ++ | .ffunc_dd math_ .. func ++ |.else ++ | .ffunc_nn math_ .. func ++ |.endif ++ | .IOS mov RA, BASE ++ | bl extern func ++ | .IOS mov BASE, RA ++ |.if HFABI ++ | b ->fff_resd ++ |.else ++ | b ->fff_restv ++ |.endif ++ |.endmacro ++ | ++ |.if FPU ++ | .ffunc_d math_sqrt ++ | vsqrt.f64 d0, d0 ++ |->fff_resd: ++ | ldr PC, [BASE, FRAME_PC] ++ | vstr d0, [BASE, #-8] ++ | b ->fff_res1 ++ |.else ++ | math_extern sqrt ++ |.endif ++ | ++ |.ffunc math_log ++ |.if HFABI ++ | ldr CARG2, [BASE, #4] ++ | cmp NARGS8:RC, #8 // Need exactly 1 argument. ++ | vldr d0, [BASE] ++ | bne ->fff_fallback ++ |.else ++ | ldrd CARG1, CARG2, [BASE] ++ | cmp NARGS8:RC, #8 // Need exactly 1 argument. ++ | bne ->fff_fallback ++ |.endif ++ | checktp CARG2, LJ_TISNUM ++ | bhs ->fff_fallback ++ | .IOS mov RA, BASE ++ | bl extern log ++ | .IOS mov BASE, RA ++ |.if HFABI ++ | b ->fff_resd ++ |.else ++ | b ->fff_restv ++ |.endif ++ | ++ | math_extern log10 ++ | math_extern exp ++ | math_extern sin ++ | math_extern cos ++ | math_extern tan ++ | math_extern asin ++ | math_extern acos ++ | math_extern atan ++ | math_extern sinh ++ | math_extern cosh ++ | math_extern tanh ++ | math_extern2 pow ++ | math_extern2 atan2 ++ | math_extern2 fmod ++ | ++ |.if HFABI ++ | .ffunc math_ldexp ++ | ldr CARG4, [BASE, #4] ++ | ldrd CARG1, CARG2, [BASE, #8] ++ | cmp NARGS8:RC, #16 ++ | blo ->fff_fallback ++ | vldr d0, [BASE] ++ | checktp CARG4, LJ_TISNUM ++ | bhs ->fff_fallback ++ | checktp CARG2, LJ_TISNUM ++ | bne ->fff_fallback ++ | .IOS mov RA, BASE ++ | bl extern ldexp // (double x, int exp) ++ | .IOS mov BASE, RA ++ | b ->fff_resd ++ |.else ++ |.ffunc_2 math_ldexp ++ | checktp CARG2, LJ_TISNUM ++ | bhs ->fff_fallback ++ | checktp CARG4, LJ_TISNUM ++ | bne ->fff_fallback ++ | .IOS mov RA, BASE ++ | bl extern ldexp // (double x, int exp) ++ | .IOS mov BASE, RA ++ | b ->fff_restv ++ |.endif ++ | ++ |.if HFABI ++ |.ffunc_d math_frexp ++ | mov CARG1, sp ++ | .IOS mov RA, BASE ++ | bl extern frexp ++ | .IOS mov BASE, RA ++ | ldr CARG3, [sp] ++ | mvn CARG4, #~LJ_TISNUM ++ | ldr PC, [BASE, FRAME_PC] ++ | vstr d0, [BASE, #-8] ++ | mov RC, #(2+1)*8 ++ | strd CARG3, CARG4, [BASE] ++ | b ->fff_res ++ |.else ++ |.ffunc_n math_frexp ++ | mov CARG3, sp ++ | .IOS mov RA, BASE ++ | bl extern frexp ++ | .IOS mov BASE, RA ++ | ldr CARG3, [sp] ++ | mvn CARG4, #~LJ_TISNUM ++ | ldr PC, [BASE, FRAME_PC] ++ | strd CARG1, CARG2, [BASE, #-8] ++ | mov RC, #(2+1)*8 ++ | strd CARG3, CARG4, [BASE] ++ | b ->fff_res ++ |.endif ++ | ++ |.if HFABI ++ |.ffunc_d math_modf ++ | sub CARG1, BASE, #8 ++ | ldr PC, [BASE, FRAME_PC] ++ | .IOS mov RA, BASE ++ | bl extern modf ++ | .IOS mov BASE, RA ++ | mov RC, #(2+1)*8 ++ | vstr d0, [BASE] ++ | b ->fff_res ++ |.else ++ |.ffunc_n math_modf ++ | sub CARG3, BASE, #8 ++ | ldr PC, [BASE, FRAME_PC] ++ | .IOS mov RA, BASE ++ | bl extern modf ++ | .IOS mov BASE, RA ++ | mov RC, #(2+1)*8 ++ | strd CARG1, CARG2, [BASE] ++ | b ->fff_res ++ |.endif ++ | ++ |.macro math_minmax, name, cond, fcond ++ |.if FPU ++ | .ffunc_1 name ++ | add RB, BASE, RC ++ | checktp CARG2, LJ_TISNUM ++ | add RA, BASE, #8 ++ | bne >4 ++ |1: 
// Handle integers. ++ | ldrd CARG3, CARG4, [RA] ++ | cmp RA, RB ++ | bhs ->fff_restv ++ | checktp CARG4, LJ_TISNUM ++ | bne >3 ++ | cmp CARG1, CARG3 ++ | add RA, RA, #8 ++ | it cond ++ | mov..cond CARG1, CARG3 ++ | b <1 ++ |3: // Convert intermediate result to number and continue below. ++ | vmov s4, CARG1 ++ | bhi ->fff_fallback ++ | vldr d1, [RA] ++ | vcvt.f64.s32 d0, s4 ++ | b >6 ++ | ++ |4: ++ | vldr d0, [BASE] ++ | bhi ->fff_fallback ++ |5: // Handle numbers. ++ | ldrd CARG3, CARG4, [RA] ++ | vldr d1, [RA] ++ | cmp RA, RB ++ | bhs ->fff_resd ++ | checktp CARG4, LJ_TISNUM ++ | bhs >7 ++ |6: ++ | vcmp.f64 d0, d1 ++ | vmrs ++ | add RA, RA, #8 ++ | it fcond ++ | vmov..fcond.f64 d0, d1 ++ | b <5 ++ |7: // Convert integer to number and continue above. ++ | vmov s4, CARG3 ++ | bhi ->fff_fallback ++ | vcvt.f64.s32 d1, s4 ++ | b <6 ++ | ++ |.else ++ | ++ | .ffunc_1 name ++ | checktp CARG2, LJ_TISNUM ++ | mov RA, #8 ++ | bne >4 ++ |1: // Handle integers. ++ | ldrd_i CARG3, CARG4, BASE, RA ++ | cmp RA, RC ++ | bhs ->fff_restv ++ | checktp CARG4, LJ_TISNUM ++ | bne >3 ++ | cmp CARG1, CARG3 ++ | add RA, RA, #8 ++ | it cond ++ | mov..cond CARG1, CARG3 ++ | b <1 ++ |3: // Convert intermediate result to number and continue below. ++ | bhi ->fff_fallback ++ | bl extern __aeabi_i2d ++ | ldrd_i CARG3, CARG4, BASE, RA ++ | b >6 ++ | ++ |4: ++ | bhi ->fff_fallback ++ |5: // Handle numbers. ++ | ldrd_i CARG3, CARG4, BASE, RA ++ | cmp RA, RC ++ | bhs ->fff_restv ++ | checktp CARG4, LJ_TISNUM ++ | bhs >7 ++ |6: ++ | bl extern __aeabi_cdcmple ++ | add RA, RA, #8 ++ | mov..fcond CARG1, CARG3 ++ | mov..fcond CARG2, CARG4 ++ | b <5 ++ |7: // Convert integer to number and continue above. ++ | bhi ->fff_fallback ++ | strd CARG1, CARG2, TMPD ++ | mov CARG1, CARG3 ++ | bl extern __aeabi_i2d ++ | ldrd CARG3, CARG4, TMPD ++ | b <6 ++ |.endif ++ |.endmacro ++ | ++ | math_minmax math_min, gt, pl ++ | math_minmax math_max, lt, le ++ | ++ |//-- String library ----------------------------------------------------- ++ | ++ |.ffunc string_byte // Only handle the 1-arg case here. ++ | ldrd CARG1, CARG2, [BASE] ++ | ldr PC, [BASE, FRAME_PC] ++ | cmp NARGS8:RC, #8 ++ | checktpeq CARG2, LJ_TSTR // Need exactly 1 argument. ++ | bne ->fff_fallback ++ | ldr CARG3, STR:CARG1->len ++ | ldrb CARG1, STR:CARG1[1] // Access is always ok (NUL at end). ++ | mvn CARG2, #~LJ_TISNUM ++ | cmp CARG3, #0 ++ | ite eq ++ | moveq RC, #(0+1)*8 ++ | movne RC, #(1+1)*8 ++ | strd CARG1, CARG2, [BASE, #-8] ++ | b ->fff_res ++ | ++ |.ffunc string_char // Only handle the 1-arg case here. ++ | ffgccheck ++ | ldrd CARG1, CARG2, [BASE] ++ | ldr PC, [BASE, FRAME_PC] ++ | cmp NARGS8:RC, #8 // Need exactly 1 argument. ++ | checktpeq CARG2, LJ_TISNUM ++ | it eq ++ | bicseq CARG4, CARG1, #255 ++ | mov CARG3, #1 ++ | bne ->fff_fallback ++ | str CARG1, TMPD ++ | mov CARG2, TMPDp // Points to stack. Little-endian. ++ |->fff_newstr: ++ | // CARG2 = str, CARG3 = len. ++ | str BASE, L->base ++ | mov CARG1, L ++ | str PC, SAVE_PC ++ | bl extern lj_str_new // (lua_State *L, char *str, size_t l) ++ |->fff_resstr: ++ | // Returns GCstr *. 
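++ | // Reload BASE: the GC may run inside lj_str_new and move the stack.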
++ | ldr BASE, L->base ++ | mvn CARG2, #~LJ_TSTR ++ | b ->fff_restv ++ | ++ |.ffunc string_sub ++ | ffgccheck ++ | ldrd CARG1, CARG2, [BASE] ++ | ldrd CARG3, CARG4, [BASE, #16] ++ | cmp NARGS8:RC, #16 ++ | mvn RB, #0 ++ | beq >1 ++ | blo ->fff_fallback ++ | checktp CARG4, LJ_TISNUM ++ | mov RB, CARG3 ++ | bne ->fff_fallback ++ |1: ++ | ldrd CARG3, CARG4, [BASE, #8] ++ | checktp CARG2, LJ_TSTR ++ | it eq ++ | ldreq CARG2, STR:CARG1->len ++ | checktpeq CARG4, LJ_TISNUM ++ | bne ->fff_fallback ++ | // CARG1 = str, CARG2 = str->len, CARG3 = start, RB = end ++ | add CARG4, CARG2, #1 ++ | cmp CARG3, #0 // if (start < 0) start += len+1 ++ | it lt ++ | addlt CARG3, CARG3, CARG4 ++ | cmp CARG3, #1 // if (start < 1) start = 1 ++ | it lt ++ | movlt CARG3, #1 ++ | cmp RB, #0 // if (end < 0) end += len+1 ++ | it lt ++ | addlt RB, RB, CARG4 ++ | bic RB, RB, RB, asr #31 // if (end < 0) end = 0 ++ | cmp RB, CARG2 // if (end > len) end = len ++ | add CARG1, STR:CARG1, #sizeof(GCstr)-1 ++ | it gt ++ | movgt RB, CARG2 ++ | add CARG2, CARG1, CARG3 ++ | subs CARG3, RB, CARG3 // len = end - start ++ | add CARG3, CARG3, #1 // len += 1 ++ | bge ->fff_newstr ++ |->fff_emptystr: ++ | sub STR:CARG1, DISPATCH, #-DISPATCH_GL(strempty) ++ | mvn CARG2, #~LJ_TSTR ++ | b ->fff_restv ++ | ++ |.macro ffstring_op, name ++ | .ffunc string_ .. name ++ | ffgccheck ++ | ldr CARG3, [BASE, #4] ++ | cmp NARGS8:RC, #8 ++ | ldr STR:CARG2, [BASE] ++ | blo ->fff_fallback ++ | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf) ++ | checkstr CARG3, ->fff_fallback ++ | ldr CARG4, SBUF:CARG1->b ++ | str BASE, L->base ++ | str PC, SAVE_PC ++ | str L, SBUF:CARG1->L ++ | str CARG4, SBUF:CARG1->w ++ | bl extern lj_buf_putstr_ .. name ++ | bl extern lj_buf_tostr ++ | b ->fff_resstr ++ |.endmacro ++ | ++ |ffstring_op reverse ++ |ffstring_op lower ++ |ffstring_op upper ++ | ++ |//-- Bit library -------------------------------------------------------- ++ | ++ |// FP number to bit conversion for soft-float. Clobbers r0-r3. ++ |->vm_tobit_fb: ++ | bhi ->fff_fallback ++ |->vm_tobit: ++ | lsl RB, CARG2, #1 ++ | adds RB, RB, #0x00200000 ++ | ittt pl ++ | movpl CARG1, #0 // |x| < 1? ++ | bxpl lr ++ | mvn CARG4, #0x3e0 ++ | subs RB, CARG4, RB, asr #21 ++ | bmi >1 // |x| >= 2^32? 
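++ | // 1 <= |x| < 2^32: reassemble the mantissa with the implicit 1 bit
++ | // and shift it right by the exponent to extract the integer part.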
++ | lsl CARG4, CARG2, #11 ++ | orr CARG4, CARG4, #0x80000000 ++ | orr CARG4, CARG4, CARG1, lsr #21 ++ | cmp CARG2, #0 ++ | lsr CARG1, CARG4, RB ++ | it lt ++ | rsblt CARG1, CARG1, #0 ++ | bx lr ++ |1: ++ | add RB, RB, #21 ++ | lsr CARG4, CARG1, RB ++ | rsb RB, RB, #20 ++ | lsl CARG1, CARG2, #12 ++ | cmp CARG2, #0 ++ | lsl CARG1, CARG1, RB ++ | orr CARG1, CARG4, CARG1 ++ | it lt ++ | rsblt CARG1, CARG1, #0 ++ | bx lr ++ | ++ |.macro .ffunc_bit, name ++ | .ffunc_1 bit_..name ++ | checktp CARG2, LJ_TISNUM ++ | it ne ++ | blne ->vm_tobit_fb ++ |.endmacro ++ | ++ |.ffunc_bit tobit ++ | mvn CARG2, #~LJ_TISNUM ++ | b ->fff_restv ++ | ++ |.macro .ffunc_bit_op, name, ins ++ | .ffunc_bit name ++ | mov CARG3, CARG1 ++ | mov RA, #8 ++ |1: ++ | ldrd_i CARG1, CARG2, BASE, RA ++ | cmp RA, NARGS8:RC ++ | add RA, RA, #8 ++ | bge >2 ++ | checktp CARG2, LJ_TISNUM ++ | it ne ++ | blne ->vm_tobit_fb ++ | ins CARG3, CARG3, CARG1 ++ | b <1 ++ |.endmacro ++ | ++ |.ffunc_bit_op band, and ++ |.ffunc_bit_op bor, orr ++ |.ffunc_bit_op bxor, eor ++ | ++ |2: ++ | mvn CARG4, #~LJ_TISNUM ++ | ldr PC, [BASE, FRAME_PC] ++ | strd CARG3, CARG4, [BASE, #-8] ++ | b ->fff_res1 ++ | ++ |.ffunc_bit bswap ++ | eor CARG3, CARG1, CARG1, ror #16 ++ | bic CARG3, CARG3, #0x00ff0000 ++ | ror CARG1, CARG1, #8 ++ | mvn CARG2, #~LJ_TISNUM ++ | eor CARG1, CARG1, CARG3, lsr #8 ++ | b ->fff_restv ++ | ++ |.ffunc_bit bnot ++ | mvn CARG1, CARG1 ++ | mvn CARG2, #~LJ_TISNUM ++ | b ->fff_restv ++ | ++ |.macro .ffunc_bit_sh, name, ins, shmod ++ | .ffunc bit_..name ++ | ldrd CARG1, CARG2, [BASE, #8] ++ | cmp NARGS8:RC, #16 ++ | blo ->fff_fallback ++ | checktp CARG2, LJ_TISNUM ++ | it ne ++ | blne ->vm_tobit_fb ++ |.if shmod == 0 ++ | and RA, CARG1, #31 ++ |.else ++ | rsb RA, CARG1, #0 ++ |.endif ++ | ldrd CARG1, CARG2, [BASE] ++ | checktp CARG2, LJ_TISNUM ++ | it ne ++ | blne ->vm_tobit_fb ++ | ins CARG1, CARG1, RA ++ | mvn CARG2, #~LJ_TISNUM ++ | b ->fff_restv ++ |.endmacro ++ | ++ |.ffunc_bit_sh lshift, lsl, 0 ++ |.ffunc_bit_sh rshift, lsr, 0 ++ |.ffunc_bit_sh arshift, asr, 0 ++ |.ffunc_bit_sh rol, ror, 1 ++ |.ffunc_bit_sh ror, ror, 0 ++ | ++ |//----------------------------------------------------------------------- ++ | ++ |->fff_fallback: // Call fast function fallback handler. ++ | // BASE = new base, RC = nargs*8 ++ | ldr CARG3, [BASE, FRAME_FUNC] ++ | ldr CARG2, L->maxstack ++ | add CARG1, BASE, NARGS8:RC ++ | ldr PC, [BASE, FRAME_PC] // Fallback may overwrite PC. ++ | str CARG1, L->top ++ | ldr CARG3, CFUNC:CARG3->f ++ | str BASE, L->base ++ | add CARG1, CARG1, #8*LUA_MINSTACK ++ | str PC, SAVE_PC // Redundant (but a defined value). ++ | cmp CARG1, CARG2 ++ | mov CARG1, L ++ | bhi >5 // Need to grow stack. ++ | blx CARG3 // (lua_State *L) ++ | // Either throws an error, or recovers and returns -1, 0 or nresults+1. ++ | ldr BASE, L->base ++ | cmp CRET1, #0 ++ | lsl RC, CRET1, #3 ++ | sub RA, BASE, #8 ++ | bgt ->fff_res // Returned nresults+1? ++ |1: // Returned 0 or -1: retry fast path. ++ | ldr CARG1, L->top ++ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] ++ | sub NARGS8:RC, CARG1, BASE ++ | bne ->vm_call_tail // Returned -1? ++ | ins_callt // Returned 0: retry fast path. ++ | ++ |// Reconstruct previous base for vmeta_call during tailcall. ++ |->vm_call_tail: ++ | ands CARG1, PC, #FRAME_TYPE ++ | bic CARG2, PC, #FRAME_TYPEP ++ | ittt eq ++ | ldreq INS, [PC, #-4] ++ | andeq CARG2, MASKR8, INS, lsr #5 // Conditional decode_RA8. ++ | addeq CARG2, CARG2, #8 ++ | sub RB, BASE, CARG2 ++ | b ->vm_call_dispatch // Resolve again for tailcall. 
++ | ++ |5: // Grow stack for fallback handler. ++ | mov CARG2, #LUA_MINSTACK ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ldr BASE, L->base ++ | cmp CARG1, CARG1 // Set zero-flag to force retry. ++ | b <1 ++ | ++ |->fff_gcstep: // Call GC step function. ++ | // BASE = new base, RC = nargs*8 ++ | mov RA, lr ++ | str BASE, L->base ++ | add CARG2, BASE, NARGS8:RC ++ | str PC, SAVE_PC // Redundant (but a defined value). ++ | str CARG2, L->top ++ | mov CARG1, L ++ | bl extern lj_gc_step // (lua_State *L) ++ | ldr BASE, L->base ++ | mov lr, RA // Help return address predictor. ++ | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] ++ | bx lr ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Special dispatch targets ------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_record: // Dispatch target for recording phase. ++ |.if JIT ++ | sub CARG1, DISPATCH, #-DISPATCH_GL(hookmask) ++ | ldrb CARG1, [CARG1] ++ | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. ++ | bne >5 ++ | // Decrement the hookcount for consistency, but always do the call. ++ | sub CARG2, DISPATCH, #-DISPATCH_GL(hookcount) ++ | ldr CARG2, [CARG2] ++ | tst CARG1, #HOOK_ACTIVE ++ | bne >1 ++ | sub CARG2, CARG2, #1 ++ | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT ++ | sub CARG1, DISPATCH, #-DISPATCH_GL(hookcount) ++ | it ne ++ | strne CARG2, [CARG1] ++ | b >1 ++ |.endif ++ | ++ |->vm_rethook: // Dispatch target for return hooks. ++ | sub CARG1, DISPATCH, #-DISPATCH_GL(hookmask) ++ | ldrb CARG1, [CARG1] ++ | tst CARG1, #HOOK_ACTIVE // Hook already active? ++ | beq >1 ++ |5: // Re-dispatch to static ins. ++ | decode_OP OP, INS ++ | add OP, DISPATCH, OP, lsl #2 ++ | ldr pc, [OP, #GG_DISP2STATIC] ++ | ++ |->vm_inshook: // Dispatch target for instr/line hooks. ++ | sub CARG1, DISPATCH, #-DISPATCH_GL(hookmask) ++ | ldrb CARG1, [CARG1] ++ | sub CARG2, DISPATCH, #-DISPATCH_GL(hookcount) ++ | ldr CARG2, [CARG2] ++ | tst CARG1, #HOOK_ACTIVE // Hook already active? ++ | bne <5 ++ | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT ++ | beq <5 ++ | subs CARG2, CARG2, #1 ++ | sub CARG3, DISPATCH, #-DISPATCH_GL(hookcount) ++ | str CARG2, [CARG3] ++ | beq >1 ++ | tst CARG1, #LUA_MASKLINE ++ | beq <5 ++ |1: ++ | mov CARG1, L ++ | str BASE, L->base ++ | mov CARG2, PC ++ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. ++ | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) ++ |3: ++ | ldr BASE, L->base ++ |4: // Re-dispatch to static ins. ++ | ldrb OP, [PC, #-4] ++ | ldr INS, [PC, #-4] ++ | add OP, DISPATCH, OP, lsl #2 ++ | ldr OP, [OP, #GG_DISP2STATIC] ++ | decode_RA8 RA, INS ++ | decode_RD RC, INS ++ | bx OP ++ | ++ |->cont_hook: // Continue from hook yield. ++ | ldr CARG1, [CARG4, #-24] ++ | add PC, PC, #4 ++ | str CARG1, SAVE_MULTRES // Restore MULTRES for *M ins. ++ | b <4 ++ | ++ |->vm_hotloop: // Hot loop counter underflow. ++ |.if JIT ++ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). ++ | sub CARG1, DISPATCH, #-GG_DISP2J ++ | str PC, SAVE_PC ++ | ldr CARG3, LFUNC:CARG3->field_pc ++ | mov CARG2, PC ++ | sub RB, DISPATCH, #-DISPATCH_J(L) ++ | str L, [RB] ++ | ldrb CARG3, [CARG3, #PC2PROTO(framesize)] ++ | str BASE, L->base ++ | add CARG3, BASE, CARG3, lsl #3 ++ | str CARG3, L->top ++ | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) ++ | b <3 ++ |.endif ++ | ++ |->vm_callhook: // Dispatch target for call hooks. 
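++ | // lj_dispatch_call distinguishes call hooks from hot calls by the
++ | // low bit of the PC argument (set by vm_hotcall below).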
++ | mov CARG2, PC ++ |.if JIT ++ | b >1 ++ |.endif ++ | ++ |->vm_hotcall: // Hot call counter underflow. ++ |.if JIT ++ | orr CARG2, PC, #1 ++ |1: ++ |.endif ++ | add CARG4, BASE, RC ++ | str PC, SAVE_PC ++ | mov CARG1, L ++ | str BASE, L->base ++ | sub RA, RA, BASE ++ | str CARG4, L->top ++ | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) ++ | // Returns ASMFunction. ++ | ldr BASE, L->base ++ | ldr CARG4, L->top ++ | mov CARG2, #0 ++ | add RA, BASE, RA ++ | sub NARGS8:RC, CARG4, BASE ++ | str CARG2, SAVE_PC // Invalidate for subsequent line hook. ++ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] ++ | ldr INS, [PC, #-4] ++ | bx CRET1 ++ | ++ |->cont_stitch: // Trace stitching. ++ |.if JIT ++ | // RA = resultptr, CARG4 = meta base ++ | ldr RB, SAVE_MULTRES ++ | ldr INS, [PC, #-4] ++ | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace. ++ | subs RB, RB, #8 ++ | decode_RA8 RC, INS // Call base. ++ | beq >2 ++ |1: // Move results down. ++ | ldrd CARG1, CARG2, [RA] ++ | add RA, RA, #8 ++ | subs RB, RB, #8 ++ | strd_i CARG1, CARG2, BASE, RC ++ | add RC, RC, #8 ++ | bne <1 ++ |2: ++ | decode_RA8 RA, INS ++ | decode_RB8 RB, INS ++ | add RA, RA, RB ++ |3: ++ | cmp RA, RC ++ | mvn CARG2, #~LJ_TNIL ++ | bhi >9 // More results wanted? ++ | ++ | ldrh RA, TRACE:CARG3->traceno ++ | ldrh RC, TRACE:CARG3->link ++ | cmp RC, RA ++ | beq ->cont_nop // Blacklisted. ++ | cmp RC, #0 ++ | bne =>BC_JLOOP // Jump to stitched trace. ++ | ++ | // Stitch a new trace to the previous trace. ++ | sub RB, DISPATCH, #-DISPATCH_J(exitno) ++ | str RA, [RB] ++ | sub RB, DISPATCH, #-DISPATCH_J(L) ++ | str L, [RB] ++ | str BASE, L->base ++ | sub CARG1, DISPATCH, #-GG_DISP2J ++ | mov CARG2, PC ++ | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) ++ | ldr BASE, L->base ++ | b ->cont_nop ++ | ++ |9: // Fill up results with nil. ++ | strd_i CARG1, CARG2, BASE, RC ++ | add RC, RC, #8 ++ | b <3 ++ |.endif ++ | ++ |->vm_profhook: // Dispatch target for profiler hook. ++#if LJ_HASPROFILE ++ | mov CARG1, L ++ | str BASE, L->base ++ | mov CARG2, PC ++ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) ++ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. ++ | ldr BASE, L->base ++ | sub PC, PC, #4 ++ | b ->cont_nop ++#endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Trace exit handler ------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_exit_handler: ++ |.if JIT ++ | sub sp, sp, #12 ++ | push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12} ++ | ldr CARG1, [sp, #64] // Load original value of lr. ++ | ldr DISPATCH, [lr, #-1] // Load DISPATCH. ++ | add CARG3, sp, #64 // Recompute original value of sp. ++ | mv_vmstate CARG4, EXIT ++ | str CARG3, [sp, #52] // Store sp in RID_SP ++ | st_vmstate CARG4 ++ | ldr CARG4, [CARG1, #-5]! // Get exit instruction. ++ | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. ++ | str CARG1, [sp, #60] ++ |.if FPU ++ | vpush {d0-d15} ++ |.endif ++ | .long 0xf3432180 //SBFX CARG2, CARG4, #10, #1 ++ | .long 0xf36321d4 //BFI CARG2, CARG4, #11, #10 ++ | lsr CARG4, CARG4, #16 ++ | .long 0xf363010a //BFI CARG2, CARG4, #0, #11 ++ | add CARG1, CARG1, CARG2, lsl #1 ++ | ldr CARG2, [lr, #3] // Load exit stub group offset. ++ | sub CARG1, CARG1, lr ++ | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)] ++ | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. 
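++ | // The raw SBFX/BFI words above reassemble the signed displacement of
++ | // the Thumb-2 exit branch; combined with the stub group offset this
++ | // gives the trace exit number.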
++ | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] ++ | sub RB, DISPATCH, #-DISPATCH_J(exitno) ++ | str CARG1, [RB] ++ | mov CARG4, #0 ++ | str BASE, L->base ++ | sub RB, DISPATCH, #-DISPATCH_J(L) ++ | str L, [RB] ++ | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)] ++ | sub CARG1, DISPATCH, #-GG_DISP2J ++ | mov CARG2, sp ++ | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) ++ | // Returns MULTRES (unscaled) or negated error code. ++ | ldr CARG2, L->cframe ++ | ldr BASE, L->base ++ | bic CARG2, CARG2, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated. ++ | mov sp, CARG2 ++ | ldr PC, SAVE_PC // Get SAVE_PC. ++ | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). ++ | b >1 ++ |.endif ++ |->vm_exit_interp: ++ | // CARG1 = MULTRES or negated error code, BASE, PC and DISPATCH set. ++ |.if JIT ++ | ldr L, SAVE_L ++ |1: ++ | cmp CARG1, #0 ++ | blt >9 // Check for error from exit. ++ | lsl RC, CARG1, #3 ++ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] ++ | str RC, SAVE_MULTRES ++ | mov CARG3, #0 ++ | str BASE, L->base ++ | ldr CARG2, LFUNC:CARG2->field_pc ++ | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)] ++ | mv_vmstate CARG4, INTERP ++ | ldr KBASE, [CARG2, #PC2PROTO(k)] ++ | // Modified copy of ins_next which handles function header dispatch, too. ++ | ldrb OP, [PC] ++ | mov MASKR8, #255 ++ | ldr INS, [PC], #4 ++ | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. ++ | st_vmstate CARG4 ++ | cmp OP, #BC_FUNCC+2 // Fast function? ++ | bhs >4 ++ |2: ++ | cmp OP, #BC_FUNCF // Function header? ++ | ldr OP, [DISPATCH, OP, lsl #2] ++ | decode_RA8 RA, INS ++ | iteee lo ++ | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. ++ | subhs RC, RC, #8 ++ | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 ++ | ldrhs CARG3, [BASE, FRAME_FUNC] ++ | bx OP ++ | ++ |4: // Check frame below fast function. ++ | ldr CARG1, [BASE, FRAME_PC] ++ | ands CARG2, CARG1, #FRAME_TYPE ++ | bne <2 // Trace stitching continuation? ++ | // Otherwise set KBASE for Lua function below fast function. ++ | ldr CARG3, [CARG1, #-4] ++ | decode_RA8 CARG1, CARG3 ++ | sub CARG2, BASE, CARG1 ++ | ldr LFUNC:CARG3, [CARG2, #-16] ++ | ldr CARG3, LFUNC:CARG3->field_pc ++ | ldr KBASE, [CARG3, #PC2PROTO(k)] ++ | b <2 ++ | ++ |9: // Rethrow error from the right C frame. ++ | mov CARG1, L ++ | bl extern lj_err_run // (lua_State *L) ++ |.endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Math helper functions ---------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// FP value rounding. Called from JIT code. ++ |// ++ |// double lj_vm_floor/ceil/trunc(double x); ++ |.macro vm_round, func, hf ++ |.if hf == 1 ++ | vmov CARG1, CARG2, d0 ++ |.endif ++ | lsl CARG3, CARG2, #1 ++ | adds RB, CARG3, #0x00200000 ++ | bpl >2 // |x| < 1? ++ | mvn CARG4, #0x3cc ++ | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0. ++ | itt lo ++ | bxlo lr // |x| >= 2^52: done. 
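++ | // 1 <= |x| < 2^52: RB counts the fractional mantissa bits. ztest
++ | // collects the discarded bits; lo/hi are masked to truncate and
++ | // floor/ceil adjust the result when any discarded bit was set.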
++ | mvn CARG4, #1
++ | lsl CARG4, CARG4, RB
++ | bic CARG3, CARG1, CARG4 // ztest = lo & ~lomask
++ | and CARG1, CARG1, CARG4 // lo &= lomask
++ | subs RB, RB, #32
++ | mvn CARG4, #1
++ | itttt pl
++ | lslpl CARG4, CARG4, RB
++ | bicpl CARG4, CARG2, CARG4 // |x| <= 2^20: ztest |= hi & ~himask
++ | orrpl CARG3, CARG3, CARG4
++ | mvnpl CARG4, #1
++ | itt pl
++ | lslpl CARG4, CARG4, RB
++ | andpl CARG2, CARG2, CARG4 // |x| <= 2^20: hi &= himask
++ | mvn CARG4, #1
++ |.if "func" == "floor"
++ | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0)
++ |.else
++ | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0)
++ |.endif
++ |.if hf == 1
++ | it eq
++ | vmoveq d0, CARG1, CARG2
++ |.endif
++ | itt eq
++ | bxeq lr // iszero: done.
++ | mvn CARG4, #1
++ | cmp RB, #0
++ | ite pl
++ | lslpl CARG3, CARG4, RB
++ | mvnmi CARG3, #0
++ | add RB, RB, #32
++ | lsl CARG4, CARG4, RB
++ | subs CARG1, CARG1, CARG4 // lo = lo-lomask
++ | mvn CARG4, #1
++ | sbc CARG2, CARG2, CARG3 // hi = hi-himask+carry
++ |.if hf == 1
++ | vmov d0, CARG1, CARG2
++ |.endif
++ | bx lr
++ |
++ |2: // |x| < 1:
++ | itt cs
++ | bxcs lr // |x| is not finite.
++ | orr CARG3, CARG3, CARG1 // ztest = (2*hi) | lo
++ |.if "func" == "floor"
++ | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0)
++ |.else
++ | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0)
++ |.endif
++ | mov CARG1, #0 // lo = 0
++ | and CARG2, CARG2, #0x80000000
++ | itt ne
++ | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0)
++ | orrne CARG2, CARG2, CARG4
++ |.if hf == 1
++ | vmov d0, CARG1, CARG2
++ |.endif
++ | bx lr
++ |.endmacro
++ |
++ |9:
++ | .long 0x00003ff0 // hiword(+1.0) jturnsek: swapped halfwords!!!
++ |
++ |->vm_floor:
++ |.if HFABI
++ | vm_round floor, 1
++ |.endif
++ |->vm_floor_sf:
++ | vm_round floor, 0
++ |
++ |->vm_ceil:
++ |.if HFABI
++ | vm_round ceil, 1
++ |.endif
++ |->vm_ceil_sf:
++ | vm_round ceil, 0
++ |
++ |.macro vm_trunc, hf
++ |.if JIT
++ |.if hf == 1
++ | vmov CARG1, CARG2, d0
++ |.endif
++ | lsl CARG3, CARG2, #1
++ | adds RB, CARG3, #0x00200000
++ | itt pl
++ | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0.
++ | movpl CARG1, #0
++ |.if hf == 1
++ | it pl
++ | vmovpl d0, CARG1, CARG2
++ |.endif
++ | itt pl
++ | bxpl lr
++ | mvn CARG4, #0x3cc
++ | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0.
++ | itt lo
++ | bxlo lr // |x| >= 2^52: already done.
++ | mvn CARG4, #1
++ | lsl CARG4, CARG4, RB
++ | and CARG1, CARG1, CARG4 // lo &= lomask
++ | subs RB, RB, #32
++ | mvn CARG4, #1
++ | itt pl
++ | lsl CARG4, CARG4, RB
++ | andpl CARG2, CARG2, CARG4 // |x| <= 2^20: hi &= himask
++ |.if hf == 1
++ | vmov d0, CARG1, CARG2
++ |.endif
++ | bx lr
++ |.endif
++ |.endmacro
++ |
++ |->vm_trunc:
++ |.if HFABI
++ | vm_trunc 1
++ |.endif
++ |->vm_trunc_sf:
++ | vm_trunc 0
++ |
++ | // double lj_vm_mod(double dividend, double divisor);
++ |->vm_mod:
++ |.if FPU
++ | // Special calling convention. Also, RC (r11) is not preserved.
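++ | // Computes x - floor(x/y)*y (Lua's floored modulo); the operands
++ | // arrive in d6/d7 and the result is returned in d6.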
++ | vdiv.f64 d0, d6, d7 ++ | mov RC, lr ++ | vmov CARG1, CARG2, d0 ++ | bl ->vm_floor_sf ++ | vmov d0, CARG1, CARG2 ++ | vmul.f64 d0, d0, d7 ++ | mov lr, RC ++ | vsub.f64 d6, d6, d0 ++ | bx lr ++ |.else ++ | push {r0, r1, r2, r3, r4, lr} ++ | bl extern __aeabi_ddiv ++ | bl ->vm_floor_sf ++ | ldrd CARG3, CARG4, [sp, #8] ++ | bl extern __aeabi_dmul ++ | ldrd CARG3, CARG4, [sp] ++ | eor CARG2, CARG2, #0x80000000 ++ | bl extern __aeabi_dadd ++ | add sp, sp, #20 ++ | pop {pc} ++ |.endif ++ | ++ | // int lj_vm_modi(int dividend, int divisor); ++ |->vm_modi: ++ | ands RB, CARG1, #0x80000000 ++ | it mi ++ | rsbmi CARG1, CARG1, #0 // a = |dividend| ++ | eor RB, RB, CARG2, asr #1 // Keep signdiff and sign(divisor). ++ | cmp CARG2, #0 ++ | it mi ++ | rsbmi CARG2, CARG2, #0 // b = |divisor| ++ | subs CARG4, CARG2, #1 ++ | ite ne ++ | cmpne CARG1, CARG2 ++ | moveq CARG1, #0 // if (b == 1 || a == b) a = 0 ++ | it hi ++ | tsthi CARG2, CARG4 ++ | it eq ++ | andeq CARG1, CARG1, CARG4 // else if ((b & (b-1)) == 0) a &= b-1 ++ | bls >1 ++ | // Use repeated subtraction to get the remainder. ++ | clz CARG3, CARG1 ++ | clz CARG4, CARG2 ++ | sub CARG4, CARG4, CARG3 ++ | rsbs CARG3, CARG4, #31 // entry = (31-(clz(b)-clz(a)))*12 ++ | it ne ++ | .long 0xe8dff002 // tbbne [pc, CARG3] // Duff's device. ++ | .long 0xb8bec4ca, 0xa0a6acb2, 0x888e949a, 0x70767c82 // TBB table (part1) ++ | .long 0x585e646a, 0x40464c52, 0x282e343a, 0x10161c22 // TBB table (part2) ++ { ++ int i; ++ for (i = 31; i >= 0; i--) { ++ | cmp CARG1, CARG2, lsl #i ++ | it hs ++ | subhs CARG1, CARG1, CARG2, lsl #i ++ } ++ } ++ |1: ++ | cmp CARG1, #0 ++ | it ne ++ | cmpne RB, #0 ++ | it mi ++ | submi CARG1, CARG1, CARG2 // if (y != 0 && signdiff) y = y - b ++ | eors CARG2, CARG1, RB, lsl #1 ++ | it mi ++ | rsbmi CARG1, CARG1, #0 // if (sign(divisor) != sign(y)) y = -y ++ | bx lr ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Miscellaneous functions -------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.define NEXT_TAB, TAB:CARG1 ++ |.define NEXT_RES, CARG1 ++ |.define NEXT_IDX, CARG2 ++ |.define NEXT_TMP0, CARG3 ++ |.define NEXT_TMP1, CARG4 ++ |.define NEXT_LIM, r12 ++ |.define NEXT_RES_PTR, sp ++ |.define NEXT_RES_VAL, [sp] ++ |.define NEXT_RES_KEY_I, [sp, #8] ++ |.define NEXT_RES_KEY_IT, [sp, #12] ++ | ++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx) ++ |// Next idx returned in CRET2. ++ |->vm_next: ++ |.if JIT ++ | ldr NEXT_TMP0, NEXT_TAB->array ++ | ldr NEXT_LIM, NEXT_TAB->asize ++ | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3 ++ |1: // Traverse array part. ++ | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM ++ | bhs >5 ++ | ldr NEXT_TMP1, [NEXT_TMP0, #4] ++ | str NEXT_IDX, NEXT_RES_KEY_I ++ | add NEXT_TMP0, NEXT_TMP0, #8 ++ | add NEXT_IDX, NEXT_IDX, #1 ++ | checktp NEXT_TMP1, LJ_TNIL ++ | beq <1 // Skip holes in array part. ++ | ldr NEXT_TMP0, [NEXT_TMP0, #-8] ++ | mov NEXT_RES, NEXT_RES_PTR ++ | strd NEXT_TMP0, NEXT_TMP1, NEXT_RES_VAL ++ | mvn NEXT_TMP0, #~LJ_TISNUM ++ | str NEXT_TMP0, NEXT_RES_KEY_IT ++ | bx lr ++ | ++ |5: // Traverse hash part. 
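++ | // idx enumerates array slots first, then hash nodes: the node index
++ | // is idx - asize and the loop limit becomes asize + hmask.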
++ | ldr NEXT_TMP0, NEXT_TAB->hmask ++ | ldr NODE:NEXT_RES, NEXT_TAB->node ++ | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1 ++ | add NEXT_LIM, NEXT_LIM, NEXT_TMP0 ++ | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3 ++ |6: ++ | cmp NEXT_IDX, NEXT_LIM ++ | bhi >9 ++ | ldr NEXT_TMP1, NODE:NEXT_RES->val.it ++ | checktp NEXT_TMP1, LJ_TNIL ++ | add NEXT_IDX, NEXT_IDX, #1 ++ | itt ne ++ | bxne lr ++ | // Skip holes in hash part. ++ | add NEXT_RES, NEXT_RES, #sizeof(Node) ++ | b <6 ++ | ++ |9: // End of iteration. Set the key to nil (not the value). ++ | mvn NEXT_TMP0, #0 ++ | mov NEXT_RES, NEXT_RES_PTR ++ | str NEXT_TMP0, NEXT_RES_KEY_IT ++ | bx lr ++ |.endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- FFI helper functions ----------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// Handler for callback functions. ++ |// Saveregs already performed. Callback slot number in [sp], g in r12. ++ |->vm_ffi_callback: ++ |.if FFI ++ |.type CTSTATE, CTState, PC ++ | ldr CTSTATE, GL:r12->ctype_state ++ | add DISPATCH, r12, #GG_G2DISP ++ |.if FPU ++ | str r4, SAVE_R4 ++ | add r4, sp, CFRAME_SPACE+4+8*8 ++ | vstmdb r4!, {d8-d15} ++ |.endif ++ |.if HFABI ++ | add r12, CTSTATE, #offsetof(CTState, cb.fpr[8]) ++ |.endif ++ | strd CARG3, CARG4, CTSTATE->cb.gpr[2] ++ | strd CARG1, CARG2, CTSTATE->cb.gpr[0] ++ |.if HFABI ++ | vstmdb r12!, {d0-d7} ++ |.endif ++ | ldr CARG4, [sp] ++ | add CARG3, sp, #CFRAME_SIZE ++ | mov CARG1, CTSTATE ++ | lsr CARG4, CARG4, #3 ++ | str CARG3, CTSTATE->cb.stack ++ | mov CARG2, sp ++ | str CARG4, CTSTATE->cb.slot ++ | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. ++ | bl extern lj_ccallback_enter // (CTState *cts, void *cf) ++ | // Returns lua_State *. ++ | ldr BASE, L:CRET1->base ++ | mv_vmstate CARG2, INTERP ++ | ldr RC, L:CRET1->top ++ | mov MASKR8, #255 ++ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] ++ | mov L, CRET1 ++ | sub RC, RC, BASE ++ | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. ++ | st_vmstate CARG2 ++ | ins_callt ++ |.endif ++ | ++ |->cont_ffi_callback: // Return from FFI callback. ++ |.if FFI ++ | ldr CTSTATE, [DISPATCH, #DISPATCH_GL(ctype_state)] ++ | str BASE, L->base ++ | str CARG4, L->top ++ | str L, CTSTATE->L ++ | mov CARG1, CTSTATE ++ | mov CARG2, RA ++ | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) ++ | ldrd CARG1, CARG2, CTSTATE->cb.gpr[0] ++ |.if HFABI ++ | vldr d0, CTSTATE->cb.fpr[0] ++ |.endif ++ | b ->vm_leave_unw ++ |.endif ++ | ++ |->vm_ffi_call: // Call C function via FFI. ++ | // Caveat: needs special frame unwinding, see below. ++ |.if FFI ++ | .type CCSTATE, CCallState, r4 ++ | push {CCSTATE, r5, r11, lr} ++ | mov CCSTATE, CARG1 ++ | ldr CARG1, CCSTATE:CARG1->spadj ++ | ldrb CARG2, CCSTATE->nsp ++ | add CARG3, CCSTATE, #offsetof(CCallState, stack) ++ |.if HFABI ++ | add RB, CCSTATE, #offsetof(CCallState, fpr[0]) ++ |.endif ++ | mov r11, sp ++ | sub sp, sp, CARG1 // Readjust stack. ++ | subs CARG2, CARG2, #1 ++ |.if HFABI ++ | vldm RB, {d0-d7} ++ |.endif ++ | ldr RB, CCSTATE->func ++ | bmi >2 ++ |1: // Copy stack slots. 
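In C terms, the loop below moves the pre-marshalled stack arguments from the CCallState staging buffer onto the just-reserved stack area. A sketch, where the field semantics follow lj_ccall.h but the helper name is invented:

    #include <stdint.h>

    /* sp has already been lowered by spadj; nsp counts 32-bit slots,
       and the loop runs top-down like the assembly ('subs ... bpl'). */
    static void copy_stack_args(uint32_t *sp, const uint32_t *stage,
                                int nsp)
    {
      for (int i = nsp - 1; i >= 0; i--)
        sp[i] = stage[i];
    }

After the copy, gpr[0..3] are loaded into r0-r3 (and d0-d7 for HFABI) and the target is invoked via blx.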
++ | ldr CARG4, [CARG3, CARG2, lsl #2] ++ | str CARG4, [sp, CARG2, lsl #2] ++ | subs CARG2, CARG2, #1 ++ | bpl <1 ++ |2: ++ | ldrd CARG1, CARG2, CCSTATE->gpr[0] ++ | ldrd CARG3, CARG4, CCSTATE->gpr[2] ++ | blx RB ++ | mov sp, r11 ++ |.if HFABI ++ | add r12, CCSTATE, #offsetof(CCallState, fpr[4]) ++ |.endif ++ | strd CRET1, CRET2, CCSTATE->gpr[0] ++ |.if HFABI ++ | vstmdb r12!, {d0-d3} ++ |.endif ++ | pop {CCSTATE, r5, r11, pc} ++ |.endif ++ |// Note: vm_ffi_call must be the last function in this object file! ++ | ++ |//----------------------------------------------------------------------- ++} ++ ++/* Generate the code for a single instruction. */ ++static void build_ins(BuildCtx *ctx, BCOp op, int defop) ++{ ++ int vk = 0; ++ |=>defop: ++ ++ switch (op) { ++ ++ /* -- Comparison ops ---------------------------------------------------- */ ++ ++ /* Remember: all ops branch for a true comparison, fall through otherwise. */ ++ ++ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: ++ | // RA = src1*8, RC = src2, JMP with RC = target ++ | lsl RC, RC, #3 ++ | ldrd_iw CARG1, CARG2, RA, BASE ++ | ldrh RB, [PC, #2] ++ | ldrd_iw CARG3, CARG4, RC, BASE ++ | add PC, PC, #4 ++ | add RB, PC, RB, lsl #2 ++ | checktp CARG2, LJ_TISNUM ++ | bne >3 ++ | checktp CARG4, LJ_TISNUM ++ | bne >4 ++ | cmp CARG1, CARG3 ++ if (op == BC_ISLT) { ++ | it lt ++ | sublt PC, RB, #0x20000 ++ } else if (op == BC_ISGE) { ++ | it ge ++ | subge PC, RB, #0x20000 ++ } else if (op == BC_ISLE) { ++ | it le ++ | suble PC, RB, #0x20000 ++ } else { ++ | it gt ++ | subgt PC, RB, #0x20000 ++ } ++ |1: ++ | ins_next ++ | ++ |3: // CARG1, CARG2 is not an integer. ++ |.if FPU ++ | vldr d0, [RA] ++ | bhi ->vmeta_comp ++ | // d0 is a number. ++ | checktp CARG4, LJ_TISNUM ++ | vldr d1, [RC] ++ | blo >5 ++ | bhi ->vmeta_comp ++ | // d0 is a number, CARG3 is an integer. ++ | vmov s4, CARG3 ++ | vcvt.f64.s32 d1, s4 ++ | b >5 ++ |4: // CARG1 is an integer, CARG3, CARG4 is not an integer. ++ | vldr d1, [RC] ++ | bhi ->vmeta_comp ++ | // CARG1 is an integer, d1 is a number. ++ | vmov s4, CARG1 ++ | vcvt.f64.s32 d0, s4 ++ |5: // d0 and d1 are numbers. ++ | vcmp.f64 d0, d1 ++ | vmrs ++ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. ++ if (op == BC_ISLT) { ++ | it lo ++ | sublo PC, RB, #0x20000 ++ } else if (op == BC_ISGE) { ++ | it hs ++ | subhs PC, RB, #0x20000 ++ } else if (op == BC_ISLE) { ++ | it ls ++ | subls PC, RB, #0x20000 ++ } else { ++ | it hi ++ | subhi PC, RB, #0x20000 ++ } ++ | b <1 ++ |.else ++ | bhi ->vmeta_comp ++ | // CARG1, CARG2 is a number. ++ | checktp CARG4, LJ_TISNUM ++ | it lo ++ | movlo RA, RB // Save RB. ++ | blo >5 ++ | bhi ->vmeta_comp ++ | // CARG1, CARG2 is a number, CARG3 is an integer. ++ | mov CARG1, CARG3 ++ | mov RC, RA ++ | mov RA, RB // Save RB. ++ | bl extern __aeabi_i2d ++ | mov CARG3, CARG1 ++ | mov CARG4, CARG2 ++ | ldrd CARG1, CARG2, [RC] // Restore first operand. ++ | b >5 ++ |4: // CARG1 is an integer, CARG3, CARG4 is not an integer. ++ | bhi ->vmeta_comp ++ | // CARG1 is an integer, CARG3, CARG4 is a number. ++ | mov RA, RB // Save RB. ++ | bl extern __aeabi_i2d ++ | ldrd CARG3, CARG4, [RC] // Restore second operand. ++ |5: // CARG1, CARG2 and CARG3, CARG4 are numbers. ++ | bl extern __aeabi_cdcmple ++ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 
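That comment is worth unpacking: after either compare sequence, an unordered (NaN) result sets C and clears Z, so the unsigned conditions lo/ls read false while hs/hi read true. Lua requires every ordered comparison involving NaN to be false, which dictates the choice of condition codes:

    /* Lua semantics: comparisons with NaN are false. */
    static int islt_branches(double a, double b) { return a < b;    }
    static int isge_branches(double a, double b) { return !(a < b); }
    /* With a NaN operand, (a < b) is false, so ISLT/ISLE must fall
       through (lo/ls: false on unordered) while ISGE/ISGT must branch
       (hs/hi: true on unordered). */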
++ if (op == BC_ISLT) { ++ | it lo ++ | sublo PC, RA, #0x20000 ++ } else if (op == BC_ISGE) { ++ | it hs ++ | subhs PC, RA, #0x20000 ++ } else if (op == BC_ISLE) { ++ | it ls ++ | subls PC, RA, #0x20000 ++ } else { ++ | it hi ++ | subhi PC, RA, #0x20000 ++ } ++ | b <1 ++ |.endif ++ break; ++ ++ case BC_ISEQV: case BC_ISNEV: ++ vk = op == BC_ISEQV; ++ | // RA = src1*8, RC = src2, JMP with RC = target ++ | lsl RC, RC, #3 ++ | ldrd_iw CARG1, CARG2, RA, BASE ++ | ldrh RB, [PC, #2] ++ | ldrd_iw CARG3, CARG4, RC, BASE ++ | add PC, PC, #4 ++ | add RB, PC, RB, lsl #2 ++ | checktp CARG2, LJ_TISNUM ++ | it ls ++ | cmnls CARG4, #-LJ_TISNUM ++ if (vk) { ++ | bls ->BC_ISEQN_Z ++ } else { ++ | bls ->BC_ISNEN_Z ++ } ++ | // Either or both types are not numbers. ++ |.if FFI ++ | checktp CARG2, LJ_TCDATA ++ | checktpne CARG4, LJ_TCDATA ++ | beq ->vmeta_equal_cd ++ |.endif ++ | cmp CARG2, CARG4 // Compare types. ++ | bne >2 // Not the same type? ++ | checktp CARG2, LJ_TISPRI ++ | bhs >1 // Same type and primitive type? ++ | ++ | // Same types and not a primitive type. Compare GCobj or pvalue. ++ | cmp CARG1, CARG3 ++ if (vk) { ++ | bne >3 // Different GCobjs or pvalues? ++ |1: // Branch if same. ++ | sub PC, RB, #0x20000 ++ |2: // Different. ++ | ins_next ++ |3: ++ | checktp CARG2, LJ_TISTABUD ++ | bhi <2 // Different objects and not table/ud? ++ } else { ++ | beq >1 // Same GCobjs or pvalues? ++ | checktp CARG2, LJ_TISTABUD ++ | bhi >2 // Different objects and not table/ud? ++ } ++ | // Different tables or userdatas. Need to check __eq metamethod. ++ | // Field metatable must be at same offset for GCtab and GCudata! ++ | ldr TAB:RA, TAB:CARG1->metatable ++ | cmp TAB:RA, #0 ++ if (vk) { ++ | beq <2 // No metatable? ++ } else { ++ | beq >2 // No metatable? ++ } ++ | ldrb RA, TAB:RA->nomm ++ | mov CARG4, #1-vk // ne = 0 or 1. ++ | mov CARG2, CARG1 ++ | tst RA, #1<vmeta_equal // 'no __eq' flag not set? ++ if (vk) { ++ | b <2 ++ } else { ++ |2: // Branch if different. ++ | sub PC, RB, #0x20000 ++ |1: // Same. ++ | ins_next ++ } ++ break; ++ ++ case BC_ISEQS: case BC_ISNES: ++ vk = op == BC_ISEQS; ++ | // RA = src*8, RC = str_const (~), JMP with RC = target ++ | mvn RC, RC ++ | ldrd_i CARG1, CARG2, BASE, RA ++ | ldrh RB, [PC, #2] ++ | ldr STR:CARG3, [KBASE, RC, lsl #2] ++ | add PC, PC, #4 ++ | add RB, PC, RB, lsl #2 ++ | checktp CARG2, LJ_TSTR ++ |.if FFI ++ | bne >7 ++ | cmp CARG1, CARG3 ++ |.else ++ | it eq ++ | cmpeq CARG1, CARG3 ++ |.endif ++ if (vk) { ++ | it eq ++ | subeq PC, RB, #0x20000 ++ |1: ++ } else { ++ |1: ++ | it ne ++ | subne PC, RB, #0x20000 ++ } ++ | ins_next ++ | ++ |.if FFI ++ |7: ++ | checktp CARG2, LJ_TCDATA ++ | bne <1 ++ | b ->vmeta_equal_cd ++ |.endif ++ break; ++ ++ case BC_ISEQN: case BC_ISNEN: ++ vk = op == BC_ISEQN; ++ | // RA = src*8, RC = num_const (~), JMP with RC = target ++ | lsl RC, RC, #3 ++ | ldrd_iw CARG1, CARG2, RA, BASE ++ | ldrh RB, [PC, #2] ++ | ldrd_iw CARG3, CARG4, RC, KBASE ++ | add PC, PC, #4 ++ | add RB, PC, RB, lsl #2 ++ if (vk) { ++ |->BC_ISEQN_Z: ++ } else { ++ |->BC_ISNEN_Z: ++ } ++ | checktp CARG2, LJ_TISNUM ++ | bne >3 ++ | checktp CARG4, LJ_TISNUM ++ | bne >4 ++ | cmp CARG1, CARG3 ++ if (vk) { ++ | it eq ++ | subeq PC, RB, #0x20000 ++ |1: ++ } else { ++ |1: ++ | it ne ++ | subne PC, RB, #0x20000 ++ } ++ |2: ++ | ins_next ++ | ++ |3: // CARG1, CARG2 is not an integer. 
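Before the numeric fallback below, it helps to restate the raw-equality rules that the BC_ISEQV/BC_ISNEV code above encodes (leaving the FFI cdata detour aside). A self-contained sketch with toy tags, not LuaJIT's real tagged values:

    enum { T_NIL, T_FALSE, T_TRUE, T_NUM, T_STR, T_TAB, T_UD };
    typedef struct { int it; void *gcv; double n; } TV;  /* toy value */

    /* 1 = equal, 0 = unequal, -1 = same-type table/userdata with
       different pointers: consult the __eq metamethod. */
    static int raweq_sketch(const TV *a, const TV *b)
    {
      if (a->it == T_NUM && b->it == T_NUM)  /* ISEQN_Z path */
        return a->n == b->n;
      if (a->it != b->it) return 0;          /* different types */
      if (a->it <= T_TRUE) return 1;         /* primitive: tag is value */
      if (a->gcv == b->gcv) return 1;        /* object identity */
      return (a->it == T_TAB || a->it == T_UD) ? -1 : 0;
    }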
++ |.if FFI ++ | bhi >7 ++ |.else ++ if (!vk) { ++ | it hi ++ | subhi PC, RB, #0x20000 ++ } ++ | bhi <2 ++ |.endif ++ |.if FPU ++ | checktp CARG4, LJ_TISNUM ++ | vmov s4, CARG3 ++ | vldr d0, [RA] ++ | ite lo ++ | vldrlo d1, [RC] ++ | vcvths.f64.s32 d1, s4 ++ | b >5 ++ |4: // CARG1 is an integer, d1 is a number. ++ | vmov s4, CARG1 ++ | vldr d1, [RC] ++ | vcvt.f64.s32 d0, s4 ++ |5: // d0 and d1 are numbers. ++ | vcmp.f64 d0, d1 ++ | vmrs ++ if (vk) { ++ | it eq ++ | subeq PC, RB, #0x20000 ++ } else { ++ | it ne ++ | subne PC, RB, #0x20000 ++ } ++ | b <2 ++ |.else ++ | // CARG1, CARG2 is a number. ++ | checktp CARG4, LJ_TISNUM ++ | it lo ++ | movlo RA, RB // Save RB. ++ | blo >5 ++ | // CARG1, CARG2 is a number, CARG3 is an integer. ++ | mov CARG1, CARG3 ++ | mov RC, RA ++ |4: // CARG1 is an integer, CARG3, CARG4 is a number. ++ | mov RA, RB // Save RB. ++ | bl extern __aeabi_i2d ++ | ldrd CARG3, CARG4, [RC] // Restore other operand. ++ |5: // CARG1, CARG2 and CARG3, CARG4 are numbers. ++ | bl extern __aeabi_cdcmpeq ++ if (vk) { ++ | it eq ++ | subeq PC, RA, #0x20000 ++ } else { ++ | it ne ++ | subne PC, RA, #0x20000 ++ } ++ | b <2 ++ |.endif ++ | ++ |.if FFI ++ |7: ++ | checktp CARG2, LJ_TCDATA ++ | bne <1 ++ | b ->vmeta_equal_cd ++ |.endif ++ break; ++ ++ case BC_ISEQP: case BC_ISNEP: ++ vk = op == BC_ISEQP; ++ | // RA = src*8, RC = primitive_type (~), JMP with RC = target ++ | ldrd_i CARG1, CARG2, BASE, RA ++ | ldrh RB, [PC, #2] ++ | add PC, PC, #4 ++ | mvn RC, RC ++ | add RB, PC, RB, lsl #2 ++ |.if FFI ++ | checktp CARG2, LJ_TCDATA ++ | beq ->vmeta_equal_cd ++ |.endif ++ | cmp CARG2, RC ++ if (vk) { ++ | it eq ++ | subeq PC, RB, #0x20000 ++ } else { ++ | it ne ++ | subne PC, RB, #0x20000 ++ } ++ | ins_next ++ break; ++ ++ /* -- Unary test and copy ops ------------------------------------------- */ ++ ++ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: ++ | // RA = dst*8 or unused, RC = src, JMP with RC = target ++ | add RC, BASE, RC, lsl #3 ++ | ldrh RB, [PC, #2] ++ | ldrd CARG1, CARG2, [RC] ++ | add PC, PC, #4 ++ | add RB, PC, RB, lsl #2 ++ | checktp CARG2, LJ_TTRUE ++ if (op == BC_ISTC || op == BC_IST) { ++ | it ls ++ | subls PC, RB, #0x20000 ++ if (op == BC_ISTC) { ++ | it ls ++ | strdls_i CARG1, CARG2, BASE, RA ++ } ++ } else { ++ | it hi ++ | subhi PC, RB, #0x20000 ++ if (op == BC_ISFC) { ++ | it hi ++ | strdhi_i CARG1, CARG2, BASE, RA ++ } ++ } ++ | ins_next ++ break; ++ ++ case BC_ISTYPE: ++ | // RA = src*8, RC = -type ++ | ldrd_i CARG1, CARG2, BASE, RA ++ | ins_next1 ++ | cmn CARG2, RC ++ | ins_next2 ++ | bne ->vmeta_istype ++ | ins_next3 ++ break; ++ case BC_ISNUM: ++ | // RA = src*8, RC = -(TISNUM-1) ++ | ldrd_i CARG1, CARG2, BASE, RA ++ | ins_next1 ++ | checktp CARG2, LJ_TISNUM ++ | ins_next2 ++ | bhs ->vmeta_istype ++ | ins_next3 ++ break; ++ ++ /* -- Unary ops --------------------------------------------------------- */ ++ ++ case BC_MOV: ++ | // RA = dst*8, RC = src ++ | lsl RC, RC, #3 ++ | ins_next1 ++ | ldrd_i CARG1, CARG2, BASE, RC ++ | ins_next2 ++ | strd_i CARG1, CARG2, BASE, RA ++ | ins_next3 ++ break; ++ case BC_NOT: ++ | // RA = dst*8, RC = src ++ | add RC, BASE, RC, lsl #3 ++ | ins_next1 ++ | ldr CARG1, [RC, #4] ++ | add RA, BASE, RA ++ | ins_next2 ++ | checktp CARG1, LJ_TTRUE ++ | ite ls ++ | mvnls CARG2, #~LJ_TFALSE ++ | mvnhi CARG2, #~LJ_TTRUE ++ | str CARG2, [RA, #4] ++ | ins_next3 ++ break; ++ case BC_UNM: ++ | // RA = dst*8, RC = src ++ | lsl RC, RC, #3 ++ | ldrd_i CARG1, CARG2, BASE, RC ++ | ins_next1 ++ | ins_next2 ++ | checktp CARG2, LJ_TISNUM ++ | 
bhi ->vmeta_unm ++ | it ne ++ | eorne CARG2, CARG2, #0x80000000 ++ | bne >5 ++ | it eq ++ | rsbseq CARG1, CARG1, #0 ++ | it vs ++ | ldrdvs CARG1, CARG2, >9 ++ |5: ++ | strd_i CARG1, CARG2, BASE, RA ++ | ins_next3 ++ | ++ |.align 8 ++ |9: ++ | .long 0x00000000, 0x000041e0 // 2^31. jturnsek: swaped halfwords!!! ++ break; ++ case BC_LEN: ++ | // RA = dst*8, RC = src ++ | lsl RC, RC, #3 ++ | ldrd_i CARG1, CARG2, BASE, RC ++ | checkstr CARG2, >2 ++ | ldr CARG1, STR:CARG1->len ++ |1: ++ | mvn CARG2, #~LJ_TISNUM ++ | ins_next1 ++ | ins_next2 ++ | strd_i CARG1, CARG2, BASE, RA ++ | ins_next3 ++ |2: ++ | checktab CARG2, ->vmeta_len ++#if LJ_52 ++ | ldr TAB:CARG3, TAB:CARG1->metatable ++ | cmp TAB:CARG3, #0 ++ | bne >9 ++ |3: ++#endif ++ |->BC_LEN_Z: ++ | .IOS mov RC, BASE ++ | bl extern lj_tab_len // (GCtab *t) ++ | // Returns uint32_t (but less than 2^31). ++ | .IOS mov BASE, RC ++ | b <1 ++#if LJ_52 ++ |9: ++ | ldrb CARG4, TAB:CARG3->nomm ++ | tst CARG4, #1<vmeta_len ++#endif ++ break; ++ ++ /* -- Binary ops -------------------------------------------------------- */ ++ ++ |.macro ins_arithcheck, cond, ncond, target ++ ||if (vk == 1) { ++ | cmn CARG4, #-LJ_TISNUM ++ | it cond ++ | cmn..cond CARG2, #-LJ_TISNUM ++ ||} else { ++ | cmn CARG2, #-LJ_TISNUM ++ | it cond ++ | cmn..cond CARG4, #-LJ_TISNUM ++ ||} ++ | b..ncond target ++ |.endmacro ++ |.macro ins_arithcheck_int, target ++ | ins_arithcheck eq, ne, target ++ |.endmacro ++ |.macro ins_arithcheck_num, target ++ | ins_arithcheck lo, hs, target ++ |.endmacro ++ | ++ |.macro ins_arithpre ++ | decode_RB8 RB, INS ++ | decode_RC8 RC, INS ++ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 ++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ++ ||switch (vk) { ++ ||case 0: ++ | .if FPU ++ | ldrd_iw CARG1, CARG2, RB, BASE ++ | ldrd_iw CARG3, CARG4, RC, KBASE ++ | .else ++ | ldrd_i CARG1, CARG2, BASE, RB ++ | ldrd_i CARG3, CARG4, KBASE, RC ++ | .endif ++ || break; ++ ||case 1: ++ | .if FPU ++ | ldrd_iw CARG3, CARG4, RB, BASE ++ | ldrd_iw CARG1, CARG2, RC, KBASE ++ | .else ++ | ldrd_i CARG3, CARG4, BASE, RB ++ | ldrd_i CARG1, CARG2, KBASE, RC ++ | .endif ++ || break; ++ ||default: ++ | .if FPU ++ | ldrd_iw CARG1, CARG2, RB, BASE ++ | ldrd_iw CARG3, CARG4, RC, BASE ++ | .else ++ | ldrd_i CARG1, CARG2, BASE, RB ++ | ldrd_i CARG3, CARG4, BASE, RC ++ | .endif ++ || break; ++ ||} ++ |.endmacro ++ | ++ |.macro ins_arithpre_fpu, reg1, reg2 ++ |.if FPU ++ ||if (vk == 1) { ++ | vldr reg2, [RB] ++ | vldr reg1, [RC] ++ ||} else { ++ | vldr reg1, [RB] ++ | vldr reg2, [RC] ++ ||} ++ |.endif ++ |.endmacro ++ | ++ |.macro ins_arithpost_fpu, reg ++ | ins_next1 ++ | add RA, BASE, RA ++ | ins_next2 ++ | vstr reg, [RA] ++ | ins_next3 ++ |.endmacro ++ | ++ |.macro ins_arithfallback, ins ++ ||switch (vk) { ++ ||case 0: ++ | ins ->vmeta_arith_vn ++ || break; ++ ||case 1: ++ | ins ->vmeta_arith_nv ++ || break; ++ ||default: ++ | ins ->vmeta_arith_vv ++ || break; ++ ||} ++ |.endmacro ++ | ++ |.macro ins_arithdn, intins, fpins, fpcall ++ | ins_arithpre ++ |.if "intins" ~= "vm_modi" and not FPU ++ | ins_next1 ++ |.endif ++ | ins_arithcheck_int >5 ++ |.if "intins" == "smull" ++ | smull CARG1, RC, CARG3, CARG1 ++ | cmp RC, CARG1, asr #31 ++ | ins_arithfallback bne ++ |.elif "intins" == "vm_modi" ++ | movs CARG2, CARG3 ++ | ins_arithfallback beq ++ | bl ->vm_modi ++ | mvn CARG2, #~LJ_TISNUM ++ |.else ++ | intins CARG1, CARG1, CARG3 ++ | ins_arithfallback bvs ++ |.endif ++ |4: ++ |.if "intins" == "vm_modi" or FPU ++ | ins_next1 ++ |.endif ++ | ins_next2 ++ | strd_i CARG1, 
CARG2, BASE, RA ++ | ins_next3 ++ |5: // FP variant. ++ | ins_arithpre_fpu d6, d7 ++ | ins_arithfallback ins_arithcheck_num ++ |.if FPU ++ |.if "intins" == "vm_modi" ++ | bl fpcall ++ |.else ++ | fpins d6, d6, d7 ++ |.endif ++ | ins_arithpost_fpu d6 ++ |.else ++ | bl fpcall ++ |.if "intins" ~= "vm_modi" ++ | ins_next1 ++ |.endif ++ | b <4 ++ |.endif ++ |.endmacro ++ | ++ |.macro ins_arithfp, fpins, fpcall ++ | ins_arithpre ++ |.if "fpins" ~= "extern" or HFABI ++ | ins_arithpre_fpu d0, d1 ++ |.endif ++ | ins_arithfallback ins_arithcheck_num ++ |.if "fpins" == "extern" ++ | .IOS mov RC, BASE ++ | bl fpcall ++ | .IOS mov BASE, RC ++ |.elif FPU ++ | fpins d0, d0, d1 ++ |.else ++ | bl fpcall ++ |.endif ++ |.if ("fpins" ~= "extern" or HFABI) and FPU ++ | ins_arithpost_fpu d0 ++ |.else ++ | ins_next1 ++ | ins_next2 ++ | strd_i CARG1, CARG2, BASE, RA ++ | ins_next3 ++ |.endif ++ |.endmacro ++ ++ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: ++ | ins_arithdn adds, vadd.f64, extern __aeabi_dadd ++ break; ++ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: ++ | ins_arithdn subs, vsub.f64, extern __aeabi_dsub ++ break; ++ case BC_MULVN: case BC_MULNV: case BC_MULVV: ++ | ins_arithdn smull, vmul.f64, extern __aeabi_dmul ++ break; ++ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: ++ | ins_arithfp vdiv.f64, extern __aeabi_ddiv ++ break; ++ case BC_MODVN: case BC_MODNV: case BC_MODVV: ++ | ins_arithdn vm_modi, vm_mod, ->vm_mod ++ break; ++ case BC_POW: ++ | // NYI: (partial) integer arithmetic. ++ | ins_arithfp extern, extern pow ++ break; ++ ++ case BC_CAT: ++ | decode_RB8 RC, INS ++ | decode_RC8 RB, INS ++ | // RA = dst*8, RC = src_start*8, RB = src_end*8 (note: RB/RC swapped!) ++ | sub CARG3, RB, RC ++ | str BASE, L->base ++ | add CARG2, BASE, RB ++ |->BC_CAT_Z: ++ | // RA = dst*8, RC = src_start*8, CARG2 = top-1 ++ | mov CARG1, L ++ | str PC, SAVE_PC ++ | lsr CARG3, CARG3, #3 ++ | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) ++ | // Returns NULL (finished) or TValue * (metamethod). ++ | ldr BASE, L->base ++ | cmp CRET1, #0 ++ | bne ->vmeta_binop ++ | ldrd_i CARG3, CARG4, BASE, RC ++ | ins_next1 ++ | ins_next2 ++ | strd_i CARG3, CARG4, BASE, RA // Copy result to RA. ++ | ins_next3 ++ break; ++ ++ /* -- Constant ops ------------------------------------------------------ */ ++ ++ case BC_KSTR: ++ | // RA = dst*8, RC = str_const (~) ++ | mvn RC, RC ++ | ins_next1 ++ | ldr CARG1, [KBASE, RC, lsl #2] ++ | mvn CARG2, #~LJ_TSTR ++ | ins_next2 ++ | strd_i CARG1, CARG2, BASE, RA ++ | ins_next3 ++ break; ++ case BC_KCDATA: ++ |.if FFI ++ | // RA = dst*8, RC = cdata_const (~) ++ | mvn RC, RC ++ | ins_next1 ++ | ldr CARG1, [KBASE, RC, lsl #2] ++ | mvn CARG2, #~LJ_TCDATA ++ | ins_next2 ++ | strd_i CARG1, CARG2, BASE, RA ++ | ins_next3 ++ |.endif ++ break; ++ case BC_KSHORT: ++ | // RA = dst*8, (RC = int16_literal) ++ | mov CARG1, INS, asr #16 // Refetch sign-extended reg. 
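The asr #16 above is the whole of the BC_KSHORT decode: the signed 16-bit literal occupies the top halfword of the instruction word, so a single arithmetic shift both extracts and sign-extends it. A related trick appears in ins_arithdn above, where smull plus 'cmp RC, CARG1, asr #31' detects a multiply that does not fit in 32 bits. Both in C, with illustrative names:

    #include <stdint.h>

    static int32_t kshort_decode(uint32_t ins)
    {
      return (int32_t)ins >> 16;          /* sign-extended D operand */
    }

    /* Overflow iff the high word of the 64-bit product is not the
       sign-extension of the low word. */
    static int mul_overflows(int32_t x, int32_t y)
    {
      int64_t p = (int64_t)x * y;
      return (int32_t)(p >> 32) != ((int32_t)p >> 31);
    }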
++ | mvn CARG2, #~LJ_TISNUM ++ | ins_next1 ++ | ins_next2 ++ | strd_i CARG1, CARG2, BASE, RA ++ | ins_next3 ++ break; ++ case BC_KNUM: ++ | // RA = dst*8, RC = num_const ++ | lsl RC, RC, #3 ++ | ins_next1 ++ | ldrd_i CARG1, CARG2, KBASE, RC ++ | ins_next2 ++ | strd_i CARG1, CARG2, BASE, RA ++ | ins_next3 ++ break; ++ case BC_KPRI: ++ | // RA = dst*8, RC = primitive_type (~) ++ | add RA, BASE, RA ++ | mvn RC, RC ++ | ins_next1 ++ | ins_next2 ++ | str RC, [RA, #4] ++ | ins_next3 ++ break; ++ case BC_KNIL: ++ | // RA = base*8, RC = end ++ | add RA, BASE, RA ++ | add RC, BASE, RC, lsl #3 ++ | mvn CARG1, #~LJ_TNIL ++ | str CARG1, [RA, #4] ++ | add RA, RA, #8 ++ |1: ++ | str CARG1, [RA, #4] ++ | cmp RA, RC ++ | add RA, RA, #8 ++ | blt <1 ++ | ins_next_ ++ break; ++ ++ /* -- Upvalue and function ops ------------------------------------------ */ ++ ++ case BC_UGET: ++ | // RA = dst*8, RC = uvnum ++ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] ++ | lsl RC, RC, #2 ++ | add RC, RC, #offsetof(GCfuncL, uvptr) ++ | ldr UPVAL:CARG2, [LFUNC:CARG2, RC] ++ | ldr CARG2, UPVAL:CARG2->v ++ | ldrd CARG3, CARG4, [CARG2] ++ | ins_next1 ++ | ins_next2 ++ | strd_i CARG3, CARG4, BASE, RA ++ | ins_next3 ++ break; ++ case BC_USETV: ++ | // RA = uvnum*8, RC = src ++ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] ++ | lsr RA, RA, #1 ++ | add RA, RA, #offsetof(GCfuncL, uvptr) ++ | lsl RC, RC, #3 ++ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] ++ | ldrd_i CARG3, CARG4, BASE, RC ++ | ldrb RB, UPVAL:CARG2->marked ++ | ldrb RC, UPVAL:CARG2->closed ++ | ldr CARG2, UPVAL:CARG2->v ++ | tst RB, #LJ_GC_BLACK // isblack(uv) ++ | add RB, CARG4, #-LJ_TISGCV ++ | it ne ++ | cmpne RC, #0 ++ | strd CARG3, CARG4, [CARG2] ++ | bne >2 // Upvalue is closed and black? ++ |1: ++ | ins_next ++ | ++ |2: // Check if new value is collectable. ++ | cmn RB, #-(LJ_TNUMX - LJ_TISGCV) ++ | it hi ++ | ldrbhi RC, GCOBJ:CARG3->gch.marked ++ | bls <1 // tvisgcv(v) ++ | sub CARG1, DISPATCH, #-GG_DISP2G ++ | tst RC, #LJ_GC_WHITES ++ | // Crossed a write barrier. Move the barrier forward. ++ |.if IOS ++ | beq <1 ++ | mov RC, BASE ++ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | mov BASE, RC ++ |.else ++ | it ne ++ | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ |.endif ++ | b <1 ++ break; ++ case BC_USETS: ++ | // RA = uvnum*8, RC = str_const (~) ++ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] ++ | lsr RA, RA, #1 ++ | add RA, RA, #offsetof(GCfuncL, uvptr) ++ | mvn RC, RC ++ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] ++ | ldr STR:CARG3, [KBASE, RC, lsl #2] ++ | ldrb RB, UPVAL:CARG2->marked ++ | ldrb RC, UPVAL:CARG2->closed ++ | ldr CARG2, UPVAL:CARG2->v ++ | mvn CARG4, #~LJ_TSTR ++ | tst RB, #LJ_GC_BLACK // isblack(uv) ++ | ldrb RB, STR:CARG3->marked ++ | strd CARG3, CARG4, [CARG2] ++ | bne >2 ++ |1: ++ | ins_next ++ | ++ |2: // Check if string is white and ensure upvalue is closed. ++ | tst RB, #LJ_GC_WHITES // iswhite(str) ++ | it ne ++ | cmpne RC, #0 ++ | sub CARG1, DISPATCH, #-GG_DISP2G ++ | // Crossed a write barrier. Move the barrier forward. 
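Condensed, the condition BC_USETV checks above before calling lj_gc_barrieruv is: the upvalue must be closed and black, and the stored value must be a white collectable object. A sketch of the predicate, with flag values as in lj_gc.h and an argument list invented for illustration:

    #define LJ_GC_WHITES 0x03   /* white0|white1, as in lj_gc.h */
    #define LJ_GC_BLACK  0x04

    static int uv_needs_barrier(unsigned uv_marked, int uv_closed,
                                int val_is_collectable,
                                unsigned val_marked)
    {
      return (uv_marked & LJ_GC_BLACK) && uv_closed &&
             val_is_collectable && (val_marked & LJ_GC_WHITES);
    }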
++ |.if IOS ++ | beq <1 ++ | mov RC, BASE ++ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | mov BASE, RC ++ |.else ++ | it ne ++ | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ |.endif ++ | b <1 ++ break; ++ case BC_USETN: ++ | // RA = uvnum*8, RC = num_const ++ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] ++ | lsr RA, RA, #1 ++ | add RA, RA, #offsetof(GCfuncL, uvptr) ++ | lsl RC, RC, #3 ++ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] ++ | ldrd_i CARG3, CARG4, KBASE, RC ++ | ldr CARG2, UPVAL:CARG2->v ++ | ins_next1 ++ | ins_next2 ++ | strd CARG3, CARG4, [CARG2] ++ | ins_next3 ++ break; ++ case BC_USETP: ++ | // RA = uvnum*8, RC = primitive_type (~) ++ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] ++ | lsr RA, RA, #1 ++ | add RA, RA, #offsetof(GCfuncL, uvptr) ++ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] ++ | mvn RC, RC ++ | ldr CARG2, UPVAL:CARG2->v ++ | ins_next1 ++ | ins_next2 ++ | str RC, [CARG2, #4] ++ | ins_next3 ++ break; ++ ++ case BC_UCLO: ++ | // RA = level*8, RC = target ++ | ldr CARG3, L->openupval ++ | add RC, PC, RC, lsl #2 ++ | str BASE, L->base ++ | cmp CARG3, #0 ++ | sub PC, RC, #0x20000 ++ | beq >1 ++ | mov CARG1, L ++ | add CARG2, BASE, RA ++ | bl extern lj_func_closeuv // (lua_State *L, TValue *level) ++ | ldr BASE, L->base ++ |1: ++ | ins_next ++ break; ++ ++ case BC_FNEW: ++ | // RA = dst*8, RC = proto_const (~) (holding function prototype) ++ | mvn RC, RC ++ | str BASE, L->base ++ | ldr CARG2, [KBASE, RC, lsl #2] ++ | str PC, SAVE_PC ++ | ldr CARG3, [BASE, FRAME_FUNC] ++ | mov CARG1, L ++ | // (lua_State *L, GCproto *pt, GCfuncL *parent) ++ | bl extern lj_func_newL_gc ++ | // Returns GCfuncL *. ++ | ldr BASE, L->base ++ | mvn CARG2, #~LJ_TFUNC ++ | ins_next1 ++ | ins_next2 ++ | strd_i CARG1, CARG2, BASE, RA ++ | ins_next3 ++ break; ++ ++ /* -- Table ops --------------------------------------------------------- */ ++ ++ case BC_TNEW: ++ case BC_TDUP: ++ | // RA = dst*8, RC = (hbits|asize) | tab_const (~) ++ if (op == BC_TDUP) { ++ | mvn RC, RC ++ } ++ | sub CARG1, DISPATCH, #-DISPATCH_GL(gc.total) ++ | ldr CARG3, [CARG1] ++ | sub CARG1, DISPATCH, #-DISPATCH_GL(gc.threshold) ++ | ldr CARG4, [CARG1] ++ | str BASE, L->base ++ | str PC, SAVE_PC ++ | cmp CARG3, CARG4 ++ | mov CARG1, L ++ | bhs >5 ++ |1: ++ if (op == BC_TNEW) { ++ | lsl CARG2, RC, #21 ++ | lsr CARG3, RC, #11 ++ | asr RC, CARG2, #21 ++ | lsr CARG2, CARG2, #21 ++ | cmn RC, #1 ++ | it eq ++ | addeq CARG2, CARG2, #2 ++ | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) ++ | // Returns GCtab *. ++ } else { ++ | ldr CARG2, [KBASE, RC, lsl #2] ++ | bl extern lj_tab_dup // (lua_State *L, Table *kt) ++ | // Returns GCtab *. ++ } ++ | ldr BASE, L->base ++ | mvn CARG2, #~LJ_TTAB ++ | ins_next1 ++ | ins_next2 ++ | strd_i CARG1, CARG2, BASE, RA ++ | ins_next3 ++ |5: ++ | bl extern lj_gc_step_fixtop // (lua_State *L) ++ | mov CARG1, L ++ | b <1 ++ break; ++ ++ case BC_GGET: ++ | // RA = dst*8, RC = str_const (~) ++ case BC_GSET: ++ | // RA = dst*8, RC = str_const (~) ++ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] ++ | mvn RC, RC ++ | ldr TAB:CARG1, LFUNC:CARG2->env ++ | ldr STR:RC, [KBASE, RC, lsl #2] ++ if (op == BC_GGET) { ++ | b ->BC_TGETS_Z ++ } else { ++ | b ->BC_TSETS_Z ++ } ++ break; ++ ++ case BC_TGETV: ++ | decode_RB8 RB, INS ++ | decode_RC8 RC, INS ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | ldrd_i TAB:CARG1, CARG2, BASE, RB ++ | ldrd_i CARG3, CARG4, BASE, RC ++ | checktab CARG2, ->vmeta_tgetv // STALL: load CARG1, CARG2. ++ | checktp CARG4, LJ_TISNUM // Integer key? 
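For an integer key, the lookup below is a bounds-checked array access; a nil result still has to probe the metatable for __index. A stand-alone sketch:

    #include <stdint.h>
    #include <stddef.h>

    typedef struct { int nilv; } ASlot;                  /* toy slot */
    typedef struct { ASlot *array; uint32_t asize; } ATab;

    /* Fast path only; NULL means fall back (hole with a metatable,
       out-of-range index, or a non-integer key). */
    static ASlot *tgetv_int_sketch(ATab *t, uint32_t k)
    {
      if (k < t->asize && !t->array[k].nilv)  /* 'In array part?' */
        return &t->array[k];
      return NULL;
    }

The unsigned compare doubles as a negative-index check, which is why negative integer keys fall through to the vmeta path.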
++ | it eq ++ | ldreq CARG4, TAB:CARG1->array ++ | it eq ++ | ldreq CARG2, TAB:CARG1->asize ++ | bne >9 ++ | ++ | add CARG4, CARG4, CARG3, lsl #3 ++ | cmp CARG3, CARG2 // In array part? ++ | it lo ++ | ldrdlo CARG3, CARG4, [CARG4] ++ | bhs ->vmeta_tgetv ++ | ins_next1 // Overwrites RB! ++ | checktp CARG4, LJ_TNIL ++ | beq >5 ++ |1: ++ | ins_next2 ++ | strd_i CARG3, CARG4, BASE, RA ++ | ins_next3 ++ | ++ |5: // Check for __index if table value is nil. ++ | ldr TAB:CARG2, TAB:CARG1->metatable ++ | cmp TAB:CARG2, #0 ++ | beq <1 // No metatable: done. ++ | ldrb CARG2, TAB:CARG2->nomm ++ | tst CARG2, #1<vmeta_tgetv ++ | ++ |9: ++ | checktp CARG4, LJ_TSTR // String key? ++ | it eq ++ | moveq STR:RC, CARG3 ++ | beq ->BC_TGETS_Z ++ | b ->vmeta_tgetv ++ break; ++ case BC_TGETS: ++ | decode_RB8 RB, INS ++ | and RC, RC, #255 ++ | // RA = dst*8, RB = table*8, RC = str_const (~) ++ | ldrd_i CARG1, CARG2, BASE, RB ++ | mvn RC, RC ++ | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC. ++ | checktab CARG2, ->vmeta_tgets1 ++ |->BC_TGETS_Z: ++ | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 ++ | ldr CARG3, TAB:CARG1->hmask ++ | ldr CARG4, STR:RC->sid ++ | ldr NODE:INS, TAB:CARG1->node ++ | mov TAB:RB, TAB:CARG1 ++ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask ++ | add CARG3, CARG3, CARG3, lsl #1 ++ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 ++ |1: ++ | ldrd CARG1, CARG2, NODE:INS->key // STALL: early NODE:INS. ++ | ldrd CARG3, CARG4, NODE:INS->val ++ | ldr NODE:INS, NODE:INS->next ++ | checktp CARG2, LJ_TSTR ++ | it eq ++ | cmpeq CARG1, STR:RC ++ | bne >4 ++ | checktp CARG4, LJ_TNIL ++ | beq >5 ++ |3: ++ | ins_next1 ++ | ins_next2 ++ | strd_i CARG3, CARG4, BASE, RA ++ | ins_next3 ++ | ++ |4: // Follow hash chain. ++ | cmp NODE:INS, #0 ++ | bne <1 ++ | // End of hash chain: key not found, nil result. ++ | ++ |5: // Check for __index if table value is nil. ++ | ldr TAB:CARG1, TAB:RB->metatable ++ | mov CARG3, #0 // Optional clear of undef. value (during load stall). ++ | mvn CARG4, #~LJ_TNIL ++ | cmp TAB:CARG1, #0 ++ | beq <3 // No metatable: done. ++ | ldrb CARG2, TAB:CARG1->nomm ++ | tst CARG2, #1<vmeta_tgets ++ break; ++ case BC_TGETB: ++ | decode_RB8 RB, INS ++ | and RC, RC, #255 ++ | // RA = dst*8, RB = table*8, RC = index ++ | ldrd_i CARG1, CARG2, BASE, RB ++ | checktab CARG2, ->vmeta_tgetb // STALL: load CARG1, CARG2. ++ | ldr CARG3, TAB:CARG1->asize ++ | ldr CARG4, TAB:CARG1->array ++ | lsl CARG2, RC, #3 ++ | cmp RC, CARG3 ++ | ldrdlo_i CARG3, CARG4, CARG4, CARG2 ++ | bhs ->vmeta_tgetb ++ | ins_next1 // Overwrites RB! ++ | checktp CARG4, LJ_TNIL ++ | beq >5 ++ |1: ++ | ins_next2 ++ | strd_i CARG3, CARG4, BASE, RA ++ | ins_next3 ++ | ++ |5: // Check for __index if table value is nil. ++ | ldr TAB:CARG2, TAB:CARG1->metatable ++ | cmp TAB:CARG2, #0 ++ | beq <1 // No metatable: done. ++ | ldrb CARG2, TAB:CARG2->nomm ++ | tst CARG2, #1<vmeta_tgetb ++ break; ++ case BC_TGETR: ++ | decode_RB8 RB, INS ++ | decode_RC8 RC, INS ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | ldr TAB:CARG1, [BASE, RB] ++ | ldr CARG2, [BASE, RC] ++ | ldr CARG4, TAB:CARG1->array ++ | ldr CARG3, TAB:CARG1->asize ++ | add CARG4, CARG4, CARG2, lsl #3 ++ | cmp CARG2, CARG3 // In array part? 
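BC_TGETS above resolves string keys in the hash part. The add/add/lsl sequence is just idx * sizeof(Node) with a 24-byte Node, and interned strings compare by pointer, so the chain walk reduces to an identity scan. A self-contained model with a simplified node layout:

    #include <stdint.h>
    #include <stddef.h>

    typedef struct HNode {
      void *key; void *val;
      struct HNode *next;                      /* hash chain */
    } HNode;
    typedef struct { HNode *node; uint32_t hmask; } HTab;

    /* sid is the string's precomputed hash id (str->sid above). */
    static void *tgets_sketch(HTab *t, const void *str, uint32_t sid)
    {
      HNode *n = &t->node[sid & t->hmask];
      do {
        if (n->key == str) return n->val;      /* identity compare */
        n = n->next;
      } while (n != NULL);
      return NULL;   /* end of chain: nil result / __index check */
    }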
++ | bhs ->vmeta_tgetr ++ | ldrd CARG1, CARG2, [CARG4] ++ |->BC_TGETR_Z: ++ | ins_next1 ++ | ins_next2 ++ | strd_i CARG1, CARG2, BASE, RA ++ | ins_next3 ++ break; ++ ++ case BC_TSETV: ++ | decode_RB8 RB, INS ++ | decode_RC8 RC, INS ++ | // RA = src*8, RB = table*8, RC = key*8 ++ | ldrd_i TAB:CARG1, CARG2, BASE, RB ++ | ldrd_i CARG3, CARG4, BASE, RC ++ | checktab CARG2, ->vmeta_tsetv // STALL: load CARG1, CARG2. ++ | checktp CARG4, LJ_TISNUM // Integer key? ++ | it eq ++ | ldreq CARG2, TAB:CARG1->array ++ | it eq ++ | ldreq CARG4, TAB:CARG1->asize ++ | bne >9 ++ | ++ | add CARG2, CARG2, CARG3, lsl #3 ++ | cmp CARG3, CARG4 // In array part? ++ | it lo ++ | ldrlo INS, [CARG2, #4] ++ | bhs ->vmeta_tsetv ++ | ins_next1 // Overwrites RB! ++ | checktp INS, LJ_TNIL ++ | ldrb INS, TAB:CARG1->marked ++ | ldrd_i CARG3, CARG4, BASE, RA ++ | beq >5 ++ |1: ++ | tst INS, #LJ_GC_BLACK // isblack(table) ++ | strd CARG3, CARG4, [CARG2] ++ | bne >7 ++ |2: ++ | ins_next2 ++ | ins_next3 ++ | ++ |5: // Check for __newindex if previous value is nil. ++ | ldr TAB:RA, TAB:CARG1->metatable ++ | cmp TAB:RA, #0 ++ | beq <1 // No metatable: done. ++ | ldrb RA, TAB:RA->nomm ++ | tst RA, #1<vmeta_tsetv ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:CARG1, INS, CARG3 ++ | b <2 ++ | ++ |9: ++ | checktp CARG4, LJ_TSTR // String key? ++ | it eq ++ | moveq STR:RC, CARG3 ++ | beq ->BC_TSETS_Z ++ | b ->vmeta_tsetv ++ break; ++ case BC_TSETS: ++ | decode_RB8 RB, INS ++ | and RC, RC, #255 ++ | // RA = src*8, RB = table*8, RC = str_const (~) ++ | ldrd_i CARG1, CARG2, BASE, RB ++ | mvn RC, RC ++ | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC. ++ | checktab CARG2, ->vmeta_tsets1 ++ |->BC_TSETS_Z: ++ | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 ++ | ldr CARG3, TAB:CARG1->hmask ++ | ldr CARG4, STR:RC->sid ++ | ldr NODE:INS, TAB:CARG1->node ++ | mov TAB:RB, TAB:CARG1 ++ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask ++ | add CARG3, CARG3, CARG3, lsl #1 ++ | mov CARG4, #0 ++ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 ++ | strb CARG4, TAB:RB->nomm // Clear metamethod cache. ++ |1: ++ | ldrd CARG1, CARG2, NODE:INS->key ++ | ldr CARG4, NODE:INS->val.it ++ | ldr NODE:CARG3, NODE:INS->next ++ | checktp CARG2, LJ_TSTR ++ | it eq ++ | cmpeq CARG1, STR:RC ++ | bne >5 ++ | ldrb CARG2, TAB:RB->marked ++ | checktp CARG4, LJ_TNIL // Key found, but nil value? ++ | ldrd_i CARG3, CARG4, BASE, RA ++ | beq >4 ++ |2: ++ | tst CARG2, #LJ_GC_BLACK // isblack(table) ++ | strd CARG3, CARG4, NODE:INS->val ++ | bne >7 ++ |3: ++ | ins_next ++ | ++ |4: // Check for __newindex if previous value is nil. ++ | ldr TAB:CARG1, TAB:RB->metatable ++ | cmp TAB:CARG1, #0 ++ | beq <2 // No metatable: done. ++ | ldrb CARG1, TAB:CARG1->nomm ++ | tst CARG1, #1<vmeta_tsets ++ | ++ |5: // Follow hash chain. ++ | movs NODE:INS, NODE:CARG3 ++ | bne <1 ++ | // End of hash chain: key not found, add a new one. ++ | ++ | // But check for __newindex first. ++ | ldr TAB:CARG1, TAB:RB->metatable ++ | mov CARG3, TMPDp ++ | str PC, SAVE_PC ++ | cmp TAB:CARG1, #0 // No metatable: continue. ++ | str BASE, L->base ++ | it ne ++ | ldrbne CARG2, TAB:CARG1->nomm ++ | mov CARG1, L ++ | beq >6 ++ | tst CARG2, #1<vmeta_tsets // 'no __newindex' flag NOT set: check. ++ |6: ++ | mvn CARG4, #~LJ_TSTR ++ | str STR:RC, TMPDlo ++ | mov CARG2, TAB:RB ++ | str CARG4, TMPDhi ++ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) ++ | // Returns TValue *. 
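In outline, the slow tail of BC_TSETS above: at the end of the hash chain the key is absent, so the VM first rules out __newindex and then calls lj_tab_newkey, storing into the slot it returns. The raw-store test applied whenever a nil slot is hit is, in effect:

    enum { MM_newindex = 1 };   /* illustrative value; see lj_obj.h */

    /* 1 = store directly (plus write barrier), 0 = take vmeta_tsets. */
    static int can_rawset(int slot_is_nil, int has_metatable,
                          unsigned nomm)
    {
      return !slot_is_nil || !has_metatable ||
             (nomm & (1u << MM_newindex));  /* cached 'no __newindex' */
    }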
++ | ldr BASE, L->base ++ | ldrd_i CARG3, CARG4, BASE, RA ++ | strd CARG3, CARG4, [CRET1] ++ | b <3 // No 2nd write barrier needed. ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, CARG2, CARG3 ++ | b <3 ++ break; ++ case BC_TSETB: ++ | decode_RB8 RB, INS ++ | and RC, RC, #255 ++ | // RA = src*8, RB = table*8, RC = index ++ | ldrd_i CARG1, CARG2, BASE, RB ++ | checktab CARG2, ->vmeta_tsetb // STALL: load CARG1, CARG2. ++ | ldr CARG3, TAB:CARG1->asize ++ | ldr RB, TAB:CARG1->array ++ | lsl CARG2, RC, #3 ++ | cmp RC, CARG3 ++ | ldrdlo_iw CARG3, CARG4, CARG2, RB ++ | bhs ->vmeta_tsetb ++ | ins_next1 // Overwrites RB! ++ | checktp CARG4, LJ_TNIL ++ | ldrb INS, TAB:CARG1->marked ++ | ldrd_i CARG3, CARG4, BASE, RA ++ | beq >5 ++ |1: ++ | tst INS, #LJ_GC_BLACK // isblack(table) ++ | strd CARG3, CARG4, [CARG2] ++ | bne >7 ++ |2: ++ | ins_next2 ++ | ins_next3 ++ | ++ |5: // Check for __newindex if previous value is nil. ++ | ldr TAB:RA, TAB:CARG1->metatable ++ | cmp TAB:RA, #0 ++ | beq <1 // No metatable: done. ++ | ldrb RA, TAB:RA->nomm ++ | tst RA, #1<vmeta_tsetb ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:CARG1, INS, CARG3 ++ | b <2 ++ break; ++ case BC_TSETR: ++ | decode_RB8 RB, INS ++ | decode_RC8 RC, INS ++ | // RA = src*8, RB = table*8, RC = key*8 ++ | ldr TAB:CARG2, [BASE, RB] ++ | ldr CARG3, [BASE, RC] ++ | ldrb INS, TAB:CARG2->marked ++ | ldr CARG1, TAB:CARG2->array ++ | ldr CARG4, TAB:CARG2->asize ++ | tst INS, #LJ_GC_BLACK // isblack(table) ++ | add CARG1, CARG1, CARG3, lsl #3 ++ | bne >7 ++ |2: ++ | cmp CARG3, CARG4 // In array part? ++ | bhs ->vmeta_tsetr ++ |->BC_TSETR_Z: ++ | ldrd_i CARG3, CARG4, BASE, RA ++ | ins_next1 ++ | ins_next2 ++ | strd CARG3, CARG4, [CARG1] ++ | ins_next3 ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:CARG2, INS, RB ++ | b <2 ++ break; ++ ++ case BC_TSETM: ++ | // RA = base*8 (table at base-1), RC = num_const (start index) ++ | add RA, BASE, RA ++ |1: ++ | ldr RB, SAVE_MULTRES ++ | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table. ++ | ldr CARG1, [KBASE, RC, lsl #3] // Integer constant is in lo-word. ++ | subs RB, RB, #8 ++ | ldr CARG4, TAB:CARG2->asize ++ | beq >4 // Nothing to copy? ++ | add CARG3, CARG1, RB, lsr #3 ++ | cmp CARG3, CARG4 ++ | ldr CARG4, TAB:CARG2->array ++ | add RB, RA, RB ++ | bhi >5 ++ | add INS, CARG4, CARG1, lsl #3 ++ | ldrb CARG1, TAB:CARG2->marked ++ |3: // Copy result slots to table. ++ | ldrd CARG3, CARG4, [RA], #8 ++ | strd CARG3, CARG4, [INS], #8 ++ | cmp RA, RB ++ | blo <3 ++ | tst CARG1, #LJ_GC_BLACK // isblack(table) ++ | bne >7 ++ |4: ++ | ins_next ++ | ++ |5: // Need to resize array part. ++ | str BASE, L->base ++ | mov CARG1, L ++ | str PC, SAVE_PC ++ | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) ++ | // Must not reallocate the stack. ++ | .IOS ldr BASE, L->base ++ | b <1 ++ | ++ |7: // Possible table write barrier for any value. Skip valiswhite check. 
++ | barrierback TAB:CARG2, CARG1, CARG3 ++ | b <4 ++ break; ++ ++ /* -- Calls and vararg handling ----------------------------------------- */ ++ ++ case BC_CALLM: ++ | // RA = base*8, (RB = nresults+1,) RC = extra_nargs ++ | ldr CARG1, SAVE_MULTRES ++ | decode_RC8 NARGS8:RC, INS ++ | add NARGS8:RC, NARGS8:RC, CARG1 ++ | b ->BC_CALL_Z ++ break; ++ case BC_CALL: ++ | decode_RC8 NARGS8:RC, INS ++ | // RA = base*8, (RB = nresults+1,) RC = (nargs+1)*8 ++ |->BC_CALL_Z: ++ | mov RB, BASE // Save old BASE for vmeta_call. ++ | ldrd_iw CARG3, CARG4, BASE, RA ++ | sub NARGS8:RC, NARGS8:RC, #8 ++ | add BASE, BASE, #8 ++ | checkfunc CARG4, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_CALLMT: ++ | // RA = base*8, (RB = 0,) RC = extra_nargs ++ | ldr CARG1, SAVE_MULTRES ++ | add NARGS8:RC, CARG1, RC, lsl #3 ++ | b ->BC_CALLT1_Z ++ break; ++ case BC_CALLT: ++ | lsl NARGS8:RC, RC, #3 ++ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 ++ |->BC_CALLT1_Z: ++ | ldrd_iw LFUNC:CARG3, CARG4, RA, BASE ++ | sub NARGS8:RC, NARGS8:RC, #8 ++ | add RA, RA, #8 ++ | checkfunc CARG4, ->vmeta_callt ++ | ldr PC, [BASE, FRAME_PC] ++ |->BC_CALLT2_Z: ++ | mov RB, #0 ++ | ldrb CARG4, LFUNC:CARG3->ffid ++ | tst PC, #FRAME_TYPE ++ | bne >7 ++ |1: ++ | str LFUNC:CARG3, [BASE, FRAME_FUNC] // Copy function down, but keep PC. ++ | cmp NARGS8:RC, #0 ++ | beq >3 ++ |2: ++ | ldrd_i CARG1, CARG2, RA, RB ++ | add INS, RB, #8 ++ | cmp INS, NARGS8:RC ++ | strd_i CARG1, CARG2, BASE, RB ++ | mov RB, INS ++ | bne <2 ++ |3: ++ | cmp CARG4, #1 // (> FF_C) Calling a fast function? ++ | bhi >5 ++ |4: ++ | ins_callt ++ | ++ |5: // Tailcall to a fast function with a Lua frame below. ++ | ldr INS, [PC, #-4] ++ | decode_RA8 RA, INS ++ | sub CARG1, BASE, RA ++ | ldr LFUNC:CARG1, [CARG1, #-16] ++ | ldr CARG1, LFUNC:CARG1->field_pc ++ | ldr KBASE, [CARG1, #PC2PROTO(k)] ++ | b <4 ++ | ++ |7: // Tailcall from a vararg function. ++ | eor PC, PC, #FRAME_VARG ++ | tst PC, #FRAME_TYPEP // Vararg frame below? ++ | it ne ++ | movne CARG4, #0 // Clear ffid if no Lua function below. ++ | bne <1 ++ | sub BASE, BASE, PC ++ | ldr PC, [BASE, FRAME_PC] ++ | tst PC, #FRAME_TYPE ++ | it ne ++ | movne CARG4, #0 // Clear ffid if no Lua function below. ++ | b <1 ++ break; ++ ++ case BC_ITERC: ++ | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) ++ | add RA, BASE, RA ++ | mov RB, BASE // Save old BASE for vmeta_call. ++ | ldrd CARG3, CARG4, [RA, #-16] ++ | ldrd CARG1, CARG2, [RA, #-8] ++ | add BASE, RA, #8 ++ | strd CARG3, CARG4, [RA, #8] // Copy state. ++ | strd CARG1, CARG2, [RA, #16] // Copy control var. ++ | // STALL: locked CARG3, CARG4. ++ | ldrd LFUNC:CARG3, CARG4, [RA, #-24] ++ | mov NARGS8:RC, #16 // Iterators get 2 arguments. ++ | // STALL: load CARG3, CARG4. ++ | strd LFUNC:CARG3, CARG4, [RA] // Copy callable. ++ | checkfunc CARG4, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_ITERN: ++ |.if JIT ++ | hotloop ++ |.endif ++ |->vm_IITERN: ++ | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) ++ | add RA, BASE, RA ++ | ldr TAB:RB, [RA, #-16] ++ | ldr CARG1, [RA, #-8] // Get index from control var. ++ | ldr INS, TAB:RB->asize ++ | ldr CARG2, TAB:RB->array ++ | add PC, PC, #4 ++ |1: // Traverse array part. ++ | subs RC, CARG1, INS ++ | add CARG3, CARG2, CARG1, lsl #3 ++ | bhs >5 // Index points after array part? ++ | ldrd CARG3, CARG4, [CARG3] ++ | checktp CARG4, LJ_TNIL ++ | it eq ++ | addeq CARG1, CARG1, #1 // Skip holes in array part. 
++ | beq <1 ++ | ldrh RC, [PC, #-2] ++ | mvn CARG2, #~LJ_TISNUM ++ | strd CARG3, CARG4, [RA, #8] ++ | add RC, PC, RC, lsl #2 ++ | add RB, CARG1, #1 ++ | strd CARG1, CARG2, [RA] ++ | sub PC, RC, #0x20000 ++ | str RB, [RA, #-8] // Update control var. ++ |3: ++ | ins_next ++ | ++ |5: // Traverse hash part. ++ | ldr CARG4, TAB:RB->hmask ++ | ldr NODE:RB, TAB:RB->node ++ |6: ++ | add CARG1, RC, RC, lsl #1 ++ | cmp RC, CARG4 // End of iteration? Branch to ITERL+1. ++ | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 ++ | bhi <3 ++ | ldrd CARG1, CARG2, NODE:CARG3->val ++ | checktp CARG2, LJ_TNIL ++ | add RC, RC, #1 ++ | beq <6 // Skip holes in hash part. ++ | ldrh RB, [PC, #-2] ++ | add RC, RC, INS ++ | ldrd CARG3, CARG4, NODE:CARG3->key ++ | str RC, [RA, #-8] // Update control var. ++ | strd CARG1, CARG2, [RA, #8] ++ | add RC, PC, RB, lsl #2 ++ | sub PC, RC, #0x20000 ++ | strd CARG3, CARG4, [RA] ++ | b <3 ++ break; ++ ++ case BC_ISNEXT: ++ | // RA = base*8, RC = target (points to ITERN) ++ | add RA, BASE, RA ++ | add RC, PC, RC, lsl #2 ++ | ldrd CFUNC:CARG1, CFUNC:CARG2, [RA, #-24] ++ | ldr CARG3, [RA, #-12] ++ | ldr CARG4, [RA, #-4] ++ | checktp CARG2, LJ_TFUNC ++ | it eq ++ | ldrbeq CARG1, CFUNC:CARG1->ffid ++ | checktpeq CARG3, LJ_TTAB ++ | checktpeq CARG4, LJ_TNIL ++ | it eq ++ | cmpeq CARG1, #FF_next_N ++ | it eq ++ | subeq PC, RC, #0x20000 ++ | bne >5 ++ | ins_next1 ++ | ins_next2 ++ | mov CARG1, #0 ++ | mvn CARG2, #~LJ_KEYINDEX ++ | strd CARG1, CARG2, [RA, #-8] // Initialize control var. ++ |1: ++ | ins_next3 ++ |5: // Despecialize bytecode if any of the checks fail. ++ | mov CARG1, #BC_JMP ++ | mov OP, #BC_ITERC ++ | strb CARG1, [PC, #-4] ++ | sub PC, RC, #0x20000 ++ |.if JIT ++ | ldrb CARG1, [PC] ++ | cmp CARG1, #BC_ITERN ++ | bne >6 ++ |.endif ++ | strb OP, [PC] // Subsumes ins_next1. ++ | ins_next2 ++ | b <1 ++ |.if JIT ++ |6: // Unpatch JLOOP. ++ | sub CARG2, DISPATCH, #-DISPATCH_J(trace) ++ | ldr CARG1, [CARG2] ++ | ldrh CARG2, [PC, #2] ++ | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] ++ | // Subsumes ins_next1 and ins_next2. ++ | ldr INS, TRACE:CARG1->startins ++ | .long 0xf36c0e07 //BFI INS, OP, #0, #8 ++ | str INS, [PC], #4 ++ | b <1 ++ |.endif ++ break; ++ ++ case BC_VARG: ++ | decode_RB8 RB, INS ++ | decode_RC8 RC, INS ++ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 ++ | ldr CARG1, [BASE, FRAME_PC] ++ | add RC, BASE, RC ++ | add RA, BASE, RA ++ | add RC, RC, #FRAME_VARG ++ | add CARG4, RA, RB ++ | sub CARG3, BASE, #8 // CARG3 = vtop ++ | sub RC, RC, CARG1 // RC = vbase ++ | // Note: RC may now be even _above_ BASE if nargs was < numparams. ++ | cmp RB, #0 ++ | sub CARG1, CARG3, RC ++ | beq >5 // Copy all varargs? ++ | sub CARG4, CARG4, #16 ++ |1: // Copy vararg slots to destination slots. ++ | cmp RC, CARG3 ++ | ite lo ++ | ldrdlo CARG1, CARG2, [RC], #8 ++ | mvnhs CARG2, #~LJ_TNIL ++ | cmp RA, CARG4 ++ | strd CARG1, CARG2, [RA], #8 ++ | blo <1 ++ |2: ++ | ins_next ++ | ++ |5: // Copy all varargs. ++ | ldr CARG4, L->maxstack ++ | cmp CARG1, #0 ++ | ite le ++ | movle RB, #8 // MULTRES = (0+1)*8 ++ | addgt RB, CARG1, #8 ++ | add CARG2, RA, CARG1 ++ | str RB, SAVE_MULTRES ++ | ble <2 ++ | cmp CARG2, CARG4 ++ | bhi >7 ++ |6: ++ | ldrd CARG1, CARG2, [RC], #8 ++ | strd CARG1, CARG2, [RA], #8 ++ | cmp RC, CARG3 ++ | blo <6 ++ | b <2 ++ | ++ |7: // Grow stack for varargs. ++ | lsr CARG2, CARG1, #3 ++ | str RA, L->top ++ | mov CARG1, L ++ | str BASE, L->base ++ | sub RC, RC, BASE // Need delta, because BASE may change. 
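BC_VARG above copies the requested slots and nil-fills any shortfall; the delta arithmetic at the end exists because lj_state_growstack may move BASE, so only offsets survive the call. The copy itself, modelled with a toy slot type:

    typedef struct { double n; int tag; } TVx;        /* toy slot */
    static const TVx TVX_NIL = { 0.0, -1 };

    /* nwant slots requested by the destination, navail actually
       passed; nwant == 0 means 'copy all' and sets MULTRES instead. */
    static void varg_copy_sketch(TVx *dst, const TVx *vbase,
                                 int nwant, int navail)
    {
      for (int i = 0; i < nwant; i++)
        dst[i] = (i < navail) ? vbase[i] : TVX_NIL;
    }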
++ | str PC, SAVE_PC ++ | sub RA, RA, BASE ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ldr BASE, L->base ++ | add RA, BASE, RA ++ | add RC, BASE, RC ++ | sub CARG3, BASE, #8 ++ | b <6 ++ break; ++ ++ /* -- Returns ----------------------------------------------------------- */ ++ ++ case BC_RETM: ++ | // RA = results*8, RC = extra results ++ | ldr CARG1, SAVE_MULTRES ++ | ldr PC, [BASE, FRAME_PC] ++ | add RA, BASE, RA ++ | add RC, CARG1, RC, lsl #3 ++ | b ->BC_RETM_Z ++ break; ++ ++ case BC_RET: ++ | // RA = results*8, RC = nresults+1 ++ | ldr PC, [BASE, FRAME_PC] ++ | lsl RC, RC, #3 ++ | add RA, BASE, RA ++ |->BC_RETM_Z: ++ | str RC, SAVE_MULTRES ++ |1: ++ | ands CARG1, PC, #FRAME_TYPE ++ | eor CARG2, PC, #FRAME_VARG ++ | bne ->BC_RETV2_Z ++ | ++ |->BC_RET_Z: ++ | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return ++ | ldr INS, [PC, #-4] ++ | subs CARG4, RC, #8 ++ | sub CARG3, BASE, #8 ++ | beq >3 ++ |2: ++ | ldrd CARG1, CARG2, [RA], #8 ++ | add BASE, BASE, #8 ++ | subs CARG4, CARG4, #8 ++ | strd CARG1, CARG2, [BASE, #-16] ++ | bne <2 ++ |3: ++ | decode_RA8 RA, INS ++ | sub CARG4, CARG3, RA ++ | decode_RB8 RB, INS ++ | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC] ++ |5: ++ | cmp RB, RC // More results expected? ++ | bhi >6 ++ | mov BASE, CARG4 ++ | ldr CARG2, LFUNC:CARG1->field_pc ++ | ins_next1 ++ | ins_next2 ++ | ldr KBASE, [CARG2, #PC2PROTO(k)] ++ | ins_next3 ++ | ++ |6: // Fill up results with nil. ++ | mvn CARG2, #~LJ_TNIL ++ | add BASE, BASE, #8 ++ | add RC, RC, #8 ++ | str CARG2, [BASE, #-12] ++ | b <5 ++ | ++ |->BC_RETV1_Z: // Non-standard return case. ++ | add RA, BASE, RA ++ |->BC_RETV2_Z: ++ | tst CARG2, #FRAME_TYPEP ++ | bne ->vm_return ++ | // Return from vararg function: relocate BASE down. ++ | sub BASE, BASE, CARG2 ++ | ldr PC, [BASE, FRAME_PC] ++ | b <1 ++ break; ++ ++ case BC_RET0: case BC_RET1: ++ | // RA = results*8, RC = nresults+1 ++ | ldr PC, [BASE, FRAME_PC] ++ | lsl RC, RC, #3 ++ | str RC, SAVE_MULTRES ++ | ands CARG1, PC, #FRAME_TYPE ++ | eor CARG2, PC, #FRAME_VARG ++ | it eq ++ | ldreq INS, [PC, #-4] ++ | bne ->BC_RETV1_Z ++ if (op == BC_RET1) { ++ | ldrd_i CARG1, CARG2, BASE, RA ++ } ++ | sub CARG4, BASE, #8 ++ | decode_RA8 RA, INS ++ if (op == BC_RET1) { ++ | strd CARG1, CARG2, [CARG4] ++ } ++ | sub BASE, CARG4, RA ++ | decode_RB8 RB, INS ++ | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] ++ |5: ++ | cmp RB, RC ++ | bhi >6 ++ | ldr CARG2, LFUNC:CARG1->field_pc ++ | ins_next1 ++ | ins_next2 ++ | ldr KBASE, [CARG2, #PC2PROTO(k)] ++ | ins_next3 ++ | ++ |6: // Fill up results with nil. ++ | sub CARG2, CARG4, #4 ++ | mvn CARG3, #~LJ_TNIL ++ | str CARG3, [CARG2, RC] ++ | add RC, RC, #8 ++ | b <5 ++ break; ++ ++ /* -- Loops and branches ------------------------------------------------ */ ++ ++ |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] ++ |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] ++ |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] ++ |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] ++ ++ case BC_FORL: ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_IFORL follows. 
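The FOR_* defines above pin the frame layout for numeric for loops: idx, stop, step, and the externally visible copy of idx occupy four consecutive slots. The integer step that BC_IFORL performs below, as a C sketch (overflow of idx + step ends the loop, mirroring the addvs guard):

    #include <stdint.h>

    /* Returns 1 to run the body again, 0 to exit the loop. */
    static int iforl_step(int32_t *idx, int32_t stop, int32_t step,
                          int32_t *ext)
    {
      int64_t next = (int64_t)*idx + step;
      if (next != (int32_t)next) return 0;   /* overflow: exit */
      *idx = (int32_t)next;
      *ext = *idx;                           /* copy for the body */
      return step >= 0 ? *idx <= stop : *idx >= stop;
    }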
++ break; ++ ++ case BC_JFORI: ++ case BC_JFORL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_FORI: ++ case BC_IFORL: ++ | // RA = base*8, RC = target (after end of loop or start of loop) ++ vk = (op == BC_IFORL || op == BC_JFORL); ++ | ldrd_iw CARG1, CARG2, RA, BASE ++ if (op != BC_JFORL) { ++ | add RC, PC, RC, lsl #2 ++ } ++ if (!vk) { ++ | ldrd CARG3, CARG4, FOR_STOP ++ | checktp CARG2, LJ_TISNUM ++ | ldr RB, FOR_TSTEP ++ | bne >5 ++ | checktp CARG4, LJ_TISNUM ++ | ldr CARG4, FOR_STEP ++ | checktpeq RB, LJ_TISNUM ++ | bne ->vmeta_for ++ | cmp CARG4, #0 ++ | blt >4 ++ | cmp CARG1, CARG3 ++ } else { ++ | ldrd CARG3, CARG4, FOR_STEP ++ | checktp CARG2, LJ_TISNUM ++ | bne >5 ++ | adds CARG1, CARG1, CARG3 ++ | ldr CARG4, FOR_STOP ++ if (op == BC_IFORL) { ++ | it vs ++ | addvs RC, PC, #0x20000 // Overflow: prevent branch. ++ } else { ++ | bvs >2 // Overflow: do not enter mcode. ++ } ++ | cmp CARG3, #0 ++ | blt >4 ++ | cmp CARG1, CARG4 ++ } ++ |1: ++ if (op == BC_FORI) { ++ | it gt ++ | subgt PC, RC, #0x20000 ++ } else if (op == BC_JFORI) { ++ | sub PC, RC, #0x20000 ++ | it le ++ | ldrhle RC, [PC, #-2] ++ } else if (op == BC_IFORL) { ++ | it le ++ | suble PC, RC, #0x20000 ++ } ++ if (vk) { ++ | strd CARG1, CARG2, FOR_IDX ++ } ++ |2: ++ | ins_next1 ++ | ins_next2 ++ | strd CARG1, CARG2, FOR_EXT ++ if (op == BC_JFORI || op == BC_JFORL) { ++ | ble =>BC_JLOOP ++ } ++ |3: ++ | ins_next3 ++ | ++ |4: // Invert check for negative step. ++ if (!vk) { ++ | cmp CARG3, CARG1 ++ } else { ++ | cmp CARG4, CARG1 ++ } ++ | b <1 ++ | ++ |5: // FP loop. ++ if (!vk) { ++ | itt lo ++ | cmnlo CARG4, #-LJ_TISNUM ++ | cmnlo RB, #-LJ_TISNUM ++ | bhs ->vmeta_for ++ |.if FPU ++ | vldr d0, FOR_IDX ++ | vldr d1, FOR_STOP ++ | cmp RB, #0 ++ | vstr d0, FOR_EXT ++ |.else ++ | cmp RB, #0 ++ | strd CARG1, CARG2, FOR_EXT ++ | blt >8 ++ |.endif ++ } else { ++ |.if FPU ++ | vldr d0, FOR_IDX ++ | vldr d2, FOR_STEP ++ | vldr d1, FOR_STOP ++ | cmp CARG4, #0 ++ | vadd.f64 d0, d0, d2 ++ |.else ++ | cmp CARG4, #0 ++ | blt >8 ++ | bl extern __aeabi_dadd ++ | strd CARG1, CARG2, FOR_IDX ++ | ldrd CARG3, CARG4, FOR_STOP ++ | strd CARG1, CARG2, FOR_EXT ++ |.endif ++ } ++ |6: ++ |.if FPU ++ | ite ge ++ | vcmpge.f64 d0, d1 ++ | vcmplt.f64 d1, d0 ++ | vmrs ++ |.else ++ | bl extern __aeabi_cdcmple ++ |.endif ++ if (vk) { ++ |.if FPU ++ | vstr d0, FOR_IDX ++ | vstr d0, FOR_EXT ++ |.endif ++ } ++ if (op == BC_FORI) { ++ | it hi ++ | subhi PC, RC, #0x20000 ++ } else if (op == BC_JFORI) { ++ | sub PC, RC, #0x20000 ++ | it ls ++ | ldrhls RC, [PC, #-2] ++ | bls =>BC_JLOOP ++ } else if (op == BC_IFORL) { ++ | it ls ++ | subls PC, RC, #0x20000 ++ } else { ++ | bls =>BC_JLOOP ++ } ++ | ins_next1 ++ | ins_next2 ++ | b <3 ++ | ++ |.if not FPU ++ |8: // Invert check for negative step. ++ if (vk) { ++ | bl extern __aeabi_dadd ++ | strd CARG1, CARG2, FOR_IDX ++ | strd CARG1, CARG2, FOR_EXT ++ } ++ | mov CARG3, CARG1 ++ | mov CARG4, CARG2 ++ | ldrd CARG1, CARG2, FOR_STOP ++ | b <6 ++ |.endif ++ break; ++ ++ case BC_ITERL: ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_IITERL follows. ++ break; ++ ++ case BC_JITERL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IITERL: ++ | // RA = base*8, RC = target ++ | ldrd_iw CARG1, CARG2, RA, BASE ++ if (op == BC_JITERL) { ++ | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil. ++ | it ne ++ | strdne CARG1, CARG2, [RA, #-8] ++ | bne =>BC_JLOOP ++ } else { ++ | add RC, PC, RC, lsl #2 ++ | // STALL: load CARG1, CARG2. ++ | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil. 
++ | itt ne ++ | subne PC, RC, #0x20000 // Otherwise save control var + branch. ++ | strdne CARG1, CARG2, [RA, #-8] ++ } ++ | ins_next ++ break; ++ ++ case BC_LOOP: ++ | // RA = base*8, RC = target (loop extent) ++ | // Note: RA/RC is only used by trace recorder to determine scope/extent ++ | // This opcode does NOT jump, it's only purpose is to detect a hot loop. ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_ILOOP follows. ++ break; ++ ++ case BC_ILOOP: ++ | // RA = base*8, RC = target (loop extent) ++ | ins_next ++ break; ++ ++ case BC_JLOOP: ++ |.if JIT ++ | // RA = base (ignored), RC = traceno ++ | sub RB, DISPATCH, #-DISPATCH_J(trace) ++ | ldr CARG1, [RB] ++ | mov CARG2, #0 // Traces on ARM don't store the trace number, so use 0. ++ | ldr TRACE:RC, [CARG1, RC, lsl #2] ++ | st_vmstate CARG2 ++ | ldr RA, TRACE:RC->mcode ++ | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] ++ | sub RB, DISPATCH, #-DISPATCH_GL(tmpbuf.L) ++ | str L, [RB] ++ | add RA, RA, #1 ++ | bx RA ++ |.endif ++ break; ++ ++ case BC_JMP: ++ | // RA = base*8 (only used by trace recorder), RC = target ++ | add RC, PC, RC, lsl #2 ++ | sub PC, RC, #0x20000 ++ | ins_next ++ break; ++ ++ /* -- Function headers -------------------------------------------------- */ ++ ++ case BC_FUNCF: ++ |.if JIT ++ | hotcall ++ |.endif ++ case BC_FUNCV: /* NYI: compiled vararg functions. */ ++ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. ++ break; ++ ++ case BC_JFUNCF: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IFUNCF: ++ | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 ++ | ldr CARG1, L->maxstack ++ | ldrb CARG2, [PC, #-4+PC2PROTO(numparams)] ++ | ldr KBASE, [PC, #-4+PC2PROTO(k)] ++ | cmp RA, CARG1 ++ | bhi ->vm_growstack_l ++ if (op != BC_JFUNCF) { ++ | ins_next1 ++ | ins_next2 ++ } ++ |2: ++ | cmp NARGS8:RC, CARG2, lsl #3 // Check for missing parameters. ++ | mvn CARG4, #~LJ_TNIL ++ | blo >3 ++ if (op == BC_JFUNCF) { ++ | decode_RD RC, INS ++ | b =>BC_JLOOP ++ } else { ++ | ins_next3 ++ } ++ | ++ |3: // Clear missing parameters. ++ | strd_i CARG3, CARG4, BASE, NARGS8:RC ++ | add NARGS8:RC, NARGS8:RC, #8 ++ | b <2 ++ break; ++ ++ case BC_JFUNCV: ++#if !LJ_HASJIT ++ break; ++#endif ++ | NYI // NYI: compiled vararg functions ++ break; /* NYI: compiled vararg functions. */ ++ ++ case BC_IFUNCV: ++ | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 ++ | ldr CARG1, L->maxstack ++ | add CARG4, BASE, RC ++ | add RA, RA, RC ++ | str LFUNC:CARG3, [CARG4] // Store copy of LFUNC. ++ | add CARG2, RC, #8+FRAME_VARG ++ | ldr KBASE, [PC, #-4+PC2PROTO(k)] ++ | cmp RA, CARG1 ++ | str CARG2, [CARG4, #4] // Store delta + FRAME_VARG. ++ | bhs ->vm_growstack_l ++ | ldrb RB, [PC, #-4+PC2PROTO(numparams)] ++ | mov RA, BASE ++ | mov RC, CARG4 ++ | cmp RB, #0 ++ | add BASE, CARG4, #8 ++ | beq >3 ++ | mvn CARG3, #~LJ_TNIL ++ |1: ++ | cmp RA, RC // Less args than parameters? ++ | ite lo ++ | ldrdlo CARG1, CARG2, [RA], #8 ++ | movhs CARG2, CARG3 ++ | it lo ++ | strlo CARG3, [RA, #-4] // Clear old fixarg slot (help the GC). ++ |2: ++ | subs RB, RB, #1 ++ | strd CARG1, CARG2, [CARG4, #8]! 
++ | bne <1 ++ |3: ++ | ins_next ++ break; ++ ++ case BC_FUNCC: ++ case BC_FUNCCW: ++ | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8 ++ if (op == BC_FUNCC) { ++ | ldr CARG4, CFUNC:CARG3->f ++ } else { ++ | ldr CARG4, [DISPATCH, #DISPATCH_GL(wrapf)] ++ } ++ | add CARG2, RA, NARGS8:RC ++ | ldr CARG1, L->maxstack ++ | add RC, BASE, NARGS8:RC ++ | str BASE, L->base ++ | cmp CARG2, CARG1 ++ | str RC, L->top ++ if (op == BC_FUNCCW) { ++ | ldr CARG2, CFUNC:CARG3->f ++ } ++ | mv_vmstate CARG3, C ++ | mov CARG1, L ++ | bhi ->vm_growstack_c // Need to grow stack. ++ | st_vmstate CARG3 ++ | blx CARG4 // (lua_State *L [, lua_CFunction f]) ++ | // Returns nresults. ++ | ldr BASE, L->base ++ | mv_vmstate CARG3, INTERP ++ | ldr CRET2, L->top ++ | str L, [DISPATCH, #DISPATCH_GL(cur_L)] ++ | lsl RC, CRET1, #3 ++ | st_vmstate CARG3 ++ | ldr PC, [BASE, FRAME_PC] ++ | sub RA, CRET2, RC // RA = L->top - nresults*8 ++ | b ->vm_returnc ++ break; ++ ++ /* ---------------------------------------------------------------------- */ ++ ++ default: ++ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); ++ exit(2); ++ break; ++ } ++} ++ ++static int build_backend(BuildCtx *ctx) ++{ ++ int op; ++ ++ dasm_growpc(Dst, BC__MAX); ++ ++ build_subroutines(ctx); ++ ++ |.code_op ++ for (op = 0; op < BC__MAX; op++) ++ build_ins(ctx, (BCOp)op, op); ++ ++ return BC__MAX; ++} ++ ++/* Emit pseudo frame-info for all assembler functions. */ ++static void emit_asm_debug(BuildCtx *ctx) ++{ ++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); ++ int i; ++ switch (ctx->mode) { ++ case BUILD_elfasm: ++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe0:\n" ++ "\t.long .LECIE0-.LSCIE0\n" ++ ".LSCIE0:\n" ++ "\t.long 0xffffffff\n" ++ "\t.byte 0x1\n" ++ "\t.string \"\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 0xe\n" /* Return address is in lr. */ ++ "\t.byte 0xc\n\t.uleb128 0xd\n\t.uleb128 0\n" /* def_cfa sp */ ++ "\t.align 2\n" ++ ".LECIE0:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE0:\n" ++ "\t.long .LEFDE0-.LASFDE0\n" ++ ".LASFDE0:\n" ++ "\t.long .Lframe0\n" ++ "\t.long .Lbegin\n" ++ "\t.long %d\n" ++ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ ++ "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ ++ fcofs, CFRAME_SIZE); ++ for (i = 11; i >= (LJ_ARCH_HASFPU ? 
5 : 4); i--) /* offset r4-r11 */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); ++#if LJ_ARCH_HASFPU ++ for (i = 15; i >= 8; i--) /* offset d8-d15 */ ++ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n", ++ 64+2*i, 10+2*(15-i)); ++ fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */ ++#endif ++ fprintf(ctx->fp, ++ "\t.align 2\n" ++ ".LEFDE0:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".LSFDE1:\n" ++ "\t.long .LEFDE1-.LASFDE1\n" ++ ".LASFDE1:\n" ++ "\t.long .Lframe0\n" ++ "\t.long lj_vm_ffi_call\n" ++ "\t.long %d\n" ++ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ ++ "\t.byte 0x8e\n\t.uleb128 1\n" /* offset lr */ ++ "\t.byte 0x8b\n\t.uleb128 2\n" /* offset r11 */ ++ "\t.byte 0x85\n\t.uleb128 3\n" /* offset r5 */ ++ "\t.byte 0x84\n\t.uleb128 4\n" /* offset r4 */ ++ "\t.byte 0xd\n\t.uleb128 0xb\n" /* def_cfa_register r11 */ ++ "\t.align 2\n" ++ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); ++#endif ++ break; ++ default: ++ break; ++ } ++} ++ diff --git a/interpreters/luajit/Kconfig b/interpreters/luajit/Kconfig new file mode 100644 index 00000000000..c39adc1e1a8 --- /dev/null +++ b/interpreters/luajit/Kconfig @@ -0,0 +1,49 @@ +# +# For a description of the syntax of this configuration file, +# see the file kconfig-language.txt in the NuttX tools repository. +# + +config INTERPRETERS_LUAJIT + tristate "LuaJIT" + default n + +if INTERPRETERS_LUAJIT + +config LUAJIT_VERSION + string "LuaJIT version" + default "03080b795aa3496ed62d4a0697c9f4767e7ca7e5" + +config LUAJIT_PRIORITY + int "LuaJIT REPL priority" + default 100 + +config LUAJIT_STACKSIZE + int "LuaJIT REPL stack size" + default 16384 + +config LUAJIT_ALLOC_START + hex "Alloc heap address" + default 0x20200000 + ---help--- + Allocation heap address for Lua objects. + +config LUAJIT_ALLOC_SIZE + hex "Alloc heap size" + default 0x80000 + ---help--- + Allocation heap size for Lua objects. + +config LUAJIT_MCODE_START + hex "MCode heap address" + default 0x00002000 + depends on ARMV7M_ITCM + ---help--- + Allocation heap address for JIT MCode traces. + +config LUAJIT_MCODE_SIZE + hex "MCode heap size" + default 0x1e000 + ---help--- + Allocation heap size for JIT MCode traces. + +endif diff --git a/interpreters/luajit/Make.defs b/interpreters/luajit/Make.defs new file mode 100644 index 00000000000..066d3922fff --- /dev/null +++ b/interpreters/luajit/Make.defs @@ -0,0 +1,23 @@ +############################################################################ +# apps/interpreters/luajit/Make.defs +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. The +# ASF licenses this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance with the +# License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# +############################################################################ + +ifneq ($(CONFIG_INTERPRETERS_LUAJIT),) +CONFIGURED_APPS += $(APPDIR)/interpreters/luajit +endif diff --git a/interpreters/luajit/Makefile b/interpreters/luajit/Makefile new file mode 100644 index 00000000000..83996829425 --- /dev/null +++ b/interpreters/luajit/Makefile @@ -0,0 +1,272 @@ +############################################################################ +# apps/interpreters/luajit/Makefile +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. The +# ASF licenses this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance with the +# License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +############################################################################ + +include $(APPDIR)/Make.defs + +LUAJIT_PATCHS ?= $(sort $(wildcard 000*.patch)) + +LUAJIT_VERSION := $(patsubst "%",%,$(CONFIG_LUAJIT_VERSION)) +LUAJIT_UNPACK = LuaJIT +LUAJIT_TARBALL = $(LUAJIT_VERSION).tar.gz +LUAJIT_URL_BASE = https://github.com/LuaJIT/LuaJIT/archive +LUAJIT_URL = $(LUAJIT_URL_BASE)/$(LUAJIT_TARBALL) + +MAINSRC = luajit.c + +CSRCS += lib_aux.c +CSRCS += lib_base.c +CSRCS += lib_bit.c +CSRCS += lib_buffer.c +CSRCS += lib_debug.c +CSRCS += lib_ffi.c +CSRCS += lib_init.c +CSRCS += lib_io.c +CSRCS += lib_jit.c +CSRCS += lib_math.c +CSRCS += lib_os.c +CSRCS += lib_package.c +CSRCS += lib_string.c +CSRCS += lib_table.c +CSRCS += lj_alloc.c +CSRCS += lj_api.c +CSRCS += lj_asm.c +CSRCS += lj_assert.c +CSRCS += lj_bc.c +CSRCS += lj_bcread.c +CSRCS += lj_bcwrite.c +CSRCS += lj_buf.c +CSRCS += lj_carith.c +CSRCS += lj_ccall.c +CSRCS += lj_ccallback.c +CSRCS += lj_cconv.c +CSRCS += lj_cdata.c +CSRCS += lj_char.c +CSRCS += lj_clib.c +CSRCS += lj_cparse.c +CSRCS += lj_crecord.c +CSRCS += lj_ctype.c +CSRCS += lj_debug.c +CSRCS += lj_dispatch.c +CSRCS += lj_err.c +CSRCS += lj_ffrecord.c +CSRCS += lj_func.c +CSRCS += lj_gc.c +CSRCS += lj_gdbjit.c +CSRCS += lj_ir.c +CSRCS += lj_lex.c +CSRCS += lj_lib.c +CSRCS += lj_load.c +CSRCS += lj_mcode.c +CSRCS += lj_meta.c +CSRCS += lj_obj.c +CSRCS += lj_opt_dce.c +CSRCS += lj_opt_fold.c +CSRCS += lj_opt_loop.c +CSRCS += lj_opt_mem.c +CSRCS += lj_opt_narrow.c +CSRCS += lj_opt_sink.c +CSRCS += lj_opt_split.c +CSRCS += lj_parse.c +CSRCS += lj_prng.c +CSRCS += lj_profile.c +CSRCS += lj_record.c +CSRCS += lj_serialize.c +CSRCS += lj_snap.c +CSRCS += lj_state.c +CSRCS += lj_str.c +CSRCS += lj_strfmt.c +CSRCS += lj_strfmt_num.c +CSRCS += lj_strscan.c +CSRCS += lj_tab.c +CSRCS += lj_trace.c +CSRCS += lj_udata.c +CSRCS += lj_vmevent.c +CSRCS += lj_vmmath.c + +ASRCS += lj_vm.S + +VPATH += $(LUAJIT_UNPACK)/dynasm +VPATH += $(LUAJIT_UNPACK)/src +VPATH += $(LUAJIT_UNPACK)/src/host + +CFLAGS += -fomit-frame-pointer +CFLAGS += -fno-short-enums +CFLAGS += -D__ARM_ARCH_7M__ +CFLAGS += -DLUAJIT_DISABLE_PROFILE +CFLAGS += -DLUAJIT_NO_UNWIND +CFLAGS += -DLUAJIT_OS=LUAJIT_OS_NUTTX +CFLAGS += -DLUAJIT_SECURITY_MCODE=0 +CFLAGS += 
-DLUAJIT_SECURITY_PRNG=0 +CFLAGS += -DLUA_ROOT=CONFIG_LIBC_HOMEDIR + +PROGNAME = luajit +PRIORITY = $(CONFIG_LUAJIT_PRIORITY) +STACKSIZE = $(CONFIG_LUAJIT_STACKSIZE) +MODULE = $(CONFIG_INTERPRETERS_LUAJIT) + +$(LUAJIT_TARBALL): + $(call DOWNLOAD,$(LUAJIT_URL_BASE),$(LUAJIT_TARBALL),$(LUAJIT_TARBALL)) + +$(LUAJIT_UNPACK): $(LUAJIT_TARBALL) + $(ECHO_BEGIN) "Unpacking $(LUAJIT_TARBALL) to $(LUAJIT_UNPACK)" + $(Q) tar xzvf $(LUAJIT_TARBALL) + $(Q) mv LuaJIT-$(LUAJIT_VERSION) $(LUAJIT_UNPACK) + $(Q) cat $(LUAJIT_PATCHS) | \ + patch -s -N -d $(LUAJIT_UNPACK) -p1 + $(ECHO_END) + +$(LUAJIT_UNPACK)/.patch: $(LUAJIT_UNPACK) + $(Q) touch $(LUAJIT_UNPACK)/.patch + +DASM_ARCH = armv7m + +DASM_DASC = $(LUAJIT_UNPACK)/src/vm_armv7m.dasc + +DASM_FLAGS += -D ENDIAN_LE +DASM_FLAGS += -D JIT +DASM_FLAGS += -D FFI +DASM_FLAGS += -D DUALNUM +DASM_FLAGS += -D FPU +DASM_FLAGS += -D HFABI +DASM_FLAGS += -D NO_UNWIND +DASM_FLAGS += -D VER=70 + +MINILUA_O = $(LUAJIT_UNPACK)/src/host/minilua.o + +BUILDVM_O = $(LUAJIT_UNPACK)/src/host/buildvm.o \ + $(LUAJIT_UNPACK)/src/host/buildvm_asm.o \ + $(LUAJIT_UNPACK)/src/host/buildvm_fold.o \ + $(LUAJIT_UNPACK)/src/host/buildvm_lib.o \ + $(LUAJIT_UNPACK)/src/host/buildvm_peobj.o + +HOSTCFLAGS += -D__ARM_ARCH_7M__=1 +HOSTCFLAGS += -DLJ_ABI_SOFTFP=0 +HOSTCFLAGS += -DLJ_ARCH_HASFPU=1 +HOSTCFLAGS += -DLUAJIT_DISABLE_PROFILE +HOSTCFLAGS += -DLUAJIT_NO_UNWIND +HOSTCFLAGS += -DLUAJIT_TARGET=LUAJIT_ARCH_arm +HOSTCFLAGS += -I$(LUAJIT_UNPACK)/src +HOSTCFLAGS += -I$(LUAJIT_UNPACK)/src/host + +HOSTLDFLAGS += -lm + +BUILDVM_BIN = $(LUAJIT_UNPACK)/src/host/buildvm + +MINILUA_BIN = $(LUAJIT_UNPACK)/src/host/minilua + +LJVM_S = $(LUAJIT_UNPACK)/src/lj_vm.S + +LIB_VMDEF = $(LUAJIT_UNPACK)/src/jit/vmdef.lua + +LIB_VMDEFP = $(LIB_VMDEF) + +ALL_T = $(MINILUA_BIN) + +ALL_HDRGEN = $(LUAJIT_UNPACK)/src/lj_bcdef.h +ALL_HDRGEN += $(LUAJIT_UNPACK)/src/lj_ffdef.h +ALL_HDRGEN += $(LUAJIT_UNPACK)/src/lj_folddef.h +ALL_HDRGEN += $(LUAJIT_UNPACK)/src/lj_libdef.h +ALL_HDRGEN += $(LUAJIT_UNPACK)/src/lj_recdef.h + +ALL_GEN = $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP) + +HOST_OBJS = $(MINILUA_O) $(BUILDVM_O) + +LJLIB_O = $(LUAJIT_UNPACK)/src/lib_base.o +LJLIB_O += $(LUAJIT_UNPACK)/src/lib_bit.o +LJLIB_O += $(LUAJIT_UNPACK)/src/lib_buffer.o +LJLIB_O += $(LUAJIT_UNPACK)/src/lib_debug.o +LJLIB_O += $(LUAJIT_UNPACK)/src/lib_ffi.o +LJLIB_O += $(LUAJIT_UNPACK)/src/lib_io.o +LJLIB_O += $(LUAJIT_UNPACK)/src/lib_jit.o +LJLIB_O += $(LUAJIT_UNPACK)/src/lib_math.o +LJLIB_O += $(LUAJIT_UNPACK)/src/lib_os.o +LJLIB_O += $(LUAJIT_UNPACK)/src/lib_package.o +LJLIB_O += $(LUAJIT_UNPACK)/src/lib_string.o +LJLIB_O += $(LUAJIT_UNPACK)/src/lib_table.o + +LJLIB_C = $(LJLIB_O:.o=.c) + +.NOTPARALLEL: + +$(MINILUA_BIN): $(MINILUA_O) + $(ECHO_BEGIN) "HOSTLINK $@" + $(Q) $(HOSTCC) -m32 -o $@ $(MINILUA_O) $(HOSTLDFLAGS) + $(ECHO_END) + +$(LUAJIT_UNPACK)/src/host/buildvm_arch.h: $(DASM_DASC) $(MINILUA_BIN) $(LUAJIT_UNPACK)/src/lj_arch.h $(LUAJIT_UNPACK)/src/lua.h $(LUAJIT_UNPACK)/src/luaconf.h + $(ECHO_BEGIN) "DYNASM $@" + $(Q)$(MINILUA_BIN) $(LUAJIT_UNPACK)/dynasm/dynasm.lua $(DASM_FLAGS) -o $@ $(DASM_DASC) + $(ECHO_END) + +$(BUILDVM_BIN): $(BUILDVM_O) + $(ECHO_BEGIN) "HOSTLINK $@" + $(Q)$(HOSTCC) -m32 -o $@ $(BUILDVM_O) + $(ECHO_END) + +$(LJVM_S): $(BUILDVM_BIN) + $(ECHO_BEGIN) "BUILDVM $@" + $(Q)$(BUILDVM_BIN) -m elfasm -o $@ + $(ECHO_END) + +$(LUAJIT_UNPACK)/src/lj_bcdef.h: $(BUILDVM_BIN) $(LJLIB_C) + $(ECHO_BEGIN) "BUILDVM $@" + $(Q)$(BUILDVM_BIN) -m bcdef -o $@ $(LJLIB_C) + $(ECHO_END) + +$(LUAJIT_UNPACK)/src/lj_ffdef.h: $(BUILDVM_BIN) 
$(LJLIB_C) + $(ECHO_BEGIN) "BUILDVM $@" + $(Q)$(BUILDVM_BIN) -m ffdef -o $@ $(LJLIB_C) + $(ECHO_END) + +$(LUAJIT_UNPACK)/src/lj_libdef.h: $(BUILDVM_BIN) $(LJLIB_C) + $(ECHO_BEGIN) "BUILDVM $@" + $(Q)$(BUILDVM_BIN) -m libdef -o $@ $(LJLIB_C) + $(ECHO_END) + +$(LUAJIT_UNPACK)/src/lj_recdef.h: $(BUILDVM_BIN) $(LJLIB_C) + $(ECHO_BEGIN) "BUILDVM $@" + $(Q)$(BUILDVM_BIN) -m recdef -o $@ $(LJLIB_C) + $(ECHO_END) + +$(LIB_VMDEF): $(BUILDVM_BIN) $(LJLIB_C) + $(ECHO_BEGIN) "BUILDVM $@" + $(Q)$(BUILDVM_BIN) -m vmdef -o $(LIB_VMDEFP) $(LJLIB_C) + $(ECHO_END) + +$(LUAJIT_UNPACK)/src/lj_folddef.h: $(BUILDVM_BIN) $(LUAJIT_UNPACK)/src/lj_opt_fold.c + $(ECHO_BEGIN) "BUILDVM $@" + $(Q)$(BUILDVM_BIN) -m folddef -o $@ $(LUAJIT_UNPACK)/src/lj_opt_fold.c + $(ECHO_END) + +$(HOST_OBJS): %.o: %.c + $(ECHO_BEGIN) "CC: $<" + $(Q) $(HOSTCC) -m32 -c $(HOSTCFLAGS) $< -o $@ + $(ECHO_END) + +ifeq ($(wildcard $(LUAJIT_UNPACK)/.git),) +context:: $(LUAJIT_UNPACK)/.patch $(LUAJIT_UNPACK)/src/host/buildvm_arch.h $(ALL_GEN) + +distclean:: + $(call DELDIR, $(LUAJIT_UNPACK)) + $(call DELFILE, $(LUAJIT_TARBALL)) +endif + +include $(APPDIR)/Application.mk
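
The BC_FUNCC/BC_FUNCCW fast path earlier in this patch ends in `blx CARG4` with `CARG1 = L`: it calls a plain `lua_CFunction` and treats the integer return value as the number of results (`lsl RC, CRET1, #3` rescales it to bytes). For readers mapping that register protocol back to C, here is a minimal sketch of such a function; the name `gpio_read` and its behavior are hypothetical, only the Lua C API calls are real:

#include "lua.h"
#include "lauxlib.h"

/* Hypothetical library function. BC_FUNCC reaches this via blx CARG4
** with CARG1 = L; the call arguments sit between L->base and L->top,
** exactly as stored by the str BASE, L->base / str RC, L->top pair.
*/

static int gpio_read(lua_State *L)
{
  lua_Integer pin = luaL_checkinteger(L, 1); /* first argument slot */

  lua_pushboolean(L, pin >= 0);              /* push one (dummy) result */
  return 1;                                  /* nresults -> CRET1 */
}

Registration would go through the stock API, e.g. `lua_register(L, "gpio_read", gpio_read);` from application init code.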
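
On the configuration side, CONFIG_LUAJIT_ALLOC_START/SIZE reserve a fixed region for GC-managed Lua objects and CONFIG_LUAJIT_MCODE_START/SIZE one for JIT-generated machine code (the latter in ITCM, per the ARMV7M_ITCM dependency, so traces execute from code memory). The built interpreter is still driven through the stock Lua C API; a minimal embedding sketch, assuming only the standard headers from LuaJIT's src/ directory are on the include path:

#include <stdio.h>

#include "lua.h"
#include "lualib.h"
#include "lauxlib.h"

int main(void)
{
  /* Object memory is drawn from the allocation heap configured by
   * CONFIG_LUAJIT_ALLOC_START/SIZE (see the Kconfig help above).
   */

  lua_State *L = luaL_newstate();
  if (L == NULL)
    {
      return 1;
    }

  luaL_openlibs(L); /* base, string, table, math, jit, ffi, ... */

  /* Run a chunk; jit.version confirms a JIT-enabled build. */

  if (luaL_dostring(L, "print(jit and jit.version or _VERSION)") != 0)
    {
      fprintf(stderr, "luajit: %s\n", lua_tostring(L, -1));
    }

  lua_close(L);
  return 0;
}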