/**
* vim: set ts=2 sw=2 tw=99 et:
* =============================================================================
* SourceMod
* Copyright (C) 2004-2008 AlliedModders LLC. All rights reserved.
* =============================================================================
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, version 3.0, as published by the
* Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, AlliedModders LLC gives you permission to link the
* code of this program (as well as its derivative works) to "Half-Life 2," the
* "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
* by the Valve Corporation. You must obey the GNU General Public License in
* all respects for all other code used. Additionally, AlliedModders LLC grants
* this exception to all derivative works. AlliedModders LLC defines further
* exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
* or <http://www.sourcemod.net/license.php>.
*
* Version: $Id$
*/
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "jit_x86.h"
#include "../sp_vm_engine.h"
#include "../engine2.h"
#include "../BaseRuntime.h"
#include "../sp_vm_basecontext.h"
#include "watchdog_timer.h"
#include "interpreter.h"
using namespace sp;
using namespace Knight;
#if defined USE_UNGEN_OPCODES
#include "ungen_opcodes.h"
#endif
#define __ masm.
JITX86 g_Jit;
KeCodeCache *g_pCodeCache = NULL;
ISourcePawnEngine *engine = &g_engine1;
static inline uint8_t *
LinkCode(AssemblerX86 &masm)
{
if (masm.outOfMemory())
return NULL;
void *code = Knight::KE_AllocCode(g_pCodeCache, masm.length());
if (!code)
return NULL;
masm.emitToExecutableMemory(code);
return reinterpret_cast<uint8_t *>(code);
}
static inline ConditionCode
OpToCondition(OPCODE op)
{
switch (op) {
case OP_EQ:
case OP_JEQ:
return equal;
case OP_NEQ:
case OP_JNEQ:
return not_equal;
case OP_SLESS:
case OP_JSLESS:
return less;
case OP_SLEQ:
case OP_JSLEQ:
return less_equal;
case OP_SGRTR:
case OP_JSGRTR:
return greater;
case OP_SGEQ:
case OP_JSGEQ:
return greater_equal;
default:
assert(false);
return negative;
}
}
struct array_creation_t
{
const cell_t *dim_list; /* Dimension sizes */
cell_t dim_count; /* Number of dimensions */
cell_t *data_offs; /* Current offset AFTER the indirection vectors (data) */
cell_t *base; /* array base */
};
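// For illustration: for a 2x3 array, dim_list becomes {2, 3} after the reversal
// in GenerateArrayIndirectionVectors, calc_indirection() reports 2 indirection
// cells, and the generated block looks like this:
//
//   cell 0:   8    (byte offset from cell 0 to cell 2, the start of row 0)
//   cell 1:  16    (byte offset from cell 1 to cell 5, the start of row 1)
//   cells 2-4:     row 0 data
//   cells 5-7:     row 1 data
//
// Each stored offset is relative to the cell that holds it, which is why two
// rows the same distance apart get different stored values.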
static cell_t
GenerateInnerArrayIndirectionVectors(array_creation_t *ar, int dim, cell_t cur_offs)
{
cell_t write_offs = cur_offs;
cell_t *data_offs = ar->data_offs;
cur_offs += ar->dim_list[dim];
// Vectors for every dimension except the last two point at further
// sub-vectors; the second-to-last dimension's vector points directly into
// the data section.
if (ar->dim_count > 2 && dim < ar->dim_count - 2) {
// For each index at this dimension, write offsets to our sub-vectors.
// After we write one sub-vector, we generate its sub-vectors recursively.
// At the end, we're given the next offset we can use.
for (int i = 0; i < ar->dim_list[dim]; i++) {
ar->base[write_offs] = (cur_offs - write_offs) * sizeof(cell_t);
write_offs++;
cur_offs = GenerateInnerArrayIndirectionVectors(ar, dim + 1, cur_offs);
}
} else {
// In this section there are no sub-vectors; we need to write offsets
// to the data. This is kept separate so the data stays in one big chunk.
// The data offset advances by the size of the last dimension for each
// entry, since each entry addresses one final row of data.
for (int i = 0; i < ar->dim_list[dim]; i++) {
ar->base[write_offs] = (*data_offs - write_offs) * sizeof(cell_t);
write_offs++;
*data_offs = *data_offs + ar->dim_list[dim + 1];
}
}
return cur_offs;
}
static cell_t
calc_indirection(const array_creation_t *ar, cell_t dim)
{
cell_t size = ar->dim_list[dim];
if (dim < ar->dim_count - 2)
size += ar->dim_list[dim] * calc_indirection(ar, dim + 1);
return size;
}
static cell_t
GenerateArrayIndirectionVectors(cell_t *arraybase, cell_t dims[], cell_t _dimcount, bool autozero)
{
array_creation_t ar;
cell_t data_offs;
/* Reverse the dimensions */
cell_t dim_list[sDIMEN_MAX];
int cur_dim = 0;
for (int i = _dimcount - 1; i >= 0; i--)
dim_list[cur_dim++] = dims[i];
ar.base = arraybase;
ar.dim_list = dim_list;
ar.dim_count = _dimcount;
ar.data_offs = &data_offs;
data_offs = calc_indirection(&ar, 0);
GenerateInnerArrayIndirectionVectors(&ar, 0, 0);
return data_offs;
}
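// GenerateFullArray sizes the allocation by folding each successive dimension
// into the running cell count as cells = cells * dim + dim, where the extra
// "+ dim" pays for that dimension's indirection vector. For example, with
// argc = 3 and argv = {4, 3, 2}:
//
//   cells = 4
//   cells = 4 * 3 + 3 = 15
//   cells = 15 * 2 + 2 = 32   (24 data cells + 8 indirection cells)
//
// which matches what GenerateArrayIndirectionVectors returns for the same
// dimensions, as the assert below checks.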
int
GenerateFullArray(BaseRuntime *rt, uint32_t argc, cell_t *argv, int autozero)
{
sp_context_t *ctx = rt->GetBaseContext()->GetCtx();
// Calculate how many cells are needed.
if (argv[0] <= 0)
return SP_ERROR_ARRAY_TOO_BIG;
uint32_t cells = argv[0];
for (uint32_t dim = 1; dim < argc; dim++) {
cell_t dimsize = argv[dim];
if (dimsize <= 0)
return SP_ERROR_ARRAY_TOO_BIG;
if (!ke::IsUint32MultiplySafe(cells, dimsize))
return SP_ERROR_ARRAY_TOO_BIG;
cells *= uint32_t(dimsize);
if (!ke::IsUint32AddSafe(cells, dimsize))
return SP_ERROR_ARRAY_TOO_BIG;
cells += uint32_t(dimsize);
}
if (!ke::IsUint32MultiplySafe(cells, 4))
return SP_ERROR_ARRAY_TOO_BIG;
uint32_t bytes = cells * 4;
if (!ke::IsUint32AddSafe(ctx->hp, bytes))
return SP_ERROR_ARRAY_TOO_BIG;
uint32_t new_hp = ctx->hp + bytes;
cell_t *dat_hp = reinterpret_cast<cell_t *>(rt->plugin()->memory + new_hp);
// argv, coincidentally, is STK.
if (dat_hp >= argv - STACK_MARGIN)
return SP_ERROR_HEAPLOW;
if (int err = PushTracker(rt->GetBaseContext()->GetCtx(), bytes))
return err;
cell_t *base = reinterpret_cast<cell_t *>(rt->plugin()->memory + ctx->hp);
cell_t offs = GenerateArrayIndirectionVectors(base, argv, argc, !!autozero);
assert(size_t(offs) == cells);
argv[argc - 1] = ctx->hp;
ctx->hp = new_hp;
return SP_ERROR_NONE;
}
#if !defined NDEBUG
static const char *
GetFunctionName(const sp_plugin_t *plugin, uint32_t offs)
{
if (!plugin->debug.unpacked) {
uint32_t max, iter;
sp_fdbg_symbol_t *sym;
sp_fdbg_arraydim_t *arr;
uint8_t *cursor = (uint8_t *)(plugin->debug.symbols);
max = plugin->debug.syms_num;
for (iter = 0; iter < max; iter++) {
sym = (sp_fdbg_symbol_t *)cursor;
if (sym->ident == sp::IDENT_FUNCTION && sym->codestart <= offs && sym->codeend > offs)
return plugin->debug.stringbase + sym->name;
if (sym->dimcount > 0) {
cursor += sizeof(sp_fdbg_symbol_t);
arr = (sp_fdbg_arraydim_t *)cursor;
cursor += sizeof(sp_fdbg_arraydim_t) * sym->dimcount;
continue;
}
cursor += sizeof(sp_fdbg_symbol_t);
}
} else {
uint32_t max, iter;
sp_u_fdbg_symbol_t *sym;
sp_u_fdbg_arraydim_t *arr;
uint8_t *cursor = (uint8_t *)(plugin->debug.symbols);
max = plugin->debug.syms_num;
for (iter = 0; iter < max; iter++) {
sym = (sp_u_fdbg_symbol_t *)cursor;
if (sym->ident == sp::IDENT_FUNCTION && sym->codestart <= offs && sym->codeend > offs)
return plugin->debug.stringbase + sym->name;
if (sym->dimcount > 0) {
cursor += sizeof(sp_u_fdbg_symbol_t);
arr = (sp_u_fdbg_arraydim_t *)cursor;
cursor += sizeof(sp_u_fdbg_arraydim_t) * sym->dimcount;
continue;
}
cursor += sizeof(sp_u_fdbg_symbol_t);
}
}
return NULL;
}
#endif
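// CompileFromThunk is reached from the thunk emitted by emitCallThunks(). The
// |pc| argument is the return address of the original near call (the thunk
// grabs it off the stack), so that call's rel32 operand lives at pc - 4.
// Because a near call's displacement is measured from the end of the call
// instruction, storing (entry - pc) re-points the call at the freshly compiled
// function, and subsequent calls bypass the thunk entirely.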
static int
CompileFromThunk(BaseRuntime *runtime, cell_t pcode_offs, void **addrp, char *pc)
{
// If the watchdog timer has declared a timeout, we must process it now,
// and possibly refuse to compile, since otherwise we will compile a
// function that is not patched for timeouts.
if (!g_WatchdogTimer.HandleInterrupt())
return SP_ERROR_TIMEOUT;
JitFunction *fn = runtime->GetJittedFunctionByOffset(pcode_offs);
if (!fn) {
int err;
fn = g_Jit.CompileFunction(runtime, pcode_offs, &err);
if (!fn)
return err;
}
#if defined JIT_SPEW
g_engine1.GetDebugHook()->OnDebugSpew(
"Patching thunk to %s::%s\n",
runtime->plugin()->name,
GetFunctionName(runtime->plugin(), pcode_offs));
#endif
*addrp = fn->GetEntryAddress();
/* Right now, we always keep the code RWE */
*(intptr_t *)(pc - 4) = intptr_t(fn->GetEntryAddress()) - intptr_t(pc);
return SP_ERROR_NONE;
}
Compiler::Compiler(BaseRuntime *rt, cell_t pcode_offs)
: rt_(rt),
plugin_(rt->plugin()),
error_(SP_ERROR_NONE),
pcode_start_(pcode_offs),
code_start_(reinterpret_cast<cell_t *>(plugin_->pcode + pcode_start_)),
cip_(code_start_),
code_end_(reinterpret_cast<cell_t *>(plugin_->pcode + plugin_->pcode_size))
{
size_t nmaxops = plugin_->pcode_size / sizeof(cell_t) + 1;
jump_map_ = new Label[nmaxops];
}
Compiler::~Compiler()
{
delete [] jump_map_;
}
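// Compilation is a single forward pass over the pcode. Every cell offset gets
// a Label in jump_map_, and each label is bound just before the instruction at
// that offset is emitted, so forward jumps resolve automatically once their
// target is reached. Jumps to already-bound labels are backward jumps; those
// are emitted with a fixed 32-bit displacement and their offsets recorded in
// backward_jumps_, so the watchdog can later redirect them to the timeout stub
// (see PatchAllJumpsForTimeout below).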
JitFunction *
Compiler::emit(int *errp)
{
if (cip_ >= code_end_ || *cip_ != OP_PROC) {
*errp = SP_ERROR_INVALID_INSTRUCTION;
return NULL;
}
#if defined JIT_SPEW
g_engine1.GetDebugHook()->OnDebugSpew(
"Compiling function %s::%s\n",
plugin_->name,
GetFunctionName(plugin_, pcode_start_));
SpewOpcode(plugin_, code_start_, cip_);
#endif
cell_t *codeseg = reinterpret_cast<cell_t *>(plugin_->pcode);
cip_++;
if (!emitOp(OP_PROC)) {
*errp = (error_ == SP_ERROR_NONE) ? SP_ERROR_OUT_OF_MEMORY : error_;
return NULL;
}
while (cip_ < code_end_) {
// If we reach the end of this function, or the beginning of a new
// procedure, then stop.
if (*cip_ == OP_PROC || *cip_ == OP_ENDPROC)
break;
#if defined JIT_SPEW
SpewOpcode(plugin_, code_start_, cip_);
#endif
// We assume every instruction is a jump target, so before emitting
// an opcode, we bind its corresponding label.
__ bind(&jump_map_[cip_ - codeseg]);
OPCODE op = (OPCODE)readCell();
if (!emitOp(op) || error_ != SP_ERROR_NONE) {
*errp = (error_ == SP_ERROR_NONE) ? SP_ERROR_OUT_OF_MEMORY : error_;
return NULL;
}
}
emitCallThunks();
emitErrorPaths();
uint8_t *code = LinkCode(masm);
if (!code) {
*errp = SP_ERROR_OUT_OF_MEMORY;
return NULL;
}
LoopEdge *edges = new LoopEdge[backward_jumps_.length()];
for (size_t i = 0; i < backward_jumps_.length(); i++) {
edges[i].offset = backward_jumps_[i];
edges[i].disp32 = *reinterpret_cast<int32_t *>(code + edges[i].offset - 4);
}
return new JitFunction(code, pcode_start_, edges, backward_jumps_.length());
}
bool
Compiler::emitOp(OPCODE op)
{
switch (op) {
case OP_MOVE_PRI:
__ movl(pri, alt);
break;
case OP_MOVE_ALT:
__ movl(alt, pri);
break;
case OP_XCHG:
__ xchgl(pri, alt);
break;
case OP_ZERO:
{
cell_t offset = readCell();
__ movl(Operand(dat, offset), 0);
break;
}
case OP_ZERO_S:
{
cell_t offset = readCell();
__ movl(Operand(frm, offset), 0);
break;
}
case OP_PUSH_PRI:
case OP_PUSH_ALT:
{
Register reg = (op == OP_PUSH_PRI) ? pri : alt;
__ movl(Operand(stk, -4), reg);
__ subl(stk, 4);
break;
}
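// The multi-value PUSH opcodes (PUSH2..PUSH5, in their _C/plain/_S/_ADR
// variants) appear to be spaced four opcodes apart per arity in the opcode
// table; the "(op - OP_PUSH2_*) / 4 + 2" computations below rely on that
// spacing to recover how many values to push.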
case OP_PUSH_C:
case OP_PUSH2_C:
case OP_PUSH3_C:
case OP_PUSH4_C:
case OP_PUSH5_C:
{
int n = 1;
if (op >= OP_PUSH2_C)
n = ((op - OP_PUSH2_C) / 4) + 2;
int i = 1;
do {
cell_t val = readCell();
__ movl(Operand(stk, -(4 * i)), val);
} while (i++ < n);
__ subl(stk, 4 * n);
break;
}
case OP_PUSH_ADR:
case OP_PUSH2_ADR:
case OP_PUSH3_ADR:
case OP_PUSH4_ADR:
case OP_PUSH5_ADR:
{
int n = 1;
if (op >= OP_PUSH2_ADR)
n = ((op - OP_PUSH2_ADR) / 4) + 2;
int i = 1;
// We temporarily relocate FRM to be a local address instead of an
// absolute address.
__ subl(frm, dat);
do {
cell_t offset = readCell();
__ lea(tmp, Operand(frm, offset));
__ movl(Operand(stk, -(4 * i)), tmp);
} while (i++ < n);
__ subl(stk, 4 * n);
__ addl(frm, dat);
break;
}
case OP_PUSH_S:
case OP_PUSH2_S:
case OP_PUSH3_S:
case OP_PUSH4_S:
case OP_PUSH5_S:
{
int n = 1;
if (op >= OP_PUSH2_S)
n = ((op - OP_PUSH2_S) / 4) + 2;
int i = 1;
do {
cell_t offset = readCell();
__ movl(tmp, Operand(frm, offset));
__ movl(Operand(stk, -(4 * i)), tmp);
} while (i++ < n);
__ subl(stk, 4 * n);
break;
}
case OP_PUSH:
case OP_PUSH2:
case OP_PUSH3:
case OP_PUSH4:
case OP_PUSH5:
{
int n = 1;
if (op >= OP_PUSH2)
n = ((op - OP_PUSH2) / 4) + 2;
int i = 1;
do {
cell_t offset = readCell();
__ movl(tmp, Operand(dat, offset));
__ movl(Operand(stk, -(4 * i)), tmp);
} while (i++ < n);
__ subl(stk, 4 * n);
break;
}
case OP_ZERO_PRI:
__ xorl(pri, pri);
break;
case OP_ZERO_ALT:
__ xorl(alt, alt);
break;
case OP_ADD:
__ addl(pri, alt);
break;
case OP_SUB:
__ subl(pri, alt);
break;
case OP_SUB_ALT:
__ movl(tmp, alt);
__ subl(tmp, pri);
__ movl(pri, tmp);
break;
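// After OP_PROC runs, the SourcePawn frame (relative to the new frm) looks
// like this:
//
//   [frm + 0]   reserved slot (where an interpreter would keep the return CIP)
//   [frm + 4]   caller's frame pointer (dat-relative)
//   [frm + 8]   argument count, in cells (pushed by the caller)
//   [frm + 12]  first argument, and so on
//
// OP_RETN below unwinds this in reverse: it reloads the old frame from
// [stk + 4], pops both slots, then uses the argument count to drop the
// caller's pushed arguments.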
case OP_PROC:
// Push the old frame onto the stack.
__ movl(tmp, Operand(frmAddr()));
__ movl(Operand(stk, -4), tmp);
__ subl(stk, 8); // extra unused slot for non-existent CIP
// Get and store the new frame.
__ movl(tmp, stk);
__ movl(frm, stk);
__ subl(tmp, dat);
__ movl(Operand(frmAddr()), tmp);
// Align the stack to 16-bytes (each call adds 4 bytes).
__ subl(esp, 12);
break;
case OP_IDXADDR_B:
{
cell_t val = readCell();
__ shll(pri, val);
__ addl(pri, alt);
break;
}
case OP_SHL:
__ movl(ecx, alt);
__ shll_cl(pri);
break;
case OP_SHR:
__ movl(ecx, alt);
__ shrl_cl(pri);
break;
case OP_SSHR:
__ movl(ecx, alt);
__ sarl_cl(pri);
break;
case OP_SHL_C_PRI:
case OP_SHL_C_ALT:
{
Register reg = (op == OP_SHL_C_PRI) ? pri : alt;
cell_t val = readCell();
__ shll(reg, val);
break;
}
case OP_SHR_C_PRI:
case OP_SHR_C_ALT:
{
Register reg = (op == OP_SHR_C_PRI) ? pri : alt;
cell_t val = readCell();
__ shrl(reg, val);
break;
}
case OP_SMUL:
__ imull(pri, alt);
break;
case OP_NOT:
__ testl(eax, eax);
__ movl(eax, 0);
__ set(zero, r8_al);
break;
case OP_NEG:
__ negl(eax);
break;
case OP_XOR:
__ xorl(pri, alt);
break;
case OP_OR:
__ orl(pri, alt);
break;
case OP_AND:
__ andl(pri, alt);
break;
case OP_INVERT:
__ notl(pri);
break;
case OP_ADD_C:
{
cell_t val = readCell();
__ addl(pri, val);
break;
}
case OP_SMUL_C:
{
cell_t val = readCell();
__ imull(pri, pri, val);
break;
}
case OP_EQ:
case OP_NEQ:
case OP_SLESS:
case OP_SLEQ:
case OP_SGRTR:
case OP_SGEQ:
{
ConditionCode cc = OpToCondition(op);
__ cmpl(pri, alt);
__ movl(pri, 0);
__ set(cc, r8_al);
break;
}
case OP_EQ_C_PRI:
case OP_EQ_C_ALT:
{
Register reg = (op == OP_EQ_C_PRI) ? pri : alt;
cell_t val = readCell();
__ cmpl(reg, val);
__ movl(pri, 0);
__ set(equal, r8_al);
break;
}
case OP_INC_PRI:
case OP_INC_ALT:
{
Register reg = (op == OP_INC_PRI) ? pri : alt;
__ addl(reg, 1);
break;
}
case OP_INC:
case OP_INC_S:
{
Register base = (op == OP_INC) ? dat : frm;
cell_t offset = readCell();
__ addl(Operand(base, offset), 1);
break;
}
case OP_INC_I:
__ addl(Operand(dat, pri, NoScale), 1);
break;
case OP_DEC_PRI:
case OP_DEC_ALT:
{
Register reg = (op == OP_DEC_PRI) ? pri : alt;
__ subl(reg, 1);
break;
}
case OP_DEC:
case OP_DEC_S:
{
Register base = (op == OP_DEC) ? dat : frm;
cell_t offset = readCell();
__ subl(Operand(base, offset), 1);
break;
}
case OP_DEC_I:
__ subl(Operand(dat, pri, NoScale), 1);
break;
case OP_LOAD_PRI:
case OP_LOAD_ALT:
{
Register reg = (op == OP_LOAD_PRI) ? pri : alt;
cell_t offset = readCell();
__ movl(reg, Operand(dat, offset));
break;
}
case OP_LOAD_BOTH:
{
cell_t offs1 = readCell();
cell_t offs2 = readCell();
__ movl(pri, Operand(dat, offs1));
__ movl(alt, Operand(dat, offs2));
break;
}
case OP_LOAD_S_PRI:
case OP_LOAD_S_ALT:
{
Register reg = (op == OP_LOAD_S_PRI) ? pri : alt;
cell_t offset = readCell();
__ movl(reg, Operand(frm, offset));
break;
}
case OP_LOAD_S_BOTH:
{
cell_t offs1 = readCell();
cell_t offs2 = readCell();
__ movl(pri, Operand(frm, offs1));
__ movl(alt, Operand(frm, offs2));
break;
}
case OP_LREF_S_PRI:
case OP_LREF_S_ALT:
{
Register reg = (op == OP_LREF_S_PRI) ? pri : alt;
cell_t offset = readCell();
__ movl(reg, Operand(frm, offset));
__ movl(reg, Operand(dat, reg, NoScale));
break;
}
case OP_CONST_PRI:
case OP_CONST_ALT:
{
Register reg = (op == OP_CONST_PRI) ? pri : alt;
cell_t val = readCell();
__ movl(reg, val);
break;
}
case OP_ADDR_PRI:
case OP_ADDR_ALT:
{
Register reg = (op == OP_ADDR_PRI) ? pri : alt;
cell_t offset = readCell();
__ movl(reg, Operand(frmAddr()));
__ addl(reg, offset);
break;
}
case OP_STOR_PRI:
case OP_STOR_ALT:
{
Register reg = (op == OP_STOR_PRI) ? pri : alt;
cell_t offset = readCell();
__ movl(Operand(dat, offset), reg);
break;
}
case OP_STOR_S_PRI:
case OP_STOR_S_ALT:
{
Register reg = (op == OP_STOR_S_PRI) ? pri : alt;
cell_t offset = readCell();
__ movl(Operand(frm, offset), reg);
break;
}
case OP_IDXADDR:
__ lea(pri, Operand(alt, pri, ScaleFour));
break;
case OP_SREF_S_PRI:
case OP_SREF_S_ALT:
{
Register reg = (op == OP_SREF_S_PRI) ? pri : alt;
cell_t offset = readCell();
__ movl(tmp, Operand(frm, offset));
__ movl(Operand(dat, tmp, NoScale), reg);
break;
}
case OP_POP_PRI:
case OP_POP_ALT:
{
Register reg = (op == OP_POP_PRI) ? pri : alt;
__ movl(reg, Operand(stk, 0));
__ addl(stk, 4);
break;
}
case OP_SWAP_PRI:
case OP_SWAP_ALT:
{
Register reg = (op == OP_SWAP_PRI) ? pri : alt;
__ movl(tmp, Operand(stk, 0));
__ movl(Operand(stk, 0), reg);
__ movl(reg, tmp);
break;
}
case OP_LIDX:
__ lea(pri, Operand(alt, pri, ScaleFour));
__ movl(pri, Operand(dat, pri, NoScale));
break;
case OP_LIDX_B:
{
cell_t val = readCell();
if (val >= 0 && val <= 3) {
__ lea(pri, Operand(alt, pri, Scale(val)));
} else {
__ shll(pri, val);
__ addl(pri, alt);
}
emitCheckAddress(pri);
__ movl(pri, Operand(dat, pri, NoScale));
break;
}
case OP_CONST:
case OP_CONST_S:
{
Register base = (op == OP_CONST) ? dat : frm;
cell_t offset = readCell();
cell_t val = readCell();
__ movl(Operand(base, offset), val);
break;
}
case OP_LOAD_I:
emitCheckAddress(pri);
__ movl(pri, Operand(dat, pri, NoScale));
break;
case OP_STOR_I:
emitCheckAddress(alt);
__ movl(Operand(dat, alt, NoScale), pri);
break;
case OP_SDIV:
case OP_SDIV_ALT:
{
Register dividend = (op == OP_SDIV) ? pri : alt;
Register divisor = (op == OP_SDIV) ? alt : pri;
// Guard against divide-by-zero.
__ testl(divisor, divisor);
__ j(zero, &error_divide_by_zero_);
// A more subtle case: INT_MIN / -1 raises an overflow exception.
Label ok;
__ cmpl(divisor, -1);
__ j(not_equal, &ok);
__ cmpl(dividend, 0x80000000);
__ j(equal, &error_integer_overflow_);
__ bind(&ok);
// Now we can actually perform the divide.
__ movl(tmp, divisor);
if (op == OP_SDIV)
__ movl(edx, dividend);
else
__ movl(eax, dividend);
__ sarl(edx, 31);
__ idivl(tmp);
break;
}
case OP_LODB_I:
{
cell_t val = readCell();
emitCheckAddress(pri);
__ movl(pri, Operand(dat, pri, NoScale));
if (val == 1)
__ andl(pri, 0xff);
else if (val == 2)
__ andl(pri, 0xffff);
break;
}
case OP_STRB_I:
{
cell_t val = readCell();
emitCheckAddress(alt);
if (val == 1)
__ movb(Operand(dat, alt, NoScale), pri);
else if (val == 2)
__ movw(Operand(dat, alt, NoScale), pri);
else if (val == 4)
__ movl(Operand(dat, alt, NoScale), pri);
break;
}
case OP_RETN:
{
// Restore the old frame pointer.
__ movl(frm, Operand(stk, 4)); // get the old frm
__ addl(stk, 8); // pop stack
__ movl(Operand(frmAddr()), frm); // store back old frm
__ addl(frm, dat); // relocate
// Remove parameters.
__ movl(tmp, Operand(stk, 0));
__ lea(stk, Operand(stk, tmp, ScaleFour, 4));
__ addl(esp, 12);
__ ret();
break;
}
case OP_MOVS:
{
cell_t val = readCell();
unsigned dwords = val / 4;
unsigned bytes = val % 4;
__ cld();
__ push(esi);
__ push(edi);
// Note: compute edi first, since loading esi clobbers dat (esi), which both leas need.
__ lea(edi, Operand(dat, alt, NoScale));
__ lea(esi, Operand(dat, pri, NoScale));
if (dwords) {
__ movl(ecx, dwords);
__ rep_movsd();
}
if (bytes) {
__ movl(ecx, bytes);
__ rep_movsb();
}
__ pop(edi);
__ pop(esi);
break;
}
case OP_FILL:
{
// eax/pri is used implicitly.
unsigned dwords = readCell() / 4;
__ push(edi);
__ lea(edi, Operand(dat, alt, NoScale));
__ movl(ecx, dwords);
__ cld();
__ rep_stosd();
__ pop(edi);
break;
}
case OP_STRADJUST_PRI:
__ addl(pri, 4);
__ sarl(pri, 2);
break;
case OP_FABS:
__ movl(pri, Operand(stk, 0));
__ andl(pri, 0x7fffffff);
__ addl(stk, 4);
break;
case OP_FLOAT:
// Note: edi is stk.
if (MacroAssemblerX86::Features().sse2) {
__ cvtsi2ss(xmm0, Operand(edi, 0));
__ movd(pri, xmm0);
} else {
__ fild32(Operand(edi, 0));
__ subl(esp, 4);
__ fstp32(Operand(esp, 0));
__ pop(pri);
}
__ addl(stk, 4);
break;
case OP_FLOATADD:
case OP_FLOATSUB:
case OP_FLOATMUL:
case OP_FLOATDIV:
if (MacroAssemblerX86::Features().sse2) {
__ movss(xmm0, Operand(stk, 0));
if (op == OP_FLOATADD)
__ addss(xmm0, Operand(stk, 4));
else if (op == OP_FLOATSUB)
__ subss(xmm0, Operand(stk, 4));
else if (op == OP_FLOATMUL)
__ mulss(xmm0, Operand(stk, 4));
else if (op == OP_FLOATDIV)
__ divss(xmm0, Operand(stk, 4));
__ movd(pri, xmm0);
} else {
__ subl(esp, 4);
__ fld32(Operand(stk, 0));
if (op == OP_FLOATADD)
__ fadd32(Operand(stk, 4));
else if (op == OP_FLOATSUB)
__ fsub32(Operand(stk, 4));
else if (op == OP_FLOATMUL)
__ fmul32(Operand(stk, 4));
else if (op == OP_FLOATDIV)
__ fdiv32(Operand(stk, 4));
__ fstp32(Operand(esp, 0));
__ pop(pri);
}
__ addl(stk, 8);
break;
case OP_RND_TO_NEAREST:
{
if (MacroAssemblerX86::Features().sse) {
// Assume no one is touching MXCSR.
__ cvtss2si(pri, Operand(stk, 0));
} else {
static float kRoundToNearest = 0.5f;
// From http://wurstcaptures.untergrund.net/assembler_tricks.html#fastfloorf
__ fld32(Operand(stk, 0));
__ fadd32(st0, st0);
__ fadd32(Operand(ExternalAddress(&kRoundToNearest)));
__ subl(esp, 4);
__ fistp32(Operand(esp, 0));
__ pop(pri);
__ sarl(pri, 1);
}
__ addl(stk, 4);
break;
}
case OP_RND_TO_CEIL:
{
static float kRoundToCeil = -0.5f;
// From http://wurstcaptures.untergrund.net/assembler_tricks.html#fastfloorf
__ fld32(Operand(stk, 0));
__ fadd32(st0, st0);
__ fsubr32(Operand(ExternalAddress(&kRoundToCeil)));
__ subl(esp, 4);
__ fistp32(Operand(esp, 0));
__ pop(pri);
__ sarl(pri, 1);
__ negl(pri);
__ addl(stk, 4);
break;
}
case OP_RND_TO_ZERO:
if (MacroAssemblerX86::Features().sse) {
__ cvttss2si(pri, Operand(stk, 0));
} else {
__ fld32(Operand(stk, 0));
__ subl(esp, 8);
__ fstcw(Operand(esp, 4));
__ movl(Operand(esp, 0), 0xfff);
__ fldcw(Operand(esp, 0));
__ fistp32(Operand(esp, 0));
__ pop(pri);
__ fldcw(Operand(esp, 0));
__ addl(esp, 4);
}
__ addl(stk, 4);
break;
case OP_RND_TO_FLOOR:
__ fld32(Operand(stk, 0));
__ subl(esp, 8);
__ fstcw(Operand(esp, 4));
__ movl(Operand(esp, 0), 0x7ff);
__ fldcw(Operand(esp, 0));
__ fistp32(Operand(esp, 0));
__ pop(eax);
__ fldcw(Operand(esp, 0));
__ addl(esp, 4);
__ addl(stk, 4);
break;
// This is the old float cmp, which returns ordered results. In newly
// compiled code it should not be used or generated.
//
// Note that the checks here are inverted: the test is |rhs OP lhs|.
case OP_FLOATCMP:
{
Label bl, ab, done;
if (MacroAssemblerX86::Features().sse) {
__ movss(xmm0, Operand(stk, 4));
__ ucomiss(Operand(stk, 0), xmm0);
} else {
__ fld32(Operand(stk, 0));
__ fld32(Operand(stk, 4));
__ fucomip(st1);
__ fstp(st0);
}
__ j(above, &ab);
__ j(below, &bl);
__ xorl(pri, pri);
__ jmp(&done);
__ bind(&ab);
__ movl(pri, -1);
__ jmp(&done);
__ bind(&bl);
__ movl(pri, 1);
__ bind(&done);
__ addl(stk, 8);
break;
}
case OP_FLOAT_GT:
emitFloatCmp(above);
break;
case OP_FLOAT_GE:
emitFloatCmp(above_equal);
break;
case OP_FLOAT_LE:
emitFloatCmp(below_equal);
break;
case OP_FLOAT_LT:
emitFloatCmp(below);
break;
case OP_FLOAT_EQ:
emitFloatCmp(equal);
break;
case OP_FLOAT_NE:
emitFloatCmp(not_equal);
break;
case OP_FLOAT_NOT:
{
if (MacroAssemblerX86::Features().sse) {
__ xorps(xmm0, xmm0);
__ ucomiss(Operand(stk, 0), xmm0);
} else {
__ fld32(Operand(stk, 0));
__ fldz();
__ fucomip(st1);
__ fstp(st0);
}
// See emitFloatCmp() - this is a shorter version.
Label done;
__ movl(eax, 1);
__ j(parity, &done);
__ set(zero, r8_al);
__ bind(&done);
__ addl(stk, 4);
break;
}
case OP_STACK:
{
cell_t amount = readCell();
__ addl(stk, amount);
if (amount > 0) {
// Check if the stack went beyond the stack top - usually a compiler error.
__ cmpl(stk, intptr_t(plugin_->memory + plugin_->mem_size));
__ j(not_below, &error_stack_min_);
} else {
// Check if the stack is going to collide with the heap.
__ movl(tmp, Operand(hpAddr()));
__ lea(tmp, Operand(dat, ecx, NoScale, STACK_MARGIN)); // note: tmp is ecx
__ cmpl(stk, tmp);
__ j(below, &error_stack_low_);
}
break;
}
case OP_HEAP:
{
cell_t amount = readCell();
__ movl(alt, Operand(hpAddr()));
__ addl(Operand(hpAddr()), amount);
if (amount < 0) {
__ cmpl(Operand(hpAddr()), plugin_->data_size);
__ j(below, &error_heap_min_);
} else {
__ movl(tmp, Operand(hpAddr()));
__ lea(tmp, Operand(dat, ecx, NoScale, STACK_MARGIN)); // note: tmp is ecx
__ cmpl(tmp, stk);
__ j(above, &error_heap_low_);
}
break;
}
case OP_JUMP:
{
Label *target = labelAt(readCell());
if (!target)
return false;
if (target->bound()) {
__ jmp32(target);
backward_jumps_.append(masm.pc());
} else {
__ jmp(target);
}
break;
}
case OP_JZER:
case OP_JNZ:
{
ConditionCode cc = (op == OP_JZER) ? zero : not_zero;
Label *target = labelAt(readCell());
if (!target)
return false;
__ testl(pri, pri);
if (target->bound()) {
__ j32(cc, target);
backward_jumps_.append(masm.pc());
} else {
__ j(cc, target);
}
break;
}
case OP_JEQ:
case OP_JNEQ:
case OP_JSLESS:
case OP_JSLEQ:
case OP_JSGRTR:
case OP_JSGEQ:
{
Label *target = labelAt(readCell());
if (!target)
return false;
ConditionCode cc = OpToCondition(op);
__ cmpl(pri, alt);
if (target->bound()) {
__ j32(cc, target);
backward_jumps_.append(masm.pc());
} else {
__ j(cc, target);
}
break;
}
case OP_TRACKER_PUSH_C:
{
cell_t amount = readCell();
__ push(pri);
__ push(alt);
__ push(amount * 4);
__ push(intptr_t(rt_->GetBaseContext()->GetCtx()));
__ call(ExternalAddress((void *)PushTracker));
__ addl(esp, 8);
__ testl(eax, eax);
__ j(not_zero, &extern_error_);
__ pop(alt);
__ pop(pri);
break;
}
case OP_TRACKER_POP_SETHEAP:
{
// Save registers.
__ push(pri);
__ push(alt);
// Get the context pointer and call the sanity checker.
__ push(intptr_t(rt_));
__ call(ExternalAddress((void *)PopTrackerAndSetHeap));
__ addl(esp, 4);
__ testl(eax, eax);
__ j(not_zero, &extern_error_);
__ pop(alt);
__ pop(pri);
break;
}
case OP_BREAK:
{
cell_t cip = uintptr_t(cip_ - 1) - uintptr_t(plugin_->pcode);
__ movl(Operand(cipAddr()), cip);
break;
}
case OP_HALT:
__ align(16);
__ movl(tmp, intptr_t(rt_->GetBaseContext()->GetCtx()));
__ movl(Operand(tmp, offsetof(sp_context_t, rval)), pri);
__ movl(pri, readCell());
__ jmp(&extern_error_);
break;
case OP_BOUNDS:
{
cell_t value = readCell();
__ cmpl(eax, value);
__ j(above, &error_bounds_);
break;
}
case OP_GENARRAY:
case OP_GENARRAY_Z:
emitGenArray(op == OP_GENARRAY_Z);
break;
case OP_CALL:
if (!emitCall())
return false;
break;
case OP_SYSREQ_C:
case OP_SYSREQ_N:
if (!emitNativeCall(op))
return false;
break;
case OP_SWITCH:
if (!emitSwitch())
return false;
break;
case OP_CASETBL:
{
size_t ncases = readCell();
// Two cells per case, and one extra cell for the default address.
cip_ += (ncases * 2) + 1;
break;
}
case OP_NOP:
break;
default:
error_ = SP_ERROR_INVALID_INSTRUCTION;
return false;
}
return true;
}
Label *
Compiler::labelAt(size_t offset)
{
if (offset % 4 != 0 ||
offset > plugin_->pcode_size ||
offset <= pcode_start_)
{
// If the jump target is misaligned, or out of pcode bounds, or is an
// address out of the function bounds, we abort. Unfortunately we can't
// test beyond the end of the function since we don't have a precursor
// pass (yet).
error_ = SP_ERROR_INSTRUCTION_PARAM;
return NULL;
}
return &jump_map_[offset / sizeof(cell_t)];
}
void
Compiler::emitCheckAddress(Register reg)
{
// Check if we're in memory bounds.
__ cmpl(reg, plugin_->mem_size);
__ j(not_below, &error_memaccess_);
// Check if we're in the invalid region between hp and sp.
Label done;
__ cmpl(reg, Operand(hpAddr()));
__ j(below, &done);
__ lea(tmp, Operand(dat, reg, NoScale));
__ cmpl(tmp, stk);
__ j(below, &error_memaccess_);
__ bind(&done);
}
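// OP_GENARRAY has two code paths. A one-dimensional array is carved straight
// out of the heap: the heap pointer becomes the array base (written back over
// the size cell on the stack), and the allocation is registered with the
// tracker so later heap bookkeeping can release it. Arrays with two or more
// dimensions fall back to GenerateFullArray(), which also builds the
// indirection vectors.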
void
Compiler::emitGenArray(bool autozero)
{
cell_t val = readCell();
if (val == 1)
{
// Flat array; we can generate this without indirection tables.
// Note that we can overwrite ALT, because technically a STACK op would be
// destroying ALT anyway.
__ movl(alt, Operand(hpAddr()));
__ movl(tmp, Operand(stk, 0));
__ movl(Operand(stk, 0), alt); // store base of the array into the stack.
__ lea(alt, Operand(alt, tmp, ScaleFour));
__ movl(Operand(hpAddr()), alt);
__ addl(alt, dat);
__ cmpl(alt, stk);
__ j(not_below, &error_heap_low_);
__ shll(tmp, 2);
__ push(tmp);
__ push(intptr_t(rt_->GetBaseContext()->GetCtx()));
__ call(ExternalAddress((void *)PushTracker));
__ addl(esp, 4);
__ pop(tmp);
__ shrl(tmp, 2);
__ testl(eax, eax);
__ j(not_zero, &extern_error_);
if (autozero) {
// Note - tmp is ecx and still intact.
__ push(eax);
__ push(edi);
__ xorl(eax, eax);
__ movl(edi, Operand(stk, 0));
__ addl(edi, dat);
__ cld();
__ rep_stosd();
__ pop(edi);
__ pop(eax);
}
} else {
__ push(pri);
// int GenerateFullArray(BaseRuntime *rt, uint32_t argc, cell_t *argv, int autozero);
__ push(autozero ? 1 : 0);
__ push(stk);
__ push(val);
__ push(intptr_t(rt_));
__ call(ExternalAddress((void *)GenerateFullArray));
__ addl(esp, 4 * sizeof(void *));
// restore pri to tmp
__ pop(tmp);
__ testl(eax, eax);
__ j(not_zero, &extern_error_);
// Move tmp back to pri, remove pushed args.
__ movl(pri, tmp);
__ addl(stk, (val - 1) * 4);
}
}
bool
Compiler::emitCall()
{
cell_t offset = readCell();
// If this offset looks crappy, i.e. not aligned or out of bounds, we just
// abort.
if (offset % 4 != 0 || uint32_t(offset) >= plugin_->pcode_size) {
error_ = SP_ERROR_INSTRUCTION_PARAM;
return false;
}
// eax = context
// ecx = rp
__ movl(eax, intptr_t(rt_->GetBaseContext()->GetCtx()));
__ movl(ecx, Operand(eax, offsetof(sp_context_t, rp)));
// Check if the return stack is used up.
__ cmpl(ecx, SP_MAX_RETURN_STACK);
__ j(not_below, &error_stack_low_);
// Add to the return stack.
uintptr_t cip = uintptr_t(cip_ - 2) - uintptr_t(plugin_->pcode);
__ movl(Operand(eax, ecx, ScaleFour, offsetof(sp_context_t, rstk_cips)), cip);
// Increment the return stack pointer.
__ addl(Operand(eax, offsetof(sp_context_t, rp)), 1);
// Store the CIP of the function we're about to call.
__ movl(Operand(cipAddr()), offset);
JitFunction *fun = rt_->GetJittedFunctionByOffset(offset);
if (!fun) {
// Need to emit a delayed thunk.
CallThunk *thunk = new CallThunk(offset);
__ call(&thunk->call);
if (!thunks_.append(thunk))
return false;
} else {
// Function is already emitted, we can do a direct call.
__ call(ExternalAddress(fun->GetEntryAddress()));
}
// Restore the last cip.
__ movl(Operand(cipAddr()), cip);
// Mark us as leaving the last frame.
__ movl(tmp, intptr_t(rt_->GetBaseContext()->GetCtx()));
__ subl(Operand(tmp, offsetof(sp_context_t, rp)), 1);
return true;
}
void
Compiler::emitCallThunks()
{
for (size_t i = 0; i < thunks_.length(); i++) {
CallThunk *thunk = thunks_[i];
Label error;
__ bind(&thunk->call);
// Huge hack - get the return address, since that is the call that we
// need to patch.
__ movl(eax, Operand(esp, 0));
// We need to push 4 arguments, and one of them will need an extra word
// on the stack. Allocate a big block so we're aligned, subtracting
// 4 because we got here via a call.
static const size_t kStackNeeded = 5 * sizeof(void *);
static const size_t kStackReserve = ke::Align(kStackNeeded, 16) - sizeof(void *);
__ subl(esp, kStackReserve);
// Set arguments.
__ movl(Operand(esp, 3 * sizeof(void *)), eax);
__ lea(edx, Operand(esp, 4 * sizeof(void *)));
__ movl(Operand(esp, 2 * sizeof(void *)), edx);
__ movl(Operand(esp, 1 * sizeof(void *)), intptr_t(thunk->pcode_offset));
__ movl(Operand(esp, 0 * sizeof(void *)), intptr_t(rt_));
__ call(ExternalAddress((void *)CompileFromThunk));
__ movl(edx, Operand(esp, 4 * sizeof(void *)));
__ addl(esp, kStackReserve);
__ testl(eax, eax);
__ j(not_zero, &error);
__ jmp(edx);
__ bind(&error);
__ movl(Operand(cipAddr()), thunk->pcode_offset);
__ jmp(g_Jit.GetUniversalReturn());
}
}
cell_t
Compiler::readCell()
{
if (cip_ >= code_end_) {
error_ = SP_ERROR_INVALID_INSTRUCTION;
return 0;
}
return *cip_++;
}
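// Native calls come in two flavors: with SYSREQ.N the argument count is a
// pcode operand, so it is pushed onto the plugin stack here and the arguments
// are popped again after the call; with SYSREQ.C the count cell is expected to
// be on the plugin stack already. Either way, the absolute stack pointer is
// passed to the C++ callback as its parameter block, and the dat-relative sp
// is written back into the context before leaving JIT code.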
bool
Compiler::emitNativeCall(OPCODE op)
{
uint32_t native_index = readCell();
if (native_index >= plugin_->num_natives) {
error_ = SP_ERROR_INSTRUCTION_PARAM;
return false;
}
uint32_t num_params;
if (op == OP_SYSREQ_N) {
num_params = readCell();
// See if we can get a replacement opcode. If we can, then recursively
// call emitOp() to generate it. Note: it's important that we do this
// before generating any code for the SYSREQ.N.
unsigned replacement = rt_->GetNativeReplacement(native_index);
if (replacement != OP_NOP)
return emitOp((OPCODE)replacement);
// Store the number of parameters.
__ movl(Operand(stk, -4), num_params);
__ subl(stk, 4);
}
// Save registers.
__ push(edx);
// Push the last parameter for the C++ function.
__ push(stk);
// Relocate our absolute stk to be dat-relative, and update the context's
// view.
__ movl(eax, intptr_t(rt_->GetBaseContext()->GetCtx()));
__ subl(stk, dat);
__ movl(Operand(eax, offsetof(sp_context_t, sp)), stk);
__ movl(Operand(eax, offsetof(sp_context_t, n_idx)), native_index);
sp_native_t *native = rt_->GetNativeByIndex(native_index);
if ((native->status != SP_NATIVE_BOUND) ||
(native->flags & (SP_NTVFLAG_OPTIONAL | SP_NTVFLAG_EPHEMERAL)))
{
// The native is either unbound, or it could become unbound in the
// future. Invoke the slower native callback.
__ push(native_index);
__ push(eax);
__ call(ExternalAddress((void *)NativeCallback));
} else {
// The native is bound so we have a few more guarantees.
__ push(intptr_t(native->pfn));
__ push(eax);
__ call(ExternalAddress((void *)BoundNativeCallback));
}
// Check for errors.
__ movl(ecx, intptr_t(rt_->GetBaseContext()->GetCtx()));
__ movl(ecx, Operand(ecx, offsetof(sp_context_t, n_err)));
__ testl(ecx, ecx);
__ j(not_zero, &extern_error_);
// Restore local state.
__ addl(stk, dat);
__ addl(esp, 12);
__ pop(edx);
if (op == OP_SYSREQ_N) {
// Pop the stack. Do not check the margins.
__ addl(stk, (num_params + 1) * sizeof(cell_t));
}
return true;
}
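// OP_SWITCH's operand points at an OP_CASETBL blob laid out as:
//
//   [OP_CASETBL] [ncases] [default target] [value, target] * ncases
//
// which is why emitSwitch() skips one cell past the operand before reading,
// and why the OP_CASETBL handler above advances cip_ by ncases * 2 + 1 cells.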
bool
Compiler::emitSwitch()
{
cell_t offset = readCell();
if (!labelAt(offset))
return false;
cell_t *tbl = (cell_t *)((char *)plugin_->pcode + offset + sizeof(cell_t));
struct Entry {
cell_t val;
cell_t offset;
};
size_t ncases = *tbl++;
Label *defaultCase = labelAt(*tbl);
if (!defaultCase)
return false;
// Degenerate - 0 cases.
if (!ncases) {
__ jmp(defaultCase);
return true;
}
Entry *cases = (Entry *)(tbl + 1);
// Degenerate - 1 case.
if (ncases == 1) {
Label *maybe = labelAt(cases[0].offset);
if (!maybe)
return false;
__ cmpl(pri, cases[0].val);
__ j(equal, maybe);
__ jmp(defaultCase);
return true;
}
// We have two or more cases, so let's generate a full switch. Decide
// whether we'll make an if chain, or a jump table, based on whether
// the numbers are strictly sequential.
bool sequential = true;
{
cell_t first = cases[0].val;
cell_t last = first;
for (size_t i = 1; i < ncases; i++) {
if (cases[i].val != ++last) {
sequential = false;
break;
}
}
}
// First check whether the bounds are correct: if (a < LOW || a > HIGH);
// this check is valid whether or not we emit a sequential-optimized switch.
cell_t low = cases[0].val;
if (low != 0) {
// negate it so we'll get a lower bound of 0.
low = -low;
__ lea(tmp, Operand(pri, low));
} else {
__ movl(tmp, pri);
}
cell_t high = abs(cases[0].val - cases[ncases - 1].val);
__ cmpl(tmp, high);
__ j(above, defaultCase);
if (sequential) {
// Optimized table version. The tomfoolery below is because we only have
// one free register... it seems unlikely pri or alt will be used given
// that we're at the end of a control-flow point, but we'll play it safe.
DataLabel table;
__ push(eax);
__ movl(eax, &table);
__ movl(ecx, Operand(eax, ecx, ScaleFour));
__ pop(eax);
__ jmp(ecx);
__ bind(&table);
for (size_t i = 0; i < ncases; i++) {
Label *label = labelAt(cases[i].offset);
if (!label)
return false;
__ emit_absolute_address(label);
}
} else {
// Slower version. Go through each case and generate a check.
for (size_t i = 0; i < ncases; i++) {
Label *label = labelAt(cases[i].offset);
if (!label)
return false;
__ cmpl(pri, cases[i].val);
__ j(equal, label);
}
__ jmp(defaultCase);
}
return true;
}
void
Compiler::emitErrorPath(Label *dest, int code)
{
if (dest->used()) {
__ bind(dest);
__ movl(eax, code);
__ jmp(g_Jit.GetUniversalReturn());
}
}
void
Compiler::emitFloatCmp(ConditionCode cc)
{
unsigned lhs = 4;
unsigned rhs = 0;
if (cc == below || cc == below_equal) {
// NaN results in ZF=1 PF=1 CF=1
//
// ja/jae check for ZF,CF=0 and CF=0. If we make all relational compares
// look like ja/jae, we'll guarantee all NaN comparisons will fail (which
// would not be true for jb/jbe, unless we checked with jp).
if (cc == below)
cc = above;
else
cc = above_equal;
rhs = 4;
lhs = 0;
}
if (MacroAssemblerX86::Features().sse) {
__ movss(xmm0, Operand(stk, rhs));
__ ucomiss(Operand(stk, lhs), xmm0);
} else {
__ fld32(Operand(stk, rhs));
__ fld32(Operand(stk, lhs));
__ fucomip(st1);
__ fstp(st0);
}
// An equal or not-equal needs special handling for the parity bit.
if (cc == equal || cc == not_equal) {
// If NaN, PF=1, ZF=1, and E/Z tests ZF=1.
//
// If NaN, PF=1, ZF=1 and NE/NZ tests Z=0. But, we want any != with NaNs
// to return true, including NaN != NaN.
//
// To make checks simpler, we set |eax| to the expected value of a NaN
// beforehand. This also clears the top bits of |eax| for setcc.
Label done;
__ movl(eax, (cc == equal) ? 0 : 1);
__ j(parity, &done);
__ set(cc, r8_al);
__ bind(&done);
} else {
__ movl(eax, 0);
__ set(cc, r8_al);
}
__ addl(stk, 8);
}
void
Compiler::emitErrorPaths()
{
emitErrorPath(&error_divide_by_zero_, SP_ERROR_DIVIDE_BY_ZERO);
emitErrorPath(&error_stack_low_, SP_ERROR_STACKLOW);
emitErrorPath(&error_stack_min_, SP_ERROR_STACKMIN);
emitErrorPath(&error_bounds_, SP_ERROR_ARRAY_BOUNDS);
emitErrorPath(&error_memaccess_, SP_ERROR_MEMACCESS);
emitErrorPath(&error_heap_low_, SP_ERROR_HEAPLOW);
emitErrorPath(&error_heap_min_, SP_ERROR_HEAPMIN);
emitErrorPath(&error_integer_overflow_, SP_ERROR_INTEGER_OVERFLOW);
if (extern_error_.used()) {
__ bind(&extern_error_);
__ movl(eax, intptr_t(rt_->GetBaseContext()->GetCtx()));
__ movl(eax, Operand(eax, offsetof(sp_context_t, n_err)));
__ jmp(g_Jit.GetUniversalReturn());
}
}
typedef int (*JIT_EXECUTE)(sp_context_t *ctx, uint8_t *memory, void *code);
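// GenerateEntry builds the C++-to-JIT trampoline. Judging from the register
// use here and in the opcode handlers above, the pinned assignments are:
// eax = pri, edx = alt, ecx = tmp, esi = dat (plugin memory base),
// edi = stk (absolute stack pointer), ebx = frm. The stub loads those from the
// context and memory arguments, aligns the native stack, calls the compiled
// function, and funnels both normal returns and the universal error return
// through the same exit path that writes sp back into the context.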
static void *
GenerateEntry(void **retp, void **timeoutp)
{
AssemblerX86 masm;
__ push(ebp);
__ movl(ebp, esp);
__ push(esi); // ebp - 4
__ push(edi); // ebp - 8
__ push(ebx); // ebp - 12
__ push(esp); // ebp - 16
__ movl(ebx, Operand(ebp, 8 + 4 * 0));
__ movl(eax, Operand(ebp, 8 + 4 * 1));
__ movl(ecx, Operand(ebp, 8 + 4 * 2));
// Set up run-time registers.
__ movl(edi, Operand(ebx, offsetof(sp_context_t, sp)));
__ addl(edi, eax);
__ movl(esi, eax);
__ movl(ebx, edi);
// Align the stack.
__ andl(esp, 0xfffffff0);
// Call into plugin (align the stack first).
__ call(ecx);
// Get input context, store rval.
__ movl(ecx, Operand(ebp, 8 + 4 * 0));
__ movl(Operand(ecx, offsetof(sp_context_t, rval)), pri);
// Set no error.
__ movl(eax, SP_ERROR_NONE);
// Store latest stk. If we have an error code, we'll jump directly to here,
// so eax will already be set.
Label ret;
__ bind(&ret);
__ subl(stk, dat);
__ movl(Operand(ecx, offsetof(sp_context_t, sp)), stk);
// Restore stack.
__ movl(esp, Operand(ebp, -16));
// Restore registers and gtfo.
__ pop(ebx);
__ pop(edi);
__ pop(esi);
__ pop(ebp);
__ ret();
// The universal emergency return will jump to here.
Label error;
__ bind(&error);
__ movl(ecx, Operand(ebp, 8 + 4 * 0)); // ret-path expects ecx = ctx
__ jmp(&ret);
Label timeout;
__ bind(&timeout);
__ movl(eax, SP_ERROR_TIMEOUT);
__ jmp(&error);
void *code = LinkCode(masm);
if (!code)
return NULL;
*retp = reinterpret_cast<uint8_t *>(code) + error.offset();
*timeoutp = reinterpret_cast<uint8_t *>(code) + timeout.offset();
return code;
}
ICompilation *JITX86::ApplyOptions(ICompilation *_IN, ICompilation *_OUT)
{
if (_IN == NULL)
return _OUT;
CompData *_in = (CompData *)_IN;
CompData *_out = (CompData *)_OUT;
_in->inline_level = _out->inline_level;
_in->profile = _out->profile;
_out->Abort();
return _in;
}
JITX86::JITX86()
{
m_pJitEntry = NULL;
}
bool
JITX86::InitializeJIT()
{
g_pCodeCache = KE_CreateCodeCache();
m_pJitEntry = GenerateEntry(&m_pJitReturn, &m_pJitTimeout);
if (!m_pJitEntry)
return false;
MacroAssemblerX86 masm;
MacroAssemblerX86::GenerateFeatureDetection(masm);
void *code = LinkCode(masm);
if (!code)
return false;
MacroAssemblerX86::RunFeatureDetection(code);
KE_FreeCode(g_pCodeCache, code);
return true;
}
void
JITX86::ShutdownJIT()
{
KE_DestroyCodeCache(g_pCodeCache);
}
JitFunction *
JITX86::CompileFunction(BaseRuntime *prt, cell_t pcode_offs, int *err)
{
Compiler cc(prt, pcode_offs);
JitFunction *fun = cc.emit(err);
if (!fun)
return NULL;
// Grab the lock before linking code in, since the watchdog timer will look
// at this list on another thread.
ke::AutoLock lock(g_Jit.Mutex());
prt->AddJittedFunction(fun);
return fun;
}
void
JITX86::SetupContextVars(BaseRuntime *runtime, BaseContext *pCtx, sp_context_t *ctx)
{
ctx->tracker = new tracker_t;
ctx->tracker->pBase = (ucell_t *)malloc(1024);
ctx->tracker->pCur = ctx->tracker->pBase;
ctx->tracker->size = 1024 / sizeof(cell_t);
ctx->basecx = pCtx;
ctx->plugin = const_cast<sp_plugin_t *>(runtime->plugin());
}
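// CreateFakeNative emits a tiny trampoline that carries the SPVM_NATIVE_FUNC
// signature. It saves the callee-saved registers, realigns the native stack,
// and forwards to the user callback with the bound data pointer appended,
// i.e. callback(ctx, params, pData); the callback's cell result is returned
// unchanged in eax.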
SPVM_NATIVE_FUNC
JITX86::CreateFakeNative(SPVM_FAKENATIVE_FUNC callback, void *pData)
{
AssemblerX86 masm;
__ push(ebx);
__ push(edi);
__ push(esi);
__ movl(edi, Operand(esp, 16)); // store ctx
__ movl(esi, Operand(esp, 20)); // store params
__ movl(ebx, esp);
__ andl(esp, 0xfffffff0);
__ subl(esp, 4);
__ push(intptr_t(pData));
__ push(esi);
__ push(edi);
__ call(ExternalAddress((void *)callback));
__ movl(esp, ebx);
__ pop(esi);
__ pop(edi);
__ pop(ebx);
__ ret();
return (SPVM_NATIVE_FUNC)LinkCode(masm);
}
void
JITX86::DestroyFakeNative(SPVM_NATIVE_FUNC func)
{
KE_FreeCode(g_pCodeCache, (void *)func);
}
ICompilation *
JITX86::StartCompilation()
{
return new CompData;
}
ICompilation *
JITX86::StartCompilation(BaseRuntime *runtime)
{
return new CompData;
}
void
CompData::Abort()
{
delete this;
}
void
JITX86::FreeContextVars(sp_context_t *ctx)
{
free(ctx->tracker->pBase);
delete ctx->tracker;
}
bool
CompData::SetOption(const char *key, const char *val)
{
if (strcmp(key, SP_JITCONF_DEBUG) == 0)
return true;
if (strcmp(key, SP_JITCONF_PROFILE) == 0) {
profile = atoi(val);
/** Callbacks must be profiled to profile functions! */
if ((profile & SP_PROF_FUNCTIONS) == SP_PROF_FUNCTIONS)
profile |= SP_PROF_CALLBACKS;
return true;
}
return false;
}
int
JITX86::InvokeFunction(BaseRuntime *runtime, JitFunction *fn, cell_t *result)
{
sp_context_t *ctx = runtime->GetBaseContext()->GetCtx();
// Note that cip, hp, sp are saved and restored by Execute2().
ctx->cip = fn->GetPCodeAddress();
JIT_EXECUTE pfn = (JIT_EXECUTE)m_pJitEntry;
if (level_++ == 0)
frame_id_++;
int err = pfn(ctx, runtime->plugin()->memory, fn->GetEntryAddress());
level_--;
*result = ctx->rval;
return err;
}
void *
JITX86::AllocCode(size_t size)
{
return Knight::KE_AllocCode(g_pCodeCache, size);
}
void
JITX86::FreeCode(void *code)
{
KE_FreeCode(g_pCodeCache, code);
}
void
JITX86::RegisterRuntime(BaseRuntime *rt)
{
mutex_.AssertCurrentThreadOwns();
runtimes_.append(rt);
}
void
JITX86::DeregisterRuntime(BaseRuntime *rt)
{
mutex_.AssertCurrentThreadOwns();
runtimes_.remove(rt);
}
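// The watchdog timeout works by rewriting jumps rather than polling: every
// backward jump recorded as a LoopEdge has its rel32 displacement (stored
// 4 bytes before the recorded offset) redirected to the timeout stub, so any
// loop still running will trap on its next iteration. Unpatching simply
// restores the original displacement saved when the function was linked.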
void
JITX86::PatchAllJumpsForTimeout()
{
mutex_.AssertCurrentThreadOwns();
for (ke::InlineList<BaseRuntime>::iterator iter = runtimes_.begin(); iter != runtimes_.end(); iter++) {
BaseRuntime *rt = *iter;
for (size_t i = 0; i < rt->NumJitFunctions(); i++) {
JitFunction *fun = rt->GetJitFunction(i);
uint8_t *base = reinterpret_cast<uint8_t *>(fun->GetEntryAddress());
for (size_t j = 0; j < fun->NumLoopEdges(); j++) {
const LoopEdge &e = fun->GetLoopEdge(j);
int32_t diff = intptr_t(m_pJitTimeout) - intptr_t(base + e.offset);
*reinterpret_cast<int32_t *>(base + e.offset - 4) = diff;
}
}
}
}
void
JITX86::UnpatchAllJumpsFromTimeout()
{
mutex_.AssertCurrentThreadOwns();
for (ke::InlineList<BaseRuntime>::iterator iter = runtimes_.begin(); iter != runtimes_.end(); iter++) {
BaseRuntime *rt = *iter;
for (size_t i = 0; i < rt->NumJitFunctions(); i++) {
JitFunction *fun = rt->GetJitFunction(i);
uint8_t *base = reinterpret_cast<uint8_t *>(fun->GetEntryAddress());
for (size_t j = 0; j < fun->NumLoopEdges(); j++) {
const LoopEdge &e = fun->GetLoopEdge(j);
*reinterpret_cast<int32_t *>(base + e.offset - 4) = e.disp32;
}
}
}
}