From e60940834bab7c7700301fa61f7c2054c7a04a07 Mon Sep 17 00:00:00 2001 From: Borja Ferrer Date: Tue, 22 May 2007 15:15:51 +0000 Subject: [PATCH] added floating point optimizations to the JIT standardised a bit more x86_macros.h some asm optimizations to bintools extension --HG-- extra : convert_revision : svn%3A39bc706e-5318-0410-9160-8a85361fbb7c/trunk%40837 --- core/smn_float.cpp | 68 ++--- extensions/bintools/jit_call.cpp | 32 +-- plugins/include/float.inc | 40 ++- public/jit/x86/x86_macros.h | 360 ++++++++++++++++++++---- sourcepawn/jit/x86/jit_x86.cpp | 386 +++++++++++++++++++++++++- sourcepawn/jit/x86/jit_x86.h | 13 + sourcepawn/jit/x86/opcode_helpers.cpp | 7 + sourcepawn/jit/x86/opcode_helpers.h | 18 +- sourcepawn/jit/x86/opcode_switch.inc | 57 +++- 9 files changed, 852 insertions(+), 129 deletions(-) diff --git a/core/smn_float.cpp b/core/smn_float.cpp index e5585640..ffae5361 100644 --- a/core/smn_float.cpp +++ b/core/smn_float.cpp @@ -34,7 +34,7 @@ static cell_t sm_float(IPluginContext *pCtx, const cell_t *params) static cell_t sm_FloatAbs(IPluginContext *pCtx, const cell_t *params) { float val = sp_ctof(params[1]); - val = (val >= 0) ? val : -val; + val = (val >= 0.0f) ? 
val : -val; return sp_ftoc(val); } @@ -87,11 +87,11 @@ static cell_t sm_Logarithm(IPluginContext *pCtx, const cell_t *params) float val = sp_ctof(params[1]); float base = sp_ctof(params[2]); - if ((val <= 0) || (base <= 0)) + if ((val <= 0.0f) || (base <= 0.0f)) { return pCtx->ThrowNativeError("Cannot evaluate the logarithm of zero or a negative number (val:%f base:%f)", val, base); } - if (base == 10.0) + if (base == 10.0f) { val = log10(val); } else { @@ -120,7 +120,7 @@ static cell_t sm_SquareRoot(IPluginContext *pCtx, const cell_t *params) { float val = sp_ctof(params[1]); - if (val < 0.0) + if (val < 0.0f) { return pCtx->ThrowNativeError("Cannot evaluate the square root of a negative number (val:%f)", val); } @@ -128,37 +128,38 @@ static cell_t sm_SquareRoot(IPluginContext *pCtx, const cell_t *params) return sp_ftoc(sqrt(val)); } -static cell_t sm_FloatRound(IPluginContext *pCtx, const cell_t *params) +static cell_t sm_RountToNearest(IPluginContext *pCtx, const cell_t *params) { float val = sp_ctof(params[1]); + val = (float)floor(val + 0.5f); - switch (params[2]) + return static_cast(val); +} + +static cell_t sm_RoundToFloor(IPluginContext *pCtx, const cell_t *params) +{ + float val = sp_ctof(params[1]); + val = floor(val); + + return static_cast(val); +} + +static cell_t sm_RoundToCeil(IPluginContext *pCtx, const cell_t *params) +{ + float val = sp_ctof(params[1]); + val = ceil(val); + + return static_cast(val); +} + +static cell_t sm_RoundToZero(IPluginContext *pCtx, const cell_t *params) +{ + float val = sp_ctof(params[1]); + if (val >= 0.0f) { - case 1: - { - val = floor(val); - break; - } - case 2: - { - val = ceil(val); - break; - } - case 3: - { - if (val >= 0.0) - { - val = floor(val); - } else { - val = ceil(val); - } - break; - } - default: - { - val = (float)floor(val + 0.5); - break; - } + val = floor(val); + } else { + val = ceil(val); } return static_cast(val); @@ -237,7 +238,10 @@ REGISTER_NATIVES(floatnatives) {"FloatAdd", sm_FloatAdd}, 
{"FloatSub", sm_FloatSub}, {"FloatFraction", sm_FloatFraction}, - {"FloatRound", sm_FloatRound}, + {"RoundToZero", sm_RoundToZero}, + {"RoundToCeil", sm_RoundToCeil}, + {"RoundToFloor", sm_RoundToFloor}, + {"RountToNearest", sm_RountToNearest}, {"FloatCompare", sm_FloatCompare}, {"SquareRoot", sm_SquareRoot}, {"Pow", sm_Pow}, diff --git a/extensions/bintools/jit_call.cpp b/extensions/bintools/jit_call.cpp index 00fa78e7..b9a567e4 100644 --- a/extensions/bintools/jit_call.cpp +++ b/extensions/bintools/jit_call.cpp @@ -116,17 +116,15 @@ inline void Write_PushPOD(JitWriter *jit, const PassEncode *pEnc) { case 1: { - //xor reg, reg - //mov reg, BYTE PTR [ebx+] + //movzx reg, BYTE PTR [ebx+] //push reg - IA32_Xor_Reg_Rm(jit, reg, reg, MOD_REG); if (pEnc->offset < SCHAR_MAX) { - IA32_Mov_Reg8_Rm8_Disp8(jit, reg, REG_EBX, (jit_int8_t)pEnc->offset); + IA32_Movzx_Reg32_Rm8_Disp8(jit, reg, REG_EBX, (jit_int8_t)pEnc->offset); } else if (!pEnc->offset) { - IA32_Mov_Reg8_Rm8(jit, reg, REG_EBX, MOD_MEM_REG); + IA32_Movzx_Reg32_Rm8(jit, reg, REG_EBX, MOD_MEM_REG); } else { - IA32_Mov_Reg8_Rm8_Disp32(jit, reg, REG_EBX, pEnc->offset); + IA32_Movzx_Reg32_Rm8_Disp32(jit, reg, REG_EBX, pEnc->offset); } IA32_Push_Reg(jit, reg); @@ -135,18 +133,16 @@ inline void Write_PushPOD(JitWriter *jit, const PassEncode *pEnc) } case 2: { - //xor reg, reg - //mov reg, WORD PTR [ebx+] + //movzx reg, WORD PTR [ebx+] //push reg - IA32_Xor_Reg_Rm(jit, reg, reg, MOD_REG); jit->write_ubyte(IA32_16BIT_PREFIX); if (pEnc->offset < SCHAR_MAX) { - IA32_Mov_Reg_Rm_Disp8(jit, reg, REG_EBX, (jit_int8_t)pEnc->offset); + IA32_Movzx_Reg32_Rm16_Disp8(jit, reg, REG_EBX, (jit_int8_t)pEnc->offset); } else if (!pEnc->offset) { - IA32_Mov_Reg_Rm(jit, reg, REG_EBX, MOD_MEM_REG); + IA32_Movzx_Reg32_Rm16(jit, reg, REG_EBX, MOD_MEM_REG); } else { - IA32_Mov_Reg_Rm_Disp32(jit, reg, REG_EBX, pEnc->offset); + IA32_Movzx_Reg32_Rm16_Disp32(jit, reg, REG_EBX, pEnc->offset); } IA32_Push_Reg(jit, reg); @@ -234,11 +230,13 @@ inline 
void Write_PushFloat(JitWriter *jit, const PassEncode *pEnc) if (pEnc->offset < SCHAR_MAX) { IA32_Fld_Mem32_Disp8(jit, REG_EBX, (jit_int8_t)pEnc->offset); + } else if (!pEnc->offset) { + IA32_Fld_Mem32(jit, REG_EBX); } else { IA32_Fld_Mem32_Disp32(jit, REG_EBX, pEnc->offset); } IA32_Push_Reg(jit, _DecodeRegister3(g_RegDecoder++)); - IA32_Fstp_Mem32(jit, REG_ESP, REG_NOIDX, NOSCALE); + IA32_Fstp_Mem32_ESP(jit); g_StackUsage += 4; break; } @@ -250,11 +248,13 @@ inline void Write_PushFloat(JitWriter *jit, const PassEncode *pEnc) if (pEnc->offset < SCHAR_MAX) { IA32_Fld_Mem64_Disp8(jit, REG_EBX, (jit_int8_t)pEnc->offset); + } else if (!pEnc->offset) { + IA32_Fld_Mem64(jit, REG_EBX); } else { IA32_Fld_Mem64_Disp32(jit, REG_EBX, pEnc->offset); } IA32_Sub_Rm_Imm8(jit, REG_ESP, 8, MOD_REG); - IA32_Fstp_Mem64(jit, REG_ESP, REG_NOIDX, NOSCALE); + IA32_Fstp_Mem64_ESP(jit); g_StackUsage += 8; break; } @@ -449,13 +449,13 @@ inline void Write_MovRet2Buf(JitWriter *jit, const PassInfo *pRet) case 4: { //fstp DWORD PTR [edi] - IA32_Fstp_Mem32(jit, REG_EDI, REG_NOIDX, NOSCALE); + IA32_Fstp_Mem32(jit, REG_EDI); break; } case 8: { //fstp QWORD PTR [edi] - IA32_Fstp_Mem64(jit, REG_EDI, REG_NOIDX, NOSCALE); + IA32_Fstp_Mem64(jit, REG_EDI); break; } } diff --git a/plugins/include/float.inc b/plugins/include/float.inc index 9914220f..619a18f8 100644 --- a/plugins/include/float.inc +++ b/plugins/include/float.inc @@ -18,17 +18,6 @@ #endif #define _float_included -/** - * Different methods of rounding. - */ -enum floatround_method -{ - floatround_round = 0, /**< Standard IEEE rounding */ - floatround_floor, /**< Next lowest integer value. */ - floatround_ceil, /**< Next highest integer value. */ - floatround_tozero /** Closest integer to zero. */ -}; - /** * Converts an integer into a floating point value. * @@ -82,13 +71,36 @@ native Float:FloatSub(Float:oper1, Float:oper2); native Float:FloatFraction(Float:value); /** - * Rounds a float into an integer number. 
+ * Rounds a float to the closest integer to zero. * * @param value Input value to be rounded. - * @param method Rounding method to use. * @return Rounded value. */ -native FloatRound(Float:value, floatround_method:method=floatround_round); +native RoundToZero(Float:value); + +/** + * Rounds a float to the next highest integer value. + * + * @param value Input value to be rounded. + * @return Rounded value. + */ +native RoundToCeil(Float:value); + +/** + * Rounds a float to the next lowest integer value. + * + * @param value Input value to be rounded. + * @return Rounded value. + */ +native RoundToFloor(Float:value); + +/** + * Standard IEEE rounding. + * + * @param value Input value to be rounded. + * @return Rounded value. + */ +native RountToNearest(Float:value); /** * Compares two floats. diff --git a/public/jit/x86/x86_macros.h b/public/jit/x86/x86_macros.h index 7f307a8d..72f5c29c 100644 --- a/public/jit/x86/x86_macros.h +++ b/public/jit/x86/x86_macros.h @@ -91,7 +91,7 @@ #define IA32_MOV_REG_IMM 0xB8 // encoding is +r #define IA32_MOV_RM8_REG 0x88 // encoding is /r #define IA32_MOV_RM_REG 0x89 // encoding is /r -#define IA32_MOV_REG_MEM 0x8B // encoding is /r +#define IA32_MOV_REG_RM 0x8B // encoding is /r #define IA32_MOV_REG8_RM8 0x8A // encoding is /r #define IA32_MOV_RM8_REG8 0x88 // encoding is /r #define IA32_MOV_RM_IMM32 0xC7 // encoding is /0 @@ -130,11 +130,14 @@ #define IA32_SHL_RM_IMM8 0xC1 // encoding is /4 #define IA32_SHL_RM_1 0xD1 // encoding is /4 #define IA32_SAR_RM_CL 0xD3 // encoding is /7 +#define IA32_SAR_RM_1 0xD1 // encoding is /7 #define IA32_SHR_RM_CL 0xD3 // encoding is /5 #define IA32_SHL_RM_CL 0xD3 // encoding is /4 #define IA32_SAR_RM_IMM8 0xC1 // encoding is /7 #define IA32_SETCC_RM8_1 0x0F // opcode part 1 #define IA32_SETCC_RM8_2 0x90 // encoding is +cc /0 (8bits) +#define IA32_CMOVCC_RM_1 0x0F // opcode part 1 +#define IA32_CMOVCC_RM_2 0x40 // encoding is +cc /r #define IA32_XCHG_EAX_REG 0x90 // encoding is +r #define 
IA32_LEA_REG_MEM 0x8D // encoding is /r #define IA32_POP_REG 0x58 // encoding is +r @@ -155,6 +158,25 @@ #define IA32_FSTP_MEM64 0xDD // encoding is /3 #define IA32_FLD_MEM32 0xD9 // encoding is /0 #define IA32_FLD_MEM64 0xDD // encoding is /0 +#define IA32_FILD_MEM32 0xDB // encoding is /0 +#define IA32_FADD_MEM32 0xD8 // encoding is /0 +#define IA32_FADD_FPREG_ST0_1 0xDC // opcode part 1 +#define IA32_FADD_FPREG_ST0_2 0xC0 // encoding is +r +#define IA32_FSUB_MEM32 0xD8 // encoding is /4 +#define IA32_FMUL_MEM32 0xD8 // encoding is /1 +#define IA32_FDIV_MEM32 0xD8 // encoding is /6 +#define IA32_FSTCW_MEM16_1 0x9B // opcode part 1 +#define IA32_FSTCW_MEM16_2 0xD9 // encoding is /7 +#define IA32_FLDCW_MEM16 0xD9 // encoding is /5 +#define IA32_FISTP_MEM32 0xDB // encoding is /3 +#define IA32_FUCOMIP_1 0xDF // opcode part 1 +#define IA32_FUCOMIP_2 0xE8 // encoding is +r +#define IA32_FSTP_FPREG_1 0xDD // opcode part 1 +#define IA32_FSTP_FPREG_2 0xD8 // encoding is +r +#define IA32_MOVZX_R32_RM8_1 0x0F // opcode part 1 +#define IA32_MOVZX_R32_RM8_2 0xB6 // encoding is /r +#define IA32_MOVZX_R32_RM16_1 0x0F // opcode part 1 +#define IA32_MOVZX_R32_RM16_2 0xB7 // encoding is /r inline jit_uint8_t ia32_modrm(jit_uint8_t mode, jit_uint8_t reg, jit_uint8_t rm) { @@ -183,11 +205,9 @@ inline jit_uint8_t ia32_sib(jit_uint8_t mode, jit_uint8_t index, jit_uint8_t bas * INCREMENT/DECREMENT * ***********************/ -inline void IA32_Inc_Rm_Disp32(JitWriter *jit, jit_uint8_t reg, jit_int32_t disp) +inline void IA32_Inc_Reg(JitWriter *jit, jit_uint8_t reg) { - jit->write_ubyte(IA32_INC_RM); - jit->write_ubyte(ia32_modrm(MOD_DISP32, 0, reg)); - jit->write_int32(disp); + jit->write_ubyte(IA32_INC_REG+reg); } inline void IA32_Inc_Rm_Disp8(JitWriter *jit, jit_uint8_t reg, jit_int8_t disp) @@ -197,6 +217,13 @@ inline void IA32_Inc_Rm_Disp8(JitWriter *jit, jit_uint8_t reg, jit_int8_t disp) jit->write_byte(disp); } +inline void IA32_Inc_Rm_Disp32(JitWriter *jit, jit_uint8_t reg, 
jit_int32_t disp) +{ + jit->write_ubyte(IA32_INC_RM); + jit->write_ubyte(ia32_modrm(MOD_DISP32, 0, reg)); + jit->write_int32(disp); +} + inline void IA32_Inc_Rm_Disp_Reg(JitWriter *jit, jit_uint8_t base, jit_uint8_t reg, jit_uint8_t scale) { jit->write_ubyte(IA32_INC_RM); @@ -204,16 +231,9 @@ inline void IA32_Inc_Rm_Disp_Reg(JitWriter *jit, jit_uint8_t base, jit_uint8_t r jit->write_ubyte(ia32_sib(scale, reg, base)); } -inline void IA32_Inc_Reg(JitWriter *jit, jit_uint8_t reg) +inline void IA32_Dec_Reg(JitWriter *jit, jit_uint8_t reg) { - jit->write_ubyte(IA32_INC_REG+reg); -} - -inline void IA32_Dec_Rm_Disp32(JitWriter *jit, jit_uint8_t reg, jit_int32_t disp) -{ - jit->write_ubyte(IA32_DEC_RM); - jit->write_ubyte(ia32_modrm(MOD_DISP32, 1, reg)); - jit->write_int32(disp); + jit->write_ubyte(IA32_DEC_REG+reg); } inline void IA32_Dec_Rm_Disp8(JitWriter *jit, jit_uint8_t reg, jit_int8_t disp) @@ -223,6 +243,13 @@ inline void IA32_Dec_Rm_Disp8(JitWriter *jit, jit_uint8_t reg, jit_int8_t disp) jit->write_byte(disp); } +inline void IA32_Dec_Rm_Disp32(JitWriter *jit, jit_uint8_t reg, jit_int32_t disp) +{ + jit->write_ubyte(IA32_DEC_RM); + jit->write_ubyte(ia32_modrm(MOD_DISP32, 1, reg)); + jit->write_int32(disp); +} + inline void IA32_Dec_Rm_Disp_Reg(JitWriter *jit, jit_uint8_t base, jit_uint8_t reg, jit_uint8_t scale) { jit->write_ubyte(IA32_DEC_RM); @@ -230,11 +257,6 @@ inline void IA32_Dec_Rm_Disp_Reg(JitWriter *jit, jit_uint8_t base, jit_uint8_t r jit->write_ubyte(ia32_sib(scale, reg, base)); } -inline void IA32_Dec_Reg(JitWriter *jit, jit_uint8_t reg) -{ - jit->write_ubyte(IA32_DEC_REG+reg); -} - /**************** * BINARY LOGIC * ****************/ @@ -251,10 +273,11 @@ inline void IA32_Xor_Reg_Rm(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, j jit->write_ubyte(ia32_modrm(dest_mode, dest, src)); } -inline void IA32_Xor_Eax_Imm32(JitWriter *jit, jit_int32_t value) +inline void IA32_Xor_Rm_Imm8(JitWriter *jit, jit_uint8_t reg, jit_uint8_t mode, jit_int8_t value) { 
- jit->write_ubyte(IA32_XOR_EAX_IMM32); - jit->write_int32(value); + jit->write_ubyte(IA32_XOR_RM_IMM8); + jit->write_ubyte(ia32_modrm(mode, 6, reg)); + jit->write_byte(value); } inline void IA32_Xor_Rm_Imm32(JitWriter *jit, jit_uint8_t reg, jit_uint8_t mode, jit_int32_t value) @@ -264,11 +287,10 @@ inline void IA32_Xor_Rm_Imm32(JitWriter *jit, jit_uint8_t reg, jit_uint8_t mode, jit->write_int32(value); } -inline void IA32_Xor_Rm_Imm8(JitWriter *jit, jit_uint8_t reg, jit_uint8_t mode, jit_int8_t value) +inline void IA32_Xor_Eax_Imm32(JitWriter *jit, jit_int32_t value) { - jit->write_ubyte(IA32_XOR_RM_IMM8); - jit->write_ubyte(ia32_modrm(mode, 6, reg)); - jit->write_byte(value); + jit->write_ubyte(IA32_XOR_EAX_IMM32); + jit->write_int32(value); } inline void IA32_Neg_Rm(JitWriter *jit, jit_uint8_t reg, jit_uint8_t mode) @@ -289,16 +311,17 @@ inline void IA32_And_Reg_Rm(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, j jit->write_ubyte(ia32_modrm(mode, dest, src)); } -inline void IA32_And_Rm_Imm32(JitWriter *jit, jit_uint8_t reg, jit_int32_t c) +inline void IA32_And_Rm_Imm32(JitWriter *jit, jit_uint8_t reg, jit_uint8_t mode, jit_int32_t value) { - if (reg == REG_EAX) - { - jit->write_ubyte(IA32_AND_EAX_IMM32); - } else { - jit->write_ubyte(IA32_AND_RM_IMM32); - jit->write_ubyte(ia32_modrm(MOD_REG, 4, reg)); - } - jit->write_int32(c); + jit->write_ubyte(IA32_AND_RM_IMM32); + jit->write_ubyte(ia32_modrm(mode, 4, reg)); + jit->write_int32(value); +} + +inline void IA32_And_Eax_Imm32(JitWriter *jit, jit_int32_t value) +{ + jit->write_ubyte(IA32_AND_EAX_IMM32); + jit->write_int32(value); } inline void IA32_Not_Rm(JitWriter *jit, jit_uint8_t reg, jit_uint8_t mode) @@ -346,6 +369,12 @@ inline void IA32_Sar_Rm_CL(JitWriter *jit, jit_uint8_t reg, jit_uint8_t mode) jit->write_ubyte(ia32_modrm(mode, 7, reg)); } +inline void IA32_Sar_Rm_1(JitWriter *jit, jit_uint8_t dest, jit_uint8_t mode) +{ + jit->write_ubyte(IA32_SAR_RM_1); + jit->write_ubyte(ia32_modrm(mode, 7, dest)); +} 
+ inline void IA32_Shr_Rm_CL(JitWriter *jit, jit_uint8_t reg, jit_uint8_t mode) { jit->write_ubyte(IA32_SHR_RM_CL); @@ -731,7 +760,7 @@ inline void IA32_Push_Rm_Disp8_ESP(JitWriter *jit, jit_int8_t disp8) inline void IA32_Mov_Reg_Rm(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, jit_uint8_t mode) { - jit->write_ubyte(IA32_MOV_REG_MEM); + jit->write_ubyte(IA32_MOV_REG_RM); jit->write_ubyte(ia32_modrm(mode, dest, src)); } @@ -741,9 +770,16 @@ inline void IA32_Mov_Reg8_Rm8(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, jit->write_ubyte(ia32_modrm(mode, dest, src)); } +inline void IA32_Mov_Reg_RmESP(JitWriter *jit, jit_uint8_t dest) +{ + jit->write_ubyte(IA32_MOV_REG_RM); + jit->write_ubyte(ia32_modrm(MOD_MEM_REG, dest, REG_ESP)); + jit->write_ubyte(ia32_sib(NOSCALE, REG_NOIDX, REG_ESP)); +} + inline void IA32_Mov_Reg_Rm_Disp8(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, jit_int8_t disp) { - jit->write_ubyte(IA32_MOV_REG_MEM); + jit->write_ubyte(IA32_MOV_REG_RM); jit->write_ubyte(ia32_modrm(MOD_DISP8, dest, src)); jit->write_byte(disp); } @@ -757,7 +793,7 @@ inline void IA32_Mov_Reg8_Rm8_Disp8(JitWriter *jit, jit_uint8_t dest, jit_uint8_ inline void IA32_Mov_Reg_Esp_Disp8(JitWriter *jit, jit_uint8_t dest, jit_int8_t disp) { - jit->write_ubyte(IA32_MOV_REG_MEM); + jit->write_ubyte(IA32_MOV_REG_RM); jit->write_ubyte(ia32_modrm(MOD_DISP8, dest, REG_SIB)); jit->write_ubyte(ia32_sib(NOSCALE, REG_NOIDX, REG_ESP)); jit->write_byte(disp); @@ -765,7 +801,7 @@ inline void IA32_Mov_Reg_Esp_Disp8(JitWriter *jit, jit_uint8_t dest, jit_int8_t inline void IA32_Mov_Reg_Rm_Disp32(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, jit_int32_t disp) { - jit->write_ubyte(IA32_MOV_REG_MEM); + jit->write_ubyte(IA32_MOV_REG_RM); jit->write_ubyte(ia32_modrm(MOD_DISP32, dest, src)); jit->write_int32(disp); } @@ -783,7 +819,7 @@ inline void IA32_Mov_Reg_Rm_Disp_Reg(JitWriter *jit, jit_uint8_t src_index, jit_uint8_t src_scale) { - jit->write_ubyte(IA32_MOV_REG_MEM); + 
jit->write_ubyte(IA32_MOV_REG_RM); jit->write_ubyte(ia32_modrm(MOD_MEM_REG, dest, REG_SIB)); jit->write_ubyte(ia32_sib(src_scale, src_index, src_base)); } @@ -795,7 +831,7 @@ inline void IA32_Mov_Reg_Rm_Disp_Reg_Disp8(JitWriter *jit, jit_uint8_t src_scale, jit_int8_t disp8) { - jit->write_ubyte(IA32_MOV_REG_MEM); + jit->write_ubyte(IA32_MOV_REG_RM); jit->write_ubyte(ia32_modrm(MOD_DISP8, dest, REG_SIB)); jit->write_ubyte(ia32_sib(src_scale, src_index, src_base)); jit->write_byte(disp8); @@ -807,7 +843,7 @@ inline void IA32_Mov_Reg_RmEBP_Disp_Reg(JitWriter *jit, jit_uint8_t src_index, jit_uint8_t src_scale) { - jit->write_ubyte(IA32_MOV_REG_MEM); + jit->write_ubyte(IA32_MOV_REG_RM); jit->write_ubyte(ia32_modrm(MOD_DISP8, dest, REG_SIB)); jit->write_ubyte(ia32_sib(src_scale, src_index, src_base)); jit->write_byte(0); @@ -829,6 +865,13 @@ inline void IA32_Mov_Rm8_Reg8(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, jit->write_ubyte(ia32_modrm(mode, src, dest)); } +inline void IA32_Mov_RmESP_Reg(JitWriter *jit, jit_uint8_t src) +{ + jit->write_ubyte(IA32_MOV_RM_REG); + jit->write_ubyte(ia32_modrm(MOD_MEM_REG, src, REG_ESP)); + jit->write_ubyte(ia32_sib(NOSCALE, REG_NOIDX, REG_ESP)); +} + inline void IA32_Mov_Rm_Reg_Disp8(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, jit_int8_t disp) { jit->write_ubyte(IA32_MOV_RM_REG); @@ -893,6 +936,13 @@ inline jitoffs_t IA32_Mov_Reg_Imm32(JitWriter *jit, jit_uint8_t dest, jit_int32_ return offs; } +inline void IA32_Mov_Rm_Imm32(JitWriter *jit, jit_uint8_t dest, jit_int32_t val, jit_uint8_t mode) +{ + jit->write_ubyte(IA32_MOV_RM_IMM32); + jit->write_ubyte(ia32_modrm(mode, 0, dest)); + jit->write_int32(val); +} + inline void IA32_Mov_Rm_Imm32_Disp8(JitWriter *jit, jit_uint8_t dest, jit_int32_t val, @@ -904,13 +954,6 @@ inline void IA32_Mov_Rm_Imm32_Disp8(JitWriter *jit, jit->write_int32(val); } -inline void IA32_Mov_Rm_Imm32(JitWriter *jit, jit_uint8_t dest, jit_int32_t val, jit_uint8_t mode) -{ - 
jit->write_ubyte(IA32_MOV_RM_IMM32); - jit->write_ubyte(ia32_modrm(mode, 0, dest)); - jit->write_int32(val); -} - inline void IA32_Mov_Rm_Imm32_Disp32(JitWriter *jit, jit_uint8_t dest, jit_int32_t val, @@ -935,52 +978,232 @@ inline void IA32_Mov_RmEBP_Imm32_Disp_Reg(JitWriter *jit, jit->write_int32(val); } +inline void IA32_Mov_ESP_Disp8_Imm32(JitWriter *jit, jit_int8_t disp8, jit_int32_t val) +{ + jit->write_ubyte(IA32_MOV_RM_IMM32); + jit->write_ubyte(ia32_modrm(MOD_DISP8, 0, REG_SIB)); + jit->write_ubyte(ia32_sib(NOSCALE, REG_NOIDX, REG_ESP)); + jit->write_byte(disp8); + jit->write_int32(val); +} + /** * Floating Point Instructions */ -inline void IA32_Fstp_Mem32(JitWriter *jit, jit_uint8_t dest_base, jit_uint8_t dest_index, jit_uint8_t dest_scale) +inline void IA32_Fstcw_Mem16_ESP(JitWriter *jit) +{ + jit->write_ubyte(IA32_FSTCW_MEM16_1); + jit->write_ubyte(IA32_FSTCW_MEM16_2); + jit->write_ubyte(ia32_modrm(MOD_MEM_REG, 7, REG_SIB)); + jit->write_ubyte(ia32_sib(NOSCALE, REG_NOIDX, REG_ESP)); +} + +inline void IA32_Fldcw_Mem16_ESP(JitWriter *jit) +{ + jit->write_ubyte(IA32_FLDCW_MEM16); + jit->write_ubyte(ia32_modrm(MOD_MEM_REG, 5, REG_SIB)); + jit->write_ubyte(ia32_sib(NOSCALE, REG_NOIDX, REG_ESP)); +} + +inline void IA32_Fldcw_Mem16_Disp8_ESP(JitWriter *jit, jit_int8_t disp8) +{ + jit->write_ubyte(IA32_FLDCW_MEM16); + jit->write_ubyte(ia32_modrm(MOD_DISP8, 5, REG_SIB)); + jit->write_ubyte(ia32_sib(NOSCALE, REG_NOIDX, REG_ESP)); + jit->write_byte(disp8); +} + +inline void IA32_Fistp_Mem32_ESP(JitWriter *jit) +{ + jit->write_ubyte(IA32_FISTP_MEM32); + jit->write_ubyte(ia32_modrm(MOD_MEM_REG, 3, REG_SIB)); + jit->write_ubyte(ia32_sib(NOSCALE, REG_NOIDX, REG_ESP)); +} + +inline void IA32_Fistp_Mem32_Disp8_Esp(JitWriter *jit, jit_int8_t disp8) +{ + jit->write_ubyte(IA32_FISTP_MEM32); + jit->write_ubyte(ia32_modrm(MOD_DISP8, 3, REG_SIB)); + jit->write_ubyte(ia32_sib(NOSCALE, REG_NOIDX, REG_ESP)); + jit->write_byte(disp8); +} + +inline void 
IA32_Fucomip_ST0_FPUreg(JitWriter *jit, jit_uint8_t reg) +{ + jit->write_ubyte(IA32_FUCOMIP_1); + jit->write_ubyte(IA32_FUCOMIP_2+reg); +} + +inline void IA32_Fadd_FPUreg_ST0(JitWriter *jit, jit_uint8_t reg) +{ + jit->write_ubyte(IA32_FADD_FPREG_ST0_1); + jit->write_ubyte(IA32_FADD_FPREG_ST0_2+reg); +} + +inline void IA32_Fadd_Mem32_Disp8(JitWriter *jit, jit_uint8_t src, jit_int8_t val) +{ + jit->write_ubyte(IA32_FADD_MEM32); + jit->write_ubyte(ia32_modrm(MOD_DISP8, 0, src)); + jit->write_byte(val); +} + +inline void IA32_Fadd_Mem32_ESP(JitWriter *jit) +{ + jit->write_ubyte(IA32_FADD_MEM32); + jit->write_ubyte(ia32_modrm(MOD_MEM_REG, 0, REG_SIB)); + jit->write_ubyte(ia32_sib(NOSCALE, REG_NOIDX, REG_ESP)); +} + +inline void IA32_Fsub_Mem32_Disp8(JitWriter *jit, jit_uint8_t src, jit_int8_t val) +{ + jit->write_ubyte(IA32_FSUB_MEM32); + jit->write_ubyte(ia32_modrm(MOD_DISP8, 4, src)); + jit->write_byte(val); +} + +inline void IA32_Fmul_Mem32_Disp8(JitWriter *jit, jit_uint8_t src, jit_int8_t val) +{ + jit->write_ubyte(IA32_FMUL_MEM32); + jit->write_ubyte(ia32_modrm(MOD_DISP8, 1, src)); + jit->write_byte(val); +} + +inline void IA32_Fdiv_Mem32_Disp8(JitWriter *jit, jit_uint8_t src, jit_int8_t val) +{ + jit->write_ubyte(IA32_FDIV_MEM32); + jit->write_ubyte(ia32_modrm(MOD_DISP8, 6, src)); + jit->write_byte(val); +} + +inline void IA32_Fild_Mem32(JitWriter *jit, jit_uint8_t src) +{ + jit->write_ubyte(IA32_FILD_MEM32); + jit->write_ubyte(ia32_modrm(MOD_MEM_REG, 0, src)); +} + +inline void IA32_Fstp_Mem32(JitWriter *jit, jit_uint8_t dest) +{ + jit->write_ubyte(IA32_FSTP_MEM32); + jit->write_ubyte(ia32_modrm(MOD_MEM_REG, 3, dest)); +} + +inline void IA32_Fstp_Mem64(JitWriter *jit, jit_uint8_t dest) +{ + jit->write_ubyte(IA32_FSTP_MEM64); + jit->write_ubyte(ia32_modrm(MOD_MEM_REG, 3, dest)); +} + +inline void IA32_Fstp_Mem32_ESP(JitWriter *jit) { jit->write_ubyte(IA32_FSTP_MEM32); jit->write_ubyte(ia32_modrm(MOD_MEM_REG, 3, REG_SIB)); - jit->write_ubyte(ia32_sib(dest_scale, 
dest_index, dest_base)); + jit->write_ubyte(ia32_sib(NOSCALE, REG_NOIDX, REG_ESP)); } -inline void IA32_Fstp_Mem64(JitWriter *jit, jit_uint8_t dest_base, jit_uint8_t dest_index, jit_uint8_t dest_scale) +inline void IA32_Fstp_Mem64_ESP(JitWriter *jit) { jit->write_ubyte(IA32_FSTP_MEM64); jit->write_ubyte(ia32_modrm(MOD_MEM_REG, 3, REG_SIB)); - jit->write_ubyte(ia32_sib(dest_scale, dest_index, dest_base)); + jit->write_ubyte(ia32_sib(NOSCALE, REG_NOIDX, REG_ESP)); } -inline void IA32_Fld_Mem32_Disp8(JitWriter *jit, jit_uint8_t dest, jit_int8_t val) +inline void IA32_Fstp_FPUreg(JitWriter *jit, jit_uint8_t reg) +{ + jit->write_ubyte(IA32_FSTP_FPREG_1); + jit->write_ubyte(IA32_FSTP_FPREG_2+reg); +} + +inline void IA32_Fld_Mem32(JitWriter *jit, jit_uint8_t src) { jit->write_ubyte(IA32_FLD_MEM32); - jit->write_ubyte(ia32_modrm(MOD_DISP8, 0, dest)); + jit->write_ubyte(ia32_modrm(MOD_MEM_REG, 0, src)); +} + +inline void IA32_Fld_Mem64(JitWriter *jit, jit_uint8_t src) +{ + jit->write_ubyte(IA32_FLD_MEM64); + jit->write_ubyte(ia32_modrm(MOD_MEM_REG, 0, src)); +} + +inline void IA32_Fld_Mem32_Disp8(JitWriter *jit, jit_uint8_t src, jit_int8_t val) +{ + jit->write_ubyte(IA32_FLD_MEM32); + jit->write_ubyte(ia32_modrm(MOD_DISP8, 0, src)); jit->write_byte(val); } -inline void IA32_Fld_Mem64_Disp8(JitWriter *jit, jit_uint8_t dest, jit_int8_t val) +inline void IA32_Fld_Mem64_Disp8(JitWriter *jit, jit_uint8_t src, jit_int8_t val) { jit->write_ubyte(IA32_FLD_MEM64); - jit->write_ubyte(ia32_modrm(MOD_DISP8, 0, dest)); + jit->write_ubyte(ia32_modrm(MOD_DISP8, 0, src)); jit->write_byte(val); } -inline void IA32_Fld_Mem32_Disp32(JitWriter *jit, jit_uint8_t dest, jit_int32_t val) +inline void IA32_Fld_Mem32_Disp32(JitWriter *jit, jit_uint8_t src, jit_int32_t val) { jit->write_ubyte(IA32_FLD_MEM32); - jit->write_ubyte(ia32_modrm(MOD_DISP32, 0, dest)); + jit->write_ubyte(ia32_modrm(MOD_DISP32, 0, src)); jit->write_int32(val); } -inline void IA32_Fld_Mem64_Disp32(JitWriter *jit, jit_uint8_t 
dest, jit_int32_t val) +inline void IA32_Fld_Mem64_Disp32(JitWriter *jit, jit_uint8_t src, jit_int32_t val) { jit->write_ubyte(IA32_FLD_MEM64); - jit->write_ubyte(ia32_modrm(MOD_DISP32, 0, dest)); + jit->write_ubyte(ia32_modrm(MOD_DISP32, 0, src)); jit->write_int32(val); } +/** +* Move data with zero extend +*/ + +inline void IA32_Movzx_Reg32_Rm8(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, jit_uint8_t mode) +{ + jit->write_ubyte(IA32_MOVZX_R32_RM8_1); + jit->write_ubyte(IA32_MOVZX_R32_RM8_2); + jit->write_ubyte(ia32_modrm(mode, dest, src)); +} + +inline void IA32_Movzx_Reg32_Rm8_Disp8(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, jit_int8_t disp) +{ + jit->write_ubyte(IA32_MOVZX_R32_RM8_1); + jit->write_ubyte(IA32_MOVZX_R32_RM8_2); + jit->write_ubyte(ia32_modrm(MOD_DISP8, dest, src)); + jit->write_byte(disp); +} + +inline void IA32_Movzx_Reg32_Rm8_Disp32(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, jit_int32_t disp) +{ + jit->write_ubyte(IA32_MOVZX_R32_RM8_1); + jit->write_ubyte(IA32_MOVZX_R32_RM8_2); + jit->write_ubyte(ia32_modrm(MOD_DISP32, dest, src)); + jit->write_int32(disp); +} + +inline void IA32_Movzx_Reg32_Rm16(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, jit_uint8_t mode) +{ + jit->write_ubyte(IA32_MOVZX_R32_RM16_1); + jit->write_ubyte(IA32_MOVZX_R32_RM16_2); + jit->write_ubyte(ia32_modrm(mode, dest, src)); +} + +inline void IA32_Movzx_Reg32_Rm16_Disp8(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, jit_int8_t disp) +{ + jit->write_ubyte(IA32_MOVZX_R32_RM16_1); + jit->write_ubyte(IA32_MOVZX_R32_RM16_2); + jit->write_ubyte(ia32_modrm(MOD_DISP8, dest, src)); + jit->write_byte(disp); +} + +inline void IA32_Movzx_Reg32_Rm16_Disp32(JitWriter *jit, jit_uint8_t dest, jit_uint8_t src, jit_int32_t disp) +{ + jit->write_ubyte(IA32_MOVZX_R32_RM16_1); + jit->write_ubyte(IA32_MOVZX_R32_RM16_2); + jit->write_ubyte(ia32_modrm(MOD_DISP32, dest, src)); + jit->write_int32(disp); +} + /** * Branching/Jumping */ @@ -1207,6 +1430,21 @@ inline void 
IA32_SetCC_Rm8(JitWriter *jit, jit_uint8_t reg, jit_uint8_t cond) jit->write_ubyte(ia32_modrm(MOD_REG, 0, reg)); } +inline void IA32_CmovCC_Rm(JitWriter *jit, jit_uint8_t src, jit_uint8_t cond) +{ + jit->write_ubyte(IA32_CMOVCC_RM_1); + jit->write_ubyte(IA32_CMOVCC_RM_2+cond); + jit->write_ubyte(ia32_modrm(MOD_MEM_REG, 0, src)); +} + +inline void IA32_CmovCC_Rm_Disp8(JitWriter *jit, jit_uint8_t src, jit_uint8_t cond, jit_int8_t disp) +{ + jit->write_ubyte(IA32_CMOVCC_RM_1); + jit->write_ubyte(IA32_CMOVCC_RM_2+cond); + jit->write_ubyte(ia32_modrm(MOD_DISP8, 0, src)); + jit->write_byte(disp); +} + inline void IA32_Cmpsb(JitWriter *jit) { jit->write_ubyte(IA32_CMPSB); diff --git a/sourcepawn/jit/x86/jit_x86.cpp b/sourcepawn/jit/x86/jit_x86.cpp index 76822968..323bfcf2 100644 --- a/sourcepawn/jit/x86/jit_x86.cpp +++ b/sourcepawn/jit/x86/jit_x86.cpp @@ -1136,12 +1136,12 @@ inline void WriteOp_Lodb_I(JitWriter *jit) { case 1: { - IA32_And_Rm_Imm32(jit, AMX_REG_PRI, 0x000000FF); + IA32_And_Eax_Imm32(jit, 0x000000FF); break; } case 2: { - IA32_And_Rm_Imm32(jit, AMX_REG_PRI, 0x0000FFFF); + IA32_And_Eax_Imm32(jit, 0x0000FFFF); break; } } @@ -1328,7 +1328,6 @@ inline void WriteOp_Break(JitWriter *jit) jit->set_outputpos(wr); jit->write_uint32((uint32_t)(wr)); jit->set_outputpos(save); - wr = IA32_Call_Imm32(jit, 0); IA32_Write_Jump32(jit, wr, data->jit_break); } @@ -1781,6 +1780,323 @@ inline void WriteOp_Stradjust_Pri(JitWriter *jit) IA32_Sar_Rm_Imm8(jit, AMX_REG_PRI, 2, MOD_REG); } +inline void WriteOp_FloatAbs(JitWriter *jit) +{ + //mov eax, [edi] + //and eax, 0x7FFFFFFF + IA32_Mov_Reg_Rm(jit, AMX_REG_PRI, AMX_REG_STK, MOD_MEM_REG); + IA32_And_Eax_Imm32(jit, 0x7FFFFFFF); + + /* Rectify the stack */ + //add edi, 4 + IA32_Add_Rm_Imm8(jit, AMX_REG_STK, 4, MOD_REG); +} + +inline void WriteOp_Float(JitWriter *jit) +{ + //fild [edi] + //push eax + //fstp [esp] + //pop eax + IA32_Fild_Mem32(jit, AMX_REG_STK); + IA32_Push_Reg(jit, AMX_REG_PRI); + IA32_Fstp_Mem32_ESP(jit); + 
IA32_Pop_Reg(jit, AMX_REG_PRI); + + /* Rectify the stack */ + //add edi, 4 + IA32_Add_Rm_Imm8(jit, AMX_REG_STK, 4, MOD_REG); +} + +inline void WriteOp_FloatAdd(JitWriter *jit) +{ + //push eax + //fld [edi] + //fadd [edi+4] + //fstp [esp] + //pop eax + IA32_Push_Reg(jit, AMX_REG_PRI); + IA32_Fld_Mem32(jit, AMX_REG_STK); + IA32_Fadd_Mem32_Disp8(jit, AMX_REG_STK, 4); + IA32_Fstp_Mem32_ESP(jit); + IA32_Pop_Reg(jit, AMX_REG_PRI); + + /* Rectify the stack */ + //add edi, 8 + IA32_Add_Rm_Imm8(jit, AMX_REG_STK, 8, MOD_REG); +} + +inline void WriteOp_FloatSub(JitWriter *jit) +{ + //push eax + //fld [edi] + //fsub [edi+4] + //fstp [esp] + //pop eax + IA32_Push_Reg(jit, AMX_REG_PRI); + IA32_Fld_Mem32(jit, AMX_REG_STK); + IA32_Fsub_Mem32_Disp8(jit, AMX_REG_STK, 4); + IA32_Fstp_Mem32_ESP(jit); + IA32_Pop_Reg(jit, AMX_REG_PRI); + + /* Rectify the stack */ + //add edi, 8 + IA32_Add_Rm_Imm8(jit, AMX_REG_STK, 8, MOD_REG); +} + +inline void WriteOp_FloatMul(JitWriter *jit) +{ + //push eax + //fld [edi] + //fmul [edi+4] + //fstp [esp] + //pop eax + IA32_Push_Reg(jit, AMX_REG_PRI); + IA32_Fld_Mem32(jit, AMX_REG_STK); + IA32_Fmul_Mem32_Disp8(jit, AMX_REG_STK, 4); + IA32_Fstp_Mem32_ESP(jit); + IA32_Pop_Reg(jit, AMX_REG_PRI); + + /* Rectify the stack */ + //add edi, 8 + IA32_Add_Rm_Imm8(jit, AMX_REG_STK, 8, MOD_REG); +} + +inline void WriteOp_FloatDiv(JitWriter *jit) +{ + //push eax + //fld [edi] + //fdiv [edi+4] + //fstp [esp] + //pop eax + IA32_Push_Reg(jit, AMX_REG_PRI); + IA32_Fld_Mem32(jit, AMX_REG_STK); + IA32_Fdiv_Mem32_Disp8(jit, AMX_REG_STK, 4); + IA32_Fstp_Mem32_ESP(jit); + IA32_Pop_Reg(jit, AMX_REG_PRI); + + /* Rectify the stack */ + //add edi, 8 + IA32_Add_Rm_Imm8(jit, AMX_REG_STK, 8, MOD_REG); +} + +inline void WriteOp_RountToNearest(JitWriter *jit) +{ + //push eax + //fld [edi] + + //fstcw [esp] + //mov eax, [esp] + //push eax + + //mov [esp+4], 0x3FF + //fadd st(0), st(0) + //fldcw [esp+4] + + //push eax + //push 0x3F000000 ; 0.5f + //fadd [esp] + //fistp [esp+4] + //pop 
eax + //pop eax + + //pop ecx + //sar eax, 1 + //mov [esp], ecx + //fldcw [esp] + //add esp, 4 + + IA32_Push_Reg(jit, REG_EAX); + IA32_Fld_Mem32(jit, AMX_REG_STK); + + IA32_Fstcw_Mem16_ESP(jit); + IA32_Mov_Reg_RmESP(jit, REG_EAX); + IA32_Push_Reg(jit, REG_EAX); + + IA32_Mov_ESP_Disp8_Imm32(jit, 4, 0x3FF); + IA32_Fadd_FPUreg_ST0(jit, 0); + IA32_Fldcw_Mem16_Disp8_ESP(jit, 4); + + IA32_Push_Reg(jit, REG_EAX); + IA32_Push_Imm32(jit, 0x3F000000); + IA32_Fadd_Mem32_ESP(jit); + IA32_Fistp_Mem32_Disp8_Esp(jit, 4); + IA32_Pop_Reg(jit, REG_EAX); + IA32_Pop_Reg(jit, AMX_REG_PRI); + + IA32_Pop_Reg(jit, REG_ECX); + IA32_Sar_Rm_1(jit, REG_EAX, MOD_REG); + IA32_Mov_RmESP_Reg(jit, REG_ECX); + IA32_Fldcw_Mem16_ESP(jit); + IA32_Add_Rm_Imm8(jit, REG_ESP, 4, MOD_REG); + + /* Rectify the stack */ + //add edi, 4 + IA32_Add_Rm_Imm8(jit, AMX_REG_STK, 4, MOD_REG); +} + +inline void WriteOp_RoundToFloor(JitWriter *jit) +{ + //push eax + //fld [edi] + + //fstcw [esp] + //mov eax, [esp] + //push eax + + //mov [esp+4], 0x7FF + //fldcw [esp+4] + + //push eax + //fistp [esp] + //pop eax + + //pop ecx + //mov [esp], ecx + //fldcw [esp] + //add esp, 4 + + IA32_Push_Reg(jit, REG_EAX); + IA32_Fld_Mem32(jit, AMX_REG_STK); + + IA32_Fstcw_Mem16_ESP(jit); + IA32_Mov_Reg_RmESP(jit, REG_EAX); + IA32_Push_Reg(jit, REG_EAX); + + IA32_Mov_ESP_Disp8_Imm32(jit, 4, 0x7FF); + IA32_Fldcw_Mem16_Disp8_ESP(jit, 4); + + IA32_Push_Reg(jit, REG_EAX); + IA32_Fistp_Mem32_ESP(jit); + IA32_Pop_Reg(jit, REG_EAX); + + IA32_Pop_Reg(jit, REG_ECX); + IA32_Mov_RmESP_Reg(jit, REG_ECX); + IA32_Fldcw_Mem16_ESP(jit); + IA32_Add_Rm_Imm8(jit, REG_ESP, 4, MOD_REG); + + /* Rectify the stack */ + //add edi, 4 + IA32_Add_Rm_Imm8(jit, AMX_REG_STK, 4, MOD_REG); +} + +inline void WriteOp_RoundToCeil(JitWriter *jit) +{ + //push eax + //fld [edi] + + //fstcw [esp] + //mov eax, [esp] + //push eax + + //mov [esp+4], 0xBFF + //fldcw [esp+4] + + //push eax + //fistp [esp] + //pop eax + + //pop ecx + //mov [esp], ecx + //fldcw [esp] + //add esp, 4 
+ + IA32_Push_Reg(jit, REG_EAX); + IA32_Fld_Mem32(jit, AMX_REG_STK); + + IA32_Fstcw_Mem16_ESP(jit); + IA32_Mov_Reg_RmESP(jit, REG_EAX); + IA32_Push_Reg(jit, REG_EAX); + + IA32_Mov_ESP_Disp8_Imm32(jit, 4, 0xBFF); /* 0xBFF: x87 control word with RC (bits 11:10) = 10b, round toward +infinity */ + IA32_Fldcw_Mem16_Disp8_ESP(jit, 4); + + IA32_Push_Reg(jit, REG_EAX); + IA32_Fistp_Mem32_ESP(jit); + IA32_Pop_Reg(jit, REG_EAX); + + IA32_Pop_Reg(jit, REG_ECX); + IA32_Mov_RmESP_Reg(jit, REG_ECX); + IA32_Fldcw_Mem16_ESP(jit); + IA32_Add_Rm_Imm8(jit, REG_ESP, 4, MOD_REG); + + /* Rectify the stack */ + //add edi, 4 + IA32_Add_Rm_Imm8(jit, AMX_REG_STK, 4, MOD_REG); +} + +inline void WriteOp_RoundToZero(JitWriter *jit) /* Emits code that converts the float at [edi] to an integer in eax, truncating toward zero, then advances edi by 4. */ +{ + //push eax + //fld [edi] + + //fstcw [esp] + //mov eax, [esp] + //push eax + + //mov [esp+4], 0xFFF + //fldcw [esp+4] + + //push eax + //fistp [esp] + //pop eax + + //pop ecx + //mov [esp], ecx + //fldcw [esp] + //add esp, 4 + + IA32_Push_Reg(jit, REG_EAX); /* reserve a 4-byte scratch slot on the native stack */ + IA32_Fld_Mem32(jit, AMX_REG_STK); /* ST0 = float operand read from [edi] */ + + IA32_Fstcw_Mem16_ESP(jit); /* store the current FPU control word into the scratch slot */ + IA32_Mov_Reg_RmESP(jit, REG_EAX); + IA32_Push_Reg(jit, REG_EAX); /* keep a copy of the original control word for the restore below */ + + IA32_Mov_ESP_Disp8_Imm32(jit, 4, 0xFFF); /* 0xFFF: x87 control word with RC (bits 11:10) = 11b, truncate toward zero */ + IA32_Fldcw_Mem16_Disp8_ESP(jit, 4); + + IA32_Push_Reg(jit, REG_EAX); + IA32_Fistp_Mem32_ESP(jit); /* store ST0 as an integer using the rounding mode just loaded, and pop the FPU stack */ + IA32_Pop_Reg(jit, REG_EAX); /* eax = rounded result */ + + IA32_Pop_Reg(jit, REG_ECX); /* ecx = original control word saved above */ + IA32_Mov_RmESP_Reg(jit, REG_ECX); + IA32_Fldcw_Mem16_ESP(jit); /* restore the original control word */ + IA32_Add_Rm_Imm8(jit, REG_ESP, 4, MOD_REG); /* drop the scratch slot */ + + /* Rectify the stack */ + //add edi, 4 + IA32_Add_Rm_Imm8(jit, AMX_REG_STK, 4, MOD_REG); +} + +inline void WriteOp_FloatCompare(JitWriter *jit) /* Emits code that compares the floats at [edi] and [edi+4] and picks eax from the table at jit_rounding_table: equal selects offset 4, [edi+4] below [edi] selects offset 8, above selects offset 0. NOTE(review): an unordered compare (NaN) sets ZF and CF together, so cmovz then cmovb both fire and offset 8 wins; confirm that is intended. */ +{ + CompData *data = (CompData *)jit->data; + + //fld [edi] + //fld [edi+4] + //fucomip st(0), st(1) + //mov ecx, <rounding table address> + //cmovz eax, [ecx+4] + //cmovb eax, [ecx+8] + //cmova eax, [ecx] + //fstp st(0) + + IA32_Fld_Mem32(jit, AMX_REG_STK); + IA32_Fld_Mem32_Disp8(jit, AMX_REG_STK, 4); /* ST0 = [edi+4], ST1 = [edi] */ + IA32_Fucomip_ST0_FPUreg(jit, 1); /* compares ST0 with ST1, sets EFLAGS and pops ST0 */ + IA32_Mov_Reg_Imm32(jit, AMX_REG_TMP, (jit_int32_t)jit->outbase + data->jit_rounding_table); /* absolute address: output base plus the recorded table offset */ + IA32_CmovCC_Rm_Disp8(jit, AMX_REG_TMP, CC_Z, 4); + IA32_CmovCC_Rm_Disp8(jit, AMX_REG_TMP,
CC_B, 8); + IA32_CmovCC_Rm(jit, AMX_REG_TMP, CC_A); + IA32_Fstp_FPUreg(jit, 0); /* pop the operand still left on the FPU stack */ + + /* Rectify the stack */ + //add edi, 8 + IA32_Add_Rm_Imm8(jit, AMX_REG_STK, 8, MOD_REG); +} + /************************************************* ************************************************* * JIT PROPER ************************************ @@ -1979,6 +2295,10 @@ jit_rewind: Write_Check_VerifyAddr(jit, REG_EDX); } + /* Write the rounding table for the float compare opcode */ + data->jit_rounding_table = jit->get_outputpos(); /* record the table's output offset; WriteOp_FloatCompare adds it to outbase */ + Write_RoundingTable(jit); + /* Actual code generation! */ if (writer.outbase == NULL) { @@ -1998,6 +2318,22 @@ jit_rewind: /* Now read the opcode and continue. */ op = (OPCODE)writer.read_cell(); + + /* Patch the floating point natives with our opcodes */ + if (op == OP_SYSREQ_N) + { + cell_t idx = writer.read_cell(); /* operand after the opcode, used as the index into jit_float_table. NOTE(review): idx is not range-checked against natives_num here; confirm it is validated elsewhere. */ + if (data->jit_float_table[idx].found) + { + writer.inptr -= 2; /* step back over the two cells just read (opcode and idx) */ + *(writer.inptr++) = data->jit_float_table[idx].index; + *(writer.inptr++) = OP_NOP; + *(writer.inptr++) = OP_NOP; /* three input cells rewritten in place: the float opcode followed by two OP_NOPs */ + op = (OPCODE)data->jit_float_table[idx].index; + } else { + writer.inptr--; /* rewind past idx; presumably the OP_SYSREQ_N case reads it again */ + } + } switch (op) { #include "opcode_switch.inc" @@ -2270,11 +2606,54 @@ void JITX86::FreeContext(sp_context_t *ctx) ICompilation *JITX86::StartCompilation(sp_plugin_t *plugin) { CompData *data = new CompData; + uint32_t max_natives = plugin->info.natives_num; + const char *strbase = plugin->info.stringbase; data->plugin = plugin; data->inline_level = JIT_INLINE_ERRORCHECKS|JIT_INLINE_NATIVES; data->error_set = SP_ERROR_NONE; + data->jit_float_table = new floattbl_t[max_natives]; /* one entry per native; the floattbl_t constructor leaves found = false */ + for (uint32_t i=0; i<max_natives; i++) + { + const char *name = strbase + plugin->info.natives[i].name; /* native name: stringbase plus the name value stored for native i */ + if (!strcmp(name, "FloatAbs")) + { + data->jit_float_table[i].found = true; + data->jit_float_table[i].index = OP_FABS; + } else if (!strcmp(name, "FloatAdd")) { + data->jit_float_table[i].found = true; + data->jit_float_table[i].index = OP_FLOATADD; + } else if (!strcmp(name, "FloatSub")) { + data->jit_float_table[i].found = true; + data->jit_float_table[i].index =
OP_FLOATSUB; + } else if (!strcmp(name, "FloatMul")) { + data->jit_float_table[i].found = true; + data->jit_float_table[i].index = OP_FLOATMUL; + } else if (!strcmp(name, "FloatDiv")) { + data->jit_float_table[i].found = true; + data->jit_float_table[i].index = OP_FLOATDIV; + } else if (!strcmp(name, "float")) { + data->jit_float_table[i].found = true; + data->jit_float_table[i].index = OP_FLOAT; + } else if (!strcmp(name, "FloatCompare")) { + data->jit_float_table[i].found = true; + data->jit_float_table[i].index = OP_FLOATCMP; + } else if (!strcmp(name, "RoundToZero")) { + data->jit_float_table[i].found = true; + data->jit_float_table[i].index = OP_RND_TO_ZERO; + } else if (!strcmp(name, "RoundToCeil")) { + data->jit_float_table[i].found = true; + data->jit_float_table[i].index = OP_RND_TO_CEIL; + } else if (!strcmp(name, "RoundToFloor")) { + data->jit_float_table[i].found = true; + data->jit_float_table[i].index = OP_RND_TO_FLOOR; + } else if (!strcmp(name, "RountToNearest")) { + data->jit_float_table[i].found = true; + data->jit_float_table[i].index = OP_RND_TO_NEAREST; + } + } + return data; } @@ -2284,6 +2663,7 @@ void JITX86::AbortCompilation(ICompilation *co) { engine->BaseFree(((CompData *)co)->rebase); } + delete [] ((CompData *)co)->jit_float_table; delete (CompData *)co; } diff --git a/sourcepawn/jit/x86/jit_x86.h b/sourcepawn/jit/x86/jit_x86.h index b348b483..f2620bf7 100644 --- a/sourcepawn/jit/x86/jit_x86.h +++ b/sourcepawn/jit/x86/jit_x86.h @@ -49,6 +49,17 @@ typedef struct functracker_s unsigned int code_size; } functracker_t; +struct floattbl_t +{ + floattbl_t() + { + found = false; + index = 0; + } + bool found; + unsigned int index; +}; + class CompData : public ICompilation { public: @@ -80,6 +91,8 @@ public: jitoffs_t jit_error_array_too_big; jitoffs_t jit_extern_error; /* returning generic error */ jitoffs_t jit_sysreq_c; /* old version! 
*/ + jitoffs_t jit_rounding_table; /* output offset of the table emitted by Write_RoundingTable */ + floattbl_t *jit_float_table; /* per-native lookup array: allocated in StartCompilation, deleted in AbortCompilation */ uint32_t codesize; /* total codesize */ }; diff --git a/sourcepawn/jit/x86/opcode_helpers.cpp b/sourcepawn/jit/x86/opcode_helpers.cpp index 65d1d554..0007824c 100644 --- a/sourcepawn/jit/x86/opcode_helpers.cpp +++ b/sourcepawn/jit/x86/opcode_helpers.cpp @@ -799,3 +799,10 @@ int JIT_VerifyLowBoundTracker(sp_context_t *ctx) return SP_ERROR_NONE; } + +void Write_RoundingTable(JitWriter *jit) /* Writes three consecutive int32 values into the output: -1, 0, 1. */ +{ + jit->write_int32(-1); /* byte offset 0: read by the cmova in WriteOp_FloatCompare */ + jit->write_int32(0); /* byte offset 4: read by the cmovz */ + jit->write_int32(1); /* byte offset 8: read by the cmovb */ +} \ No newline at end of file diff --git a/sourcepawn/jit/x86/opcode_helpers.h b/sourcepawn/jit/x86/opcode_helpers.h index 2e1a4ff6..75c27ada 100644 --- a/sourcepawn/jit/x86/opcode_helpers.h +++ b/sourcepawn/jit/x86/opcode_helpers.h @@ -91,6 +91,11 @@ int JIT_VerifyOrAllocateTracker(sp_context_t *ctx); */ void WriteOp_Tracker_Push_Reg(JitWriter *jit, uint8_t reg); +/** +* Writes the rounding table (the int32 values -1, 0, 1) for the float compare opcode. +*/ +void Write_RoundingTable(JitWriter *jit); + /** * Legend for Statuses: * ****** *** ******** @@ -275,6 +280,17 @@ typedef enum OP_GENARRAY, //VERIFIED OP_GENARRAY_Z, //-VERIFIED (not tested for 1D arrays) OP_STRADJUST_PRI, //VERIFIED + OP_FABS, //VERIFIED + OP_FLOAT, //VERIFIED + OP_FLOATADD, //VERIFIED + OP_FLOATSUB, //VERIFIED + OP_FLOATMUL, //VERIFIED + OP_FLOATDIV, //VERIFIED + OP_RND_TO_NEAREST, //VERIFIED + OP_RND_TO_FLOOR, //VERIFIED + OP_RND_TO_CEIL, //VERIFIED + OP_RND_TO_ZERO, //VERIFIED + OP_FLOATCMP, //VERIFIED /* ----- */ OP_NUM_OPCODES } OPCODE; @@ -294,7 +310,7 @@ typedef enum * EXEC FUNCTION * VERIFY ADDR * -* Oh and ALIGN all stuff that is called via CALL like what's done with PROC. +* Oh and ALIGN to 16 bytes all stuff that is called via CALL, and frequently used jump labels, like what's done with PROC.
*/ #endif //_INCLUDE_SOURCEPAWN_JIT_X86_OPCODE_INFO_H_ diff --git a/sourcepawn/jit/x86/opcode_switch.inc b/sourcepawn/jit/x86/opcode_switch.inc index 51b8676b..cfa31237 100644 --- a/sourcepawn/jit/x86/opcode_switch.inc +++ b/sourcepawn/jit/x86/opcode_switch.inc @@ -678,6 +678,61 @@ WriteOp_Stradjust_Pri(jit); break; } + case OP_FABS: /* this and the following float cases handle the opcodes that the compile loop substitutes for OP_SYSREQ_N calls to float natives */ + { + WriteOp_FloatAbs(jit); + break; + } + case OP_FLOAT: + { + WriteOp_Float(jit); + break; + } + case OP_FLOATADD: + { + WriteOp_FloatAdd(jit); + break; + } + case OP_FLOATSUB: + { + WriteOp_FloatSub(jit); + break; + } + case OP_FLOATMUL: + { + WriteOp_FloatMul(jit); + break; + } + case OP_FLOATDIV: + { + WriteOp_FloatDiv(jit); + break; + } + case OP_RND_TO_NEAREST: + { + WriteOp_RountToNearest(jit); /* NOTE(review): "Rount" spelling presumably matches the emitter's definition; rename both together if it is corrected */ + break; + } + case OP_RND_TO_FLOOR: + { + WriteOp_RoundToFloor(jit); + break; + } + case OP_RND_TO_ZERO: + { + WriteOp_RoundToZero(jit); + break; + } + case OP_RND_TO_CEIL: + { + WriteOp_RoundToCeil(jit); + break; + } + case OP_FLOATCMP: + { + WriteOp_FloatCompare(jit); + break; + } #if defined USE_UNGEN_OPCODES #include "ungen_opcode_switch.inc" #endif @@ -686,5 +741,3 @@ data->error_set = SP_ERROR_INVALID_INSTRUCTION; break; } - -