diff --git a/public/jit/x86/assembler-x86.cpp b/public/jit/x86/assembler-x86.cpp
new file mode 100644
index 00000000..f00585aa
--- /dev/null
+++ b/public/jit/x86/assembler-x86.cpp
@@ -0,0 +1,33 @@
+/**
+ * vim: set ts=8 sts=2 sw=2 tw=99 et:
+ * =============================================================================
+ * SourcePawn JIT SDK
+ * Copyright (C) 2004-2008 AlliedModders LLC. All rights reserved.
+ * =============================================================================
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License, version 3.0, as published by the
+ * Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, AlliedModders LLC gives you permission to link the
+ * code of this program (as well as its derivative works) to "Half-Life 2," the
+ * "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
+ * by the Valve Corporation. You must obey the GNU General Public License in
+ * all respects for all other code used. Additionally, AlliedModders LLC grants
+ * this exception to all derivative works. AlliedModders LLC defines further
+ * exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
+ * or <http://www.sourcemod.net/license.php>.
+ *
+ * Version: $Id$
+ */
+#include <assembler-x86.h>
+
+CPUFeatures AssemblerX86::X86Features;
diff --git a/public/jit/x86/assembler-x86.h b/public/jit/x86/assembler-x86.h
index 5afcd443..f3bb7264 100644
--- a/public/jit/x86/assembler-x86.h
+++ b/public/jit/x86/assembler-x86.h
@@ -94,6 +94,27 @@ struct FloatRegister
   }
 };
 
+// Flags describing which x86 CPU features are available to generated code.
+// Every flag starts out false; see AssemblerX86::SetFeatures().
+struct CPUFeatures
+{
+  CPUFeatures()
+  {
+    memset(this, 0, sizeof(*this));
+  }
+
+  bool fpu;
+  bool mmx;
+  bool sse;
+  bool sse2;
+  bool sse3;
+  bool ssse3;
+  bool sse4_1;
+  bool sse4_2;
+  bool avx;
+  bool avx2;
+};
+
 const Register eax = { 0 };
 const Register ecx = { 1 };
 const Register edx = { 2 };
@@ -299,7 +320,19 @@ struct Operand
 
 class AssemblerX86 : public Assembler
 {
+ private:
+  // List of processor features; to be used, this must be filled in at
+  // startup.
+  static CPUFeatures X86Features;
+
  public:
+  static void SetFeatures(const CPUFeatures &features) {
+    X86Features = features;
+  }
+  static const CPUFeatures &Features() {
+    return X86Features;
+  }
+
   void movl(Register dest, Register src) {
     emit1(0x89, src.code, dest.code);
   }
@@ -712,6 +745,83 @@ class AssemblerX86 : public Assembler
       outOfMemory_ = true;
   }
 
+  void cpuid() {
+    emit2(0x0f, 0xa2);
+  }
+
+
+  // SSE operations can only be used if the feature detection function has
+  // been run *and* detected the appropriate level of functionality.
+  //
+  // NOTE(review): in the register-direct forms of cvttss2si, cvtss2si and ucomiss
+  // the ModRM.r/m operand is an xmm register even though it is typed Register.
+  void movss(FloatRegister dest, const Operand &src) {
+    assert(Features().sse);
+    emit3(0xf3, 0x0f, 0x10, dest.code, src);
+  }
+  void cvttss2si(Register dest, Register src) {
+    assert(Features().sse);
+    emit3(0xf3, 0x0f, 0x2c, dest.code, src.code);
+  }
+  void cvttss2si(Register dest, const Operand &src) {
+    assert(Features().sse);
+    emit3(0xf3, 0x0f, 0x2c, dest.code, src);
+  }
+  void cvtss2si(Register dest, Register src) {
+    assert(Features().sse);
+    emit3(0xf3, 0x0f, 0x2d, dest.code, src.code);
+  }
+  void cvtss2si(Register dest, const Operand &src) {
+    assert(Features().sse);
+    emit3(0xf3, 0x0f, 0x2d, dest.code, src);
+  }
+  void cvtsi2ss(FloatRegister dest, Register src) {
+    assert(Features().sse);
+    emit3(0xf3, 0x0f, 0x2a, dest.code, src.code);
+  }
+  void cvtsi2ss(FloatRegister dest, const Operand &src) {
+    assert(Features().sse);
+    emit3(0xf3, 0x0f, 0x2a, dest.code, src);
+  }
+  void addss(FloatRegister dest, const Operand &src) {
+    assert(Features().sse);
+    emit3(0xf3, 0x0f, 0x58, dest.code, src);
+  }
+  void subss(FloatRegister dest, const Operand &src) {
+    assert(Features().sse);
+    emit3(0xf3, 0x0f, 0x5c, dest.code, src);
+  }
+  void mulss(FloatRegister dest, const Operand &src) {
+    assert(Features().sse);
+    emit3(0xf3, 0x0f, 0x59, dest.code, src);
+  }
+  void divss(FloatRegister dest, const Operand &src) {
+    assert(Features().sse);
+    emit3(0xf3, 0x0f, 0x5e, dest.code, src);
+  }
+  void ucomiss(FloatRegister left, Register right) {
+    assert(Features().sse);
+    emit2(0x0f, 0x2e, left.code, right.code);
+  }
+  void ucomiss(FloatRegister left, const Operand &right) {
+    assert(Features().sse);
+    emit2(0x0f, 0x2e, left.code, right);
+  }
+
+  // SSE2-only instructions.
+  void movd(Register dest, FloatRegister src) {
+    assert(Features().sse2);
+    // MOVD r/m32, xmm (66 0F 7E /r): the xmm source is encoded in ModRM.reg and
+    // the destination register in ModRM.r/m.
+    emit3(0x66, 0x0f, 0x7e, src.code, dest.code);
+  }
+  void movd(Register dest, const Operand &src) {
+    assert(Features().sse2);
+    // NOTE(review): 66 0F 7E is the store form (xmm -> r/m32), so as written this
+    // appears to encode a store of xmm<dest.code> to |src| - confirm the intent.
+    emit3(0x66, 0x0f, 0x7e, dest.code, src);
+  }
+
   static void PatchRel32Absolute(uint8_t *ip, void *ptr) {
     int32_t delta = uint32_t(ptr) - uint32_t(ip);
     *reinterpret_cast<int32_t *>(ip - 4) = delta;
@@ -806,6 +916,24 @@ class AssemblerX86 : public Assembler
     emit(reg, operand);
   }
 
+  // Three-byte opcode emitters; the optional ModRM byte is built from reg + operand.
+  void emit3(uint8_t prefix1, uint8_t prefix2, uint8_t opcode) {
+    ensureSpace();
+    *pos_++ = prefix1;
+    *pos_++ = prefix2;
+    *pos_++ = opcode;
+  }
+  void emit3(uint8_t prefix1, uint8_t prefix2, uint8_t opcode, uint8_t reg, uint8_t opreg) {
+    emit3(prefix1, prefix2, opcode);
+    assert(reg <= 7);
+    assert(opreg <= 7);
+    *pos_++ = (kModeReg << 6) | (reg << 3) | opreg;
+  }
+  void emit3(uint8_t prefix1, uint8_t prefix2, uint8_t opcode, uint8_t reg, const Operand &operand) {
+    emit3(prefix1, prefix2, opcode);
+    emit(reg, operand);
+  }
+
   template <typename T>
   void shift_cl(const T &t, uint8_t r) {
     emit1(0xd3, r, t);
diff --git a/public/jit/x86/macro-assembler-x86.h b/public/jit/x86/macro-assembler-x86.h
new file mode 100644
index 00000000..734018ca
--- /dev/null
+++ b/public/jit/x86/macro-assembler-x86.h
@@ -0,0 +1,104 @@
+/**
+ * vim: set ts=8 sts=2 sw=2 tw=99 et:
+ * =============================================================================
+ * SourcePawn JIT SDK
+ * Copyright (C) 2004-2008 AlliedModders LLC. All rights reserved.
+ * =============================================================================
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License, version 3.0, as published by the
+ * Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, AlliedModders LLC gives you permission to link the
+ * code of this program (as well as its derivative works) to "Half-Life 2," the
+ * "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
+ * by the Valve Corporation. You must obey the GNU General Public License in
+ * all respects for all other code used. Additionally, AlliedModders LLC grants
+ * this exception to all derivative works. AlliedModders LLC defines further
+ * exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
+ * or <http://www.sourcemod.net/license.php>.
+ *
+ * Version: $Id$
+ */
+#ifndef _include_sourcepawn_macroassembler_x86h__
+#define _include_sourcepawn_macroassembler_x86h__
+
+#include
+#include
+#include
+#include
+
+class MacroAssemblerX86 : public AssemblerX86
+{
+ public:
+  static void GenerateFeatureDetection(MacroAssemblerX86 &masm) {
+    masm.push(ebp);
+    masm.movl(ebp, esp);
+    masm.push(ebx);
+    {
+      // CPUID leaf 1: store ECX and EDX through the first two pointer arguments.
+      masm.movl(eax, 1);
+      masm.cpuid();
+      masm.movl(eax, Operand(ebp, 8));
+      masm.movl(Operand(eax, 0), ecx);
+      masm.movl(eax, Operand(ebp, 12));
+      masm.movl(Operand(eax, 0), edx);
+    }
+
+    // Pre-zero the third output; it is only filled in when CPUID leaf 7 exists.
+    masm.movl(eax, Operand(ebp, 16));
+    masm.movl(Operand(eax, 0), 0);
+
+    Label skip_level_7;
+    {
+      // CPUID leaf 0 reports the highest supported leaf in EAX; read EBX of leaf 7.
+      masm.movl(eax, 0);
+      masm.cpuid();
+      masm.cmpl(eax, 7);
+      masm.j(below, &skip_level_7);
+      masm.movl(eax, 7);
+      masm.movl(ecx, 0);
+      masm.cpuid();
+      masm.movl(eax, Operand(ebp, 16));
+      masm.movl(Operand(eax, 0), ebx);
+    }
+    masm.bind(&skip_level_7);
+
+    masm.pop(ebx);
+    masm.pop(ebp);
+    masm.ret();
+  }
+
+  static void RunFeatureDetection(void *code) {
+    typedef void (*fn_t)(int *reg_ecx, int *reg_edx, int *reg_ebx);
+
+    int reg_ecx = 0, reg_edx = 0, reg_ebx = 0;
+    ((fn_t)code)(&reg_ecx, &reg_edx, &reg_ebx);
+    // NOTE(review): avx/avx2 mirror raw CPUID bits; OS support (OSXSAVE/XGETBV) is not checked.
+    CPUFeatures features;
+    features.fpu = !!(reg_edx & (1 << 0));
+    features.mmx = !!(reg_edx & (1 << 23));
+    features.sse = !!(reg_edx & (1 << 25));
+    features.sse2 = !!(reg_edx & (1 << 26));
+    features.sse3 = !!(reg_ecx & (1 << 0));
+    features.ssse3 = !!(reg_ecx & (1 << 9));
+    features.sse4_1 = !!(reg_ecx & (1 << 19));
+    features.sse4_2 = !!(reg_ecx & (1 << 20));
+    features.avx = !!(reg_ecx & (1 << 28));
+    features.avx2 = !!(reg_ebx & (1 << 5));
+    SetFeatures(features);
+  }
+
+ private:
+};
+
+#endif // _include_sourcepawn_macroassembler_x86h__
+
diff --git a/sourcepawn/jit/AMBuilder b/sourcepawn/jit/AMBuilder
index db304d85..aa919c72 100644
--- a/sourcepawn/jit/AMBuilder
+++ b/sourcepawn/jit/AMBuilder
@@ -37,7 +37,8 @@ binary.AddSourceFiles('sourcepawn/jit', [
 	'zlib/uncompr.c',
 	'zlib/zutil.c',
 	'md5/md5.cpp',
-	'../../knight/shared/KeCodeAllocator.cpp'
+	'../../knight/shared/KeCodeAllocator.cpp',
+	'../../public/jit/x86/assembler-x86.cpp'
 	])
 SM.AutoVersion('sourcepawn/jit', binary)
 SM.ExtractDebugInfo(extension, binary)
diff --git a/sourcepawn/jit/Makefile.shell b/sourcepawn/jit/Makefile.shell
index f19c7399..3e80d316 100644
--- a/sourcepawn/jit/Makefile.shell
+++ b/sourcepawn/jit/Makefile.shell
@@ -34,6 +34,7 @@ OBJECTS = dll_exports.cpp \
 	zlib/zutil.c \
 
 OBJECTS += ../../knight/shared/KeCodeAllocator.cpp
+OBJECTS += ../../public/jit/x86/assembler-x86.cpp
 
 ##############################################
 ### CONFIGURE ANY OTHER FLAGS/OPTIONS HERE ###
@@ -75,6 +76,7 @@ ifeq "$(GCC_VERSION)" "4"
 endif
 
 OBJ_LINUX := $(OBJECTS:../../knight/shared/%.cpp=$(BIN_DIR)/knight/%.o)
+OBJ_LINUX := $(OBJ_LINUX:../../public/jit/x86/%.cpp=$(BIN_DIR)/%.o)
 OBJ_LINUX := $(OBJ_LINUX:%.cpp=$(BIN_DIR)/%.o)
 OBJ_LINUX := $(OBJ_LINUX:%.c=$(BIN_DIR)/%.o)
 
@@ -89,6 +91,9 @@ $(BIN_DIR)/%.o: %.cpp
 $(BIN_DIR)/knight/%.o: ../../knight/shared/%.cpp
 	$(CXX) $(INCLUDE) $(CFLAGS) $(CXXFLAGS) -o $@ -c $<
 
+$(BIN_DIR)/assembler-x86.o: ../../public/jit/x86/assembler-x86.cpp
+	$(CXX) $(INCLUDE) $(CFLAGS) $(CXXFLAGS) -o $@ -c $<
+
 all:
 	mkdir -p $(BIN_DIR)/x86
 	mkdir -p $(BIN_DIR)/md5
diff --git a/sourcepawn/jit/x86/jit_x86.cpp b/sourcepawn/jit/x86/jit_x86.cpp
index 034aebe2..29f4ba9e 100644
--- a/sourcepawn/jit/x86/jit_x86.cpp
+++ b/sourcepawn/jit/x86/jit_x86.cpp
@@ -983,10 +983,15 @@ Compiler::emitOp(OPCODE op)
       break;
 
     case OP_FLOAT:
-      __ fild32(Operand(edi, 0));
-      __ subl(esp, 4);
-      __ fstp32(Operand(esp, 0));
-      __ pop(pri);
+      if (MacroAssemblerX86::Features().sse2) {
+        __ cvtsi2ss(xmm0, Operand(stk, 0));
+        __ movd(pri, xmm0);
+      } else {
+        __ fild32(Operand(stk, 0));
+        __ subl(esp, 4);
+        __ fstp32(Operand(esp, 0));
+        __ pop(pri);
+      }
       __ addl(stk, 4);
       break;
 
@@ -994,34 +999,52 @@ Compiler::emitOp(OPCODE op)
     case OP_FLOATSUB:
     case OP_FLOATMUL:
     case OP_FLOATDIV:
-      __ subl(esp, 4);
-      __ fld32(Operand(edi, 0));
+      if (MacroAssemblerX86::Features().sse2) {
+        __ movss(xmm0, Operand(stk, 0));
+        if (op == OP_FLOATADD)
+          __ addss(xmm0, Operand(stk, 4));
+        else if (op == OP_FLOATSUB)
+          __ subss(xmm0, Operand(stk, 4));
+        else if (op == OP_FLOATMUL)
+          __ mulss(xmm0, Operand(stk, 4));
+        else if (op == OP_FLOATDIV)
+          __ divss(xmm0, Operand(stk, 4));
+        __ movd(pri, xmm0);
+      } else {
+        __ subl(esp, 4);
+        __ fld32(Operand(stk, 0));
 
-      if (op == OP_FLOATADD)
-        __ fadd32(Operand(edi, 4));
-      else if (op == OP_FLOATSUB)
-        __ fsub32(Operand(edi, 4));
-      else if (op == OP_FLOATMUL)
-        __ fmul32(Operand(edi, 4));
-      else if (op == OP_FLOATDIV)
-        __ fdiv32(Operand(edi, 4));
+        if (op == OP_FLOATADD)
+          __ fadd32(Operand(stk, 4));
+        else if (op == OP_FLOATSUB)
+          __ fsub32(Operand(stk, 4));
+        else if (op == OP_FLOATMUL)
+          __ fmul32(Operand(stk, 4));
+        else if (op == OP_FLOATDIV)
+          __ fdiv32(Operand(stk, 4));
 
-      __ fstp32(Operand(esp, 0));
-      __ pop(pri);
+        __ fstp32(Operand(esp, 0));
+        __ pop(pri);
+      }
       __ addl(stk, 8);
       break;
 
     case OP_RND_TO_NEAREST:
     {
-      static float kRoundToNearest = 0.5f;
-      // From http://wurstcaptures.untergrund.net/assembler_tricks.html#fastfloorf
-      __ fld32(Operand(edi, 0));
-      __ fadd32(st0, st0);
-      __ fadd32(Operand(ExternalAddress(&kRoundToNearest)));
-      __ subl(esp, 4);
-      __ fistp32(Operand(esp, 0));
-      __ pop(pri);
-      __ sarl(pri, 1);
+      if (MacroAssemblerX86::Features().sse) {
+        // Assumes default MXCSR. NOTE(review): ties appear to round to even here, up in x87 path.
+        __ cvtss2si(pri, Operand(stk, 0));
+      } else {
+        static float kRoundToNearest = 0.5f;
+        // From http://wurstcaptures.untergrund.net/assembler_tricks.html#fastfloorf
+        __ fld32(Operand(stk, 0));
+        __ fadd32(st0, st0);
+        __ fadd32(Operand(ExternalAddress(&kRoundToNearest)));
+        __ subl(esp, 4);
+        __ fistp32(Operand(esp, 0));
+        __ pop(pri);
+        __ sarl(pri, 1);
+      }
       __ addl(stk, 4);
       break;
     }
@@ -1030,7 +1053,7 @@ Compiler::emitOp(OPCODE op)
     {
       static float kRoundToCeil = -0.5f;
       // From http://wurstcaptures.untergrund.net/assembler_tricks.html#fastfloorf
-      __ fld32(Operand(edi, 0));
+      __ fld32(Operand(stk, 0));
       __ fadd32(st0, st0);
       __ fsubr32(Operand(ExternalAddress(&kRoundToCeil)));
       __ subl(esp, 4);
@@ -1043,15 +1066,19 @@ Compiler::emitOp(OPCODE op)
     }
 
     case OP_RND_TO_ZERO:
-      __ fld32(Operand(edi, 0));
-      __ subl(esp, 8);
-      __ fstcw(Operand(esp, 4));
-      __ movl(Operand(esp, 0), 0xfff);
-      __ fldcw(Operand(esp, 0));
-      __ fistp32(Operand(esp, 0));
-      __ pop(pri);
-      __ fldcw(Operand(esp, 0));
-      __ addl(esp, 4);
+      if (MacroAssemblerX86::Features().sse) {
+        __ cvttss2si(pri, Operand(stk, 0));
+      } else {
+        __ fld32(Operand(stk, 0));
+        __ subl(esp, 8);
+        __ fstcw(Operand(esp, 4));
+        __ movl(Operand(esp, 0), 0xfff);
+        __ fldcw(Operand(esp, 0));
+        __ fistp32(Operand(esp, 0));
+        __ pop(pri);
+        __ fldcw(Operand(esp, 0));
+        __ addl(esp, 4);
+      }
       __ addl(stk, 4);
       break;
 
@@ -1071,10 +1098,15 @@ Compiler::emitOp(OPCODE op)
     case OP_FLOATCMP:
     {
       Label bl, ab, done;
-      __ fld32(Operand(edi, 0));
-      __ fld32(Operand(edi, 4));
-      __ fucomip(st1);
-      __ fstp(st0);
+      if (MacroAssemblerX86::Features().sse) {
+        __ movss(xmm0, Operand(stk, 4));
+        __ ucomiss(xmm0, Operand(stk, 0));
+      } else {
+        __ fld32(Operand(stk, 0));
+        __ fld32(Operand(stk, 4));
+        __ fucomip(st1);
+        __ fstp(st0);
+      }
       __ j(above, &ab);
       __ j(below, &bl);
       __ xorl(pri, pri);
@@ -1869,6 +1901,14 @@ bool JITX86::InitializeJIT()
   if (!m_pJitGenArray)
     return false;
 
+  MacroAssemblerX86 masm;
+  MacroAssemblerX86::GenerateFeatureDetection(masm);
+  void *code = LinkCode(masm);
+  if (!code)
+    return false;
+  MacroAssemblerX86::RunFeatureDetection(code);
+  KE_FreeCode(g_pCodeCache, code);
+
   return true;
 }
 
diff --git a/sourcepawn/jit/x86/jit_x86.h b/sourcepawn/jit/x86/jit_x86.h
index a59fc8a3..2b4c6d41 100644
--- a/sourcepawn/jit/x86/jit_x86.h
+++ b/sourcepawn/jit/x86/jit_x86.h
@@ -35,7 +35,7 @@
 #include
 #include
 #include
-#include <assembler-x86.h>
+#include <macro-assembler-x86.h>
 #include
 #include "jit_shared.h"
 #include "BaseRuntime.h"