diff --git a/public/jit/x86/assembler-x86.cpp b/public/jit/x86/assembler-x86.cpp
new file mode 100644
index 00000000..f00585aa
--- /dev/null
+++ b/public/jit/x86/assembler-x86.cpp
@@ -0,0 +1,33 @@
+/**
+ * vim: set ts=8 sts=2 sw=2 tw=99 et:
+ * =============================================================================
+ * SourcePawn JIT SDK
+ * Copyright (C) 2004-2008 AlliedModders LLC. All rights reserved.
+ * =============================================================================
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License, version 3.0, as published by the
+ * Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, AlliedModders LLC gives you permission to link the
+ * code of this program (as well as its derivative works) to "Half-Life 2," the
+ * "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
+ * by the Valve Corporation. You must obey the GNU General Public License in
+ * all respects for all other code used. Additionally, AlliedModders LLC grants
+ * this exception to all derivative works. AlliedModders LLC defines further
+ * exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
+ * or <http://www.sourcemod.net/license.php>.
+ *
+ * Version: $Id$
+ */
+#include <assembler-x86.h>
+
+// Storage for the feature flags declared as a static member of AssemblerX86.
+// CPUFeatures' constructor clears every flag, so nothing is reported as
+// available until AssemblerX86::SetFeatures() is called.
+CPUFeatures AssemblerX86::X86Features;
diff --git a/public/jit/x86/assembler-x86.h b/public/jit/x86/assembler-x86.h
index 5afcd443..f3bb7264 100644
--- a/public/jit/x86/assembler-x86.h
+++ b/public/jit/x86/assembler-x86.h
@@ -94,6 +94,25 @@ struct FloatRegister
}
};
+// Flags describing which x86 instruction-set extensions the host CPU reports.
+// A default-constructed object has every flag cleared; the flags are meant to
+// be filled in from CPUID results and then published through
+// AssemblerX86::SetFeatures().
+struct CPUFeatures
+{
+  // Clears every flag explicitly instead of zeroing the object with a raw
+  // memset(this, ...), so the header needs no <string.h> and the struct stays
+  // correct if a non-trivial member is ever added.
+  CPUFeatures()
+   : fpu(false),
+     mmx(false),
+     sse(false),
+     sse2(false),
+     sse3(false),
+     ssse3(false),
+     sse4_1(false),
+     sse4_2(false),
+     avx(false),
+     avx2(false)
+  {
+  }
+
+  bool fpu;     // CPUID leaf 1, EDX bit 0
+  bool mmx;     // CPUID leaf 1, EDX bit 23
+  bool sse;     // CPUID leaf 1, EDX bit 25
+  bool sse2;    // CPUID leaf 1, EDX bit 26
+  bool sse3;    // CPUID leaf 1, ECX bit 0
+  bool ssse3;   // CPUID leaf 1, ECX bit 9
+  bool sse4_1;  // CPUID leaf 1, ECX bit 19
+  bool sse4_2;  // CPUID leaf 1, ECX bit 20
+  bool avx;     // CPUID leaf 1, ECX bit 28
+  bool avx2;    // CPUID leaf 7 (sub-leaf 0), EBX bit 5
+};
+
const Register eax = { 0 };
const Register ecx = { 1 };
const Register edx = { 2 };
@@ -299,7 +318,19 @@ struct Operand
class AssemblerX86 : public Assembler
{
+ private:
+ // Processor feature flags shared by every assembler instance. All flags are false until
+ // SetFeatures() is called at startup; the SSE/SSE2 emitters below assert on them.
+ static CPUFeatures X86Features;
+
public:
+ static void SetFeatures(const CPUFeatures &features) { // Replaces the shared feature flags with a copy of |features|.
+ X86Features = features;
+ }
+ static const CPUFeatures &Features() { // Returns the shared feature flags; they stay all-false until SetFeatures() has run.
+ return X86Features;
+ }
+
void movl(Register dest, Register src) {
emit1(0x89, src.code, dest.code);
}
@@ -712,6 +743,74 @@ class AssemblerX86 : public Assembler
outOfMemory_ = true;
}
+ void cpuid() { // Emits the CPUID instruction; it has no explicit operands.
+ emit2(0x0f, 0xa2); // Opcode bytes 0F A2.
+ }
+
+
+ // SSE operations can only be used if the feature detection function has
+ // been run *and* detected the appropriate level of functionality.
+ void movss(FloatRegister dest, const Operand &src) { // MOVSS xmm, m32 (F3 0F 10 /r): load a scalar single from |src| into |dest|.
+ assert(Features().sse); // The recorded feature flags must report SSE.
+ emit3(0xf3, 0x0f, 0x10, dest.code, src); // dest.code is passed as the reg argument; |src| supplies the memory operand.
+ }
+ void cvttss2si(Register dest, Register src) { // CVTTSS2SI r32, xmm (F3 0F 2C /r): truncating float-to-int32, register source.
+ assert(Features().sse); // The recorded feature flags must report SSE.
+ emit3(0xf3, 0x0f, 0x2c, dest.code, src.code); // NOTE(review): src.code is used as an XMM register number although src is typed Register -- confirm.
+ }
+ void cvttss2si(Register dest, const Operand &src) { // Same instruction with a memory source operand.
+ assert(Features().sse); // The recorded feature flags must report SSE.
+ emit3(0xf3, 0x0f, 0x2c, dest.code, src);
+ }
+ void cvtss2si(Register dest, Register src) { // CVTSS2SI r32, xmm (F3 0F 2D /r): float-to-int32 using the current MXCSR rounding mode.
+ assert(Features().sse); // The recorded feature flags must report SSE.
+ emit3(0xf3, 0x0f, 0x2d, dest.code, src.code); // NOTE(review): src.code is used as an XMM register number although src is typed Register -- confirm.
+ }
+ void cvtss2si(Register dest, const Operand &src) { // Same instruction with a memory source operand.
+ assert(Features().sse); // The recorded feature flags must report SSE.
+ emit3(0xf3, 0x0f, 0x2d, dest.code, src);
+ }
+ void cvtsi2ss(FloatRegister dest, Register src) { // CVTSI2SS xmm, r32 (F3 0F 2A /r): int32-to-float, general-purpose register source.
+ assert(Features().sse); // The recorded feature flags must report SSE.
+ emit3(0xf3, 0x0f, 0x2a, dest.code, src.code); // XMM destination in the reg field, integer source in r/m.
+ }
+ void cvtsi2ss(FloatRegister dest, const Operand &src) { // Same instruction with a memory source operand.
+ assert(Features().sse); // The recorded feature flags must report SSE.
+ emit3(0xf3, 0x0f, 0x2a, dest.code, src);
+ }
+ void addss(FloatRegister dest, const Operand &src) { // ADDSS xmm, m32 (F3 0F 58 /r): dest = dest + [src], scalar single.
+ assert(Features().sse); // The recorded feature flags must report SSE.
+ emit3(0xf3, 0x0f, 0x58, dest.code, src);
+ }
+ void subss(FloatRegister dest, const Operand &src) { // SUBSS xmm, m32 (F3 0F 5C /r): dest = dest - [src], scalar single.
+ assert(Features().sse); // The recorded feature flags must report SSE.
+ emit3(0xf3, 0x0f, 0x5c, dest.code, src);
+ }
+ void mulss(FloatRegister dest, const Operand &src) { // MULSS xmm, m32 (F3 0F 59 /r): dest = dest * [src], scalar single.
+ assert(Features().sse); // The recorded feature flags must report SSE.
+ emit3(0xf3, 0x0f, 0x59, dest.code, src);
+ }
+ void divss(FloatRegister dest, const Operand &src) { // DIVSS xmm, m32 (F3 0F 5E /r): dest = dest / [src], scalar single.
+ assert(Features().sse); // The recorded feature flags must report SSE.
+ emit3(0xf3, 0x0f, 0x5e, dest.code, src);
+ }
+ // UCOMISS xmm, xmm (0F 2E /r): unordered scalar-single compare of |left|
+ // against |right|. Like every other SSE emitter here, it may only be used
+ // once the recorded feature flags report SSE.
+ // NOTE(review): |right| is typed Register but its code is encoded as the
+ // second XMM register -- confirm that is the intended parameter type.
+ void ucomiss(FloatRegister left, Register right) {
+   assert(Features().sse);
+   emit2(0x0f, 0x2e, left.code, right.code);
+ }
+ // UCOMISS xmm, m32: same comparison with the right-hand value read from
+ // memory.
+ void ucomiss(FloatRegister left, const Operand &right) {
+   assert(Features().sse);
+   emit2(0x0f, 0x2e, left.code, right);
+ }
+
+ // SSE2-only instructions.
+ // MOVD r32, xmm (66 0F 7E /r): copies the low 32 bits of |src| into |dest|.
+ // For this opcode the XMM source is encoded in the ModRM reg field and the
+ // general-purpose destination in r/m, so |src| must be handed to emit3() as
+ // the reg argument. (Passing them the other way round only produced correct
+ // code when both register numbers happened to be equal.)
+ void movd(Register dest, FloatRegister src) {
+   assert(Features().sse2);
+   emit3(0x66, 0x0f, 0x7e, src.code, dest.code);
+ }
+ // NOTE(review): opcode 7E is the store form (MOVD r/m32, xmm), so this
+ // overload emits a store of the XMM register numbered dest.code to |src|
+ // rather than a load into |dest|. It is left unchanged because a fix needs
+ // different parameter types -- confirm the intended semantics before use.
+ void movd(Register dest, const Operand &src) {
+   assert(Features().sse2);
+   emit3(0x66, 0x0f, 0x7e, dest.code, src);
+ }
+
static void PatchRel32Absolute(uint8_t *ip, void *ptr) {
int32_t delta = uint32_t(ptr) - uint32_t(ip);
*reinterpret_cast(ip - 4) = delta;
@@ -806,6 +905,22 @@ class AssemblerX86 : public Assembler
emit(reg, operand);
}
+ void emit3(uint8_t prefix1, uint8_t prefix2, uint8_t opcode) { // Appends the three bytes prefix1, prefix2, opcode at pos_.
+ ensureSpace(); // presumably reserves room for the raw writes below -- confirm against ensureSpace().
+ *pos_++ = prefix1;
+ *pos_++ = prefix2;
+ *pos_++ = opcode;
+ }
+ void emit3(uint8_t prefix1, uint8_t prefix2, uint8_t opcode, uint8_t reg, uint8_t opreg) { // Three opcode bytes followed by a register-direct ModRM byte.
+ emit3(prefix1, prefix2, opcode);
+ assert(reg <= 7); // reg must fit its 3-bit field. NOTE(review): opreg is not range-checked.
+ *pos_++ = (kModeReg << 6) | (reg << 3) | opreg; // kModeReg in bits 7-6, reg in bits 5-3, opreg in bits 2-0.
+ }
+ void emit3(uint8_t prefix1, uint8_t prefix2, uint8_t opcode, uint8_t reg, const Operand &operand) { // Three opcode bytes, then the operand encoding for (reg, operand).
+ emit3(prefix1, prefix2, opcode);
+ emit(reg, operand); // Delegates the ModRM/operand bytes to emit().
+ }
+
template
void shift_cl(const T &t, uint8_t r) {
emit1(0xd3, r, t);
diff --git a/public/jit/x86/macro-assembler-x86.h b/public/jit/x86/macro-assembler-x86.h
new file mode 100644
index 00000000..734018ca
--- /dev/null
+++ b/public/jit/x86/macro-assembler-x86.h
@@ -0,0 +1,104 @@
+/**
+ * vim: set ts=8 sts=2 sw=2 tw=99 et:
+ * =============================================================================
+ * SourcePawn JIT SDK
+ * Copyright (C) 2004-2008 AlliedModders LLC. All rights reserved.
+ * =============================================================================
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License, version 3.0, as published by the
+ * Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, AlliedModders LLC gives you permission to link the
+ * code of this program (as well as its derivative works) to "Half-Life 2," the
+ * "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
+ * by the Valve Corporation. You must obey the GNU General Public License in
+ * all respects for all other code used. Additionally, AlliedModders LLC grants
+ * this exception to all derivative works. AlliedModders LLC defines further
+ * exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
+ * or <http://www.sourcemod.net/license.php>.
+ *
+ * Version: $Id$
+ */
+#ifndef _include_sourcepawn_macroassembler_x86h__
+#define _include_sourcepawn_macroassembler_x86h__
+
+#include
+#include
+#include
+#include
+
+// Macro-assembler layer on top of AssemblerX86. It currently provides only
+// CPU feature detection: a generator for a small CPUID stub and a host-side
+// runner that decodes the stub's output into the shared feature flags.
+class MacroAssemblerX86 : public AssemblerX86
+{
+ public:
+  // Emits into |masm| a function that runs CPUID and stores raw feature words
+  // through three pointer arguments, read from [ebp+8], [ebp+12] and
+  // [ebp+16] (the argument order RunFeatureDetection() calls it with):
+  //   argument 0 <- ECX of CPUID leaf 1
+  //   argument 1 <- EDX of CPUID leaf 1
+  //   argument 2 <- EBX of CPUID leaf 7, sub-leaf 0; stays 0 when the highest
+  //                 leaf reported by CPUID leaf 0 is below 7
+  // EBP and EBX are pushed on entry and popped before returning; EAX, ECX and
+  // EDX are overwritten.
+  static void GenerateFeatureDetection(MacroAssemblerX86 &masm) {
+    masm.push(ebp);
+    masm.movl(ebp, esp);
+    masm.push(ebx);
+    {
+      // Get ECX, EDX feature bits at the first CPUID level.
+      masm.movl(eax, 1);
+      masm.cpuid();
+      masm.movl(eax, Operand(ebp, 8));
+      masm.movl(Operand(eax, 0), ecx);
+      masm.movl(eax, Operand(ebp, 12));
+      masm.movl(Operand(eax, 0), edx);
+    }
+
+    // Zero out bits we're not guaranteed to get.
+    masm.movl(eax, Operand(ebp, 16));
+    masm.movl(Operand(eax, 0), 0);
+
+    Label skip_level_7;
+    {
+      // Get EBX feature bits at 7th CPUID level. Leaf 0 is queried first and
+      // leaf 7 is skipped when the value it returns in EAX is below 7.
+      masm.movl(eax, 0);
+      masm.cpuid();
+      masm.cmpl(eax, 7);
+      masm.j(below, &skip_level_7);
+      masm.movl(eax, 7);
+      masm.movl(ecx, 0);
+      masm.cpuid();
+      masm.movl(eax, Operand(ebp, 16));
+      masm.movl(Operand(eax, 0), ebx);
+    }
+    masm.bind(&skip_level_7);
+
+    masm.pop(ebx);
+    masm.pop(ebp);
+    masm.ret();
+  }
+
+  // Calls the stub at |code| (expected to be the linked, executable output of
+  // GenerateFeatureDetection()), decodes the returned CPUID words into a
+  // CPUFeatures value and publishes it with SetFeatures().
+  static void RunFeatureDetection(void *code) {
+    typedef void (*fn_t)(int *reg_ecx, int *reg_edx, int *reg_ebx);
+
+    // Start from zero so every decoded flag is false should the stub ever
+    // leave one of the words unwritten.
+    int reg_ecx = 0;
+    int reg_edx = 0;
+    int reg_ebx = 0;
+    ((fn_t)code)(&reg_ecx, &reg_edx, &reg_ebx);
+
+    CPUFeatures features;
+    features.fpu = !!(reg_edx & (1 << 0));
+    features.mmx = !!(reg_edx & (1 << 23));
+    features.sse = !!(reg_edx & (1 << 25));
+    features.sse2 = !!(reg_edx & (1 << 26));
+    features.sse3 = !!(reg_ecx & (1 << 0));
+    features.ssse3 = !!(reg_ecx & (1 << 9));
+    features.sse4_1 = !!(reg_ecx & (1 << 19));
+    features.sse4_2 = !!(reg_ecx & (1 << 20));
+    // NOTE(review): avx/avx2 reflect the CPUID bits only; operating-system
+    // support for the extended register state is not checked here.
+    features.avx = !!(reg_ecx & (1 << 28));
+    features.avx2 = !!(reg_ebx & (1 << 5));
+    SetFeatures(features);
+  }
+
+ private:
+};
+
+#endif // _include_sourcepawn_macroassembler_x86h__
+
diff --git a/sourcepawn/jit/AMBuilder b/sourcepawn/jit/AMBuilder
index db304d85..aa919c72 100644
--- a/sourcepawn/jit/AMBuilder
+++ b/sourcepawn/jit/AMBuilder
@@ -37,7 +37,8 @@ binary.AddSourceFiles('sourcepawn/jit', [
'zlib/uncompr.c',
'zlib/zutil.c',
'md5/md5.cpp',
- '../../knight/shared/KeCodeAllocator.cpp'
+ '../../knight/shared/KeCodeAllocator.cpp',
+ '../../public/jit/x86/assembler-x86.cpp'
])
SM.AutoVersion('sourcepawn/jit', binary)
SM.ExtractDebugInfo(extension, binary)
diff --git a/sourcepawn/jit/Makefile.shell b/sourcepawn/jit/Makefile.shell
index f19c7399..3e80d316 100644
--- a/sourcepawn/jit/Makefile.shell
+++ b/sourcepawn/jit/Makefile.shell
@@ -34,6 +34,7 @@ OBJECTS = dll_exports.cpp \
zlib/zutil.c \
OBJECTS += ../../knight/shared/KeCodeAllocator.cpp
+OBJECTS += ../../public/jit/x86/assembler-x86.cpp
##############################################
### CONFIGURE ANY OTHER FLAGS/OPTIONS HERE ###
@@ -75,6 +76,7 @@ ifeq "$(GCC_VERSION)" "4"
endif
OBJ_LINUX := $(OBJECTS:../../knight/shared/%.cpp=$(BIN_DIR)/knight/%.o)
+OBJ_LINUX := $(OBJ_LINUX:../../public/jit/x86/%.cpp=$(BIN_DIR)/%.o)
OBJ_LINUX := $(OBJ_LINUX:%.cpp=$(BIN_DIR)/%.o)
OBJ_LINUX := $(OBJ_LINUX:%.c=$(BIN_DIR)/%.o)
@@ -89,6 +91,9 @@ $(BIN_DIR)/%.o: %.cpp
$(BIN_DIR)/knight/%.o: ../../knight/shared/%.cpp
$(CXX) $(INCLUDE) $(CFLAGS) $(CXXFLAGS) -o $@ -c $<
+$(BIN_DIR)/assembler-x86.o: ../../public/jit/x86/assembler-x86.cpp # Built from public/jit/x86; the OBJ_LINUX substitution maps that directory's sources straight into $(BIN_DIR).
+ $(CXX) $(INCLUDE) $(CFLAGS) $(CXXFLAGS) -o $@ -c $<
+
all:
mkdir -p $(BIN_DIR)/x86
mkdir -p $(BIN_DIR)/md5
diff --git a/sourcepawn/jit/x86/jit_x86.cpp b/sourcepawn/jit/x86/jit_x86.cpp
index 034aebe2..29f4ba9e 100644
--- a/sourcepawn/jit/x86/jit_x86.cpp
+++ b/sourcepawn/jit/x86/jit_x86.cpp
@@ -983,10 +983,15 @@ Compiler::emitOp(OPCODE op)
break;
case OP_FLOAT:
- __ fild32(Operand(edi, 0));
- __ subl(esp, 4);
- __ fstp32(Operand(esp, 0));
- __ pop(pri);
+ if (MacroAssemblerX86::Features().sse2) {
+ __ cvtsi2ss(xmm0, Operand(edi, 0));
+ __ movd(pri, xmm0);
+ } else {
+ __ fild32(Operand(edi, 0));
+ __ subl(esp, 4);
+ __ fstp32(Operand(esp, 0));
+ __ pop(pri);
+ }
__ addl(stk, 4);
break;
@@ -994,34 +999,52 @@ Compiler::emitOp(OPCODE op)
case OP_FLOATSUB:
case OP_FLOATMUL:
case OP_FLOATDIV:
- __ subl(esp, 4);
- __ fld32(Operand(edi, 0));
+ if (MacroAssemblerX86::Features().sse2) {
+ __ movss(xmm0, Operand(stk, 0));
+ if (op == OP_FLOATADD)
+ __ addss(xmm0, Operand(stk, 4));
+ else if (op == OP_FLOATSUB)
+ __ subss(xmm0, Operand(stk, 4));
+ else if (op == OP_FLOATMUL)
+ __ mulss(xmm0, Operand(stk, 4));
+ else if (op == OP_FLOATDIV)
+ __ divss(xmm0, Operand(stk, 4));
+ __ movd(pri, xmm0);
+ } else {
+ __ subl(esp, 4);
+ __ fld32(Operand(stk, 0));
- if (op == OP_FLOATADD)
- __ fadd32(Operand(edi, 4));
- else if (op == OP_FLOATSUB)
- __ fsub32(Operand(edi, 4));
- else if (op == OP_FLOATMUL)
- __ fmul32(Operand(edi, 4));
- else if (op == OP_FLOATDIV)
- __ fdiv32(Operand(edi, 4));
+ if (op == OP_FLOATADD)
+ __ fadd32(Operand(stk, 4));
+ else if (op == OP_FLOATSUB)
+ __ fsub32(Operand(stk, 4));
+ else if (op == OP_FLOATMUL)
+ __ fmul32(Operand(stk, 4));
+ else if (op == OP_FLOATDIV)
+ __ fdiv32(Operand(stk, 4));
- __ fstp32(Operand(esp, 0));
- __ pop(pri);
+ __ fstp32(Operand(esp, 0));
+ __ pop(pri);
+ }
__ addl(stk, 8);
break;
case OP_RND_TO_NEAREST:
{
- static float kRoundToNearest = 0.5f;
- // From http://wurstcaptures.untergrund.net/assembler_tricks.html#fastfloorf
- __ fld32(Operand(edi, 0));
- __ fadd32(st0, st0);
- __ fadd32(Operand(ExternalAddress(&kRoundToNearest)));
- __ subl(esp, 4);
- __ fistp32(Operand(esp, 0));
- __ pop(pri);
- __ sarl(pri, 1);
+ if (MacroAssemblerX86::Features().sse) {
+ // Assume no one is touching MXCSR.
+ __ cvtss2si(pri, Operand(stk, 0));
+ } else {
+ static float kRoundToNearest = 0.5f;
+ // From http://wurstcaptures.untergrund.net/assembler_tricks.html#fastfloorf
+ __ fld32(Operand(stk, 0));
+ __ fadd32(st0, st0);
+ __ fadd32(Operand(ExternalAddress(&kRoundToNearest)));
+ __ subl(esp, 4);
+ __ fistp32(Operand(esp, 0));
+ __ pop(pri);
+ __ sarl(pri, 1);
+ }
__ addl(stk, 4);
break;
}
@@ -1030,7 +1053,7 @@ Compiler::emitOp(OPCODE op)
{
static float kRoundToCeil = -0.5f;
// From http://wurstcaptures.untergrund.net/assembler_tricks.html#fastfloorf
- __ fld32(Operand(edi, 0));
+ __ fld32(Operand(stk, 0));
__ fadd32(st0, st0);
__ fsubr32(Operand(ExternalAddress(&kRoundToCeil)));
__ subl(esp, 4);
@@ -1043,15 +1066,19 @@ Compiler::emitOp(OPCODE op)
}
case OP_RND_TO_ZERO:
- __ fld32(Operand(edi, 0));
- __ subl(esp, 8);
- __ fstcw(Operand(esp, 4));
- __ movl(Operand(esp, 0), 0xfff);
- __ fldcw(Operand(esp, 0));
- __ fistp32(Operand(esp, 0));
- __ pop(pri);
- __ fldcw(Operand(esp, 0));
- __ addl(esp, 4);
+ if (MacroAssemblerX86::Features().sse) {
+ __ cvttss2si(pri, Operand(stk, 0));
+ } else {
+ __ fld32(Operand(stk, 0));
+ __ subl(esp, 8);
+ __ fstcw(Operand(esp, 4));
+ __ movl(Operand(esp, 0), 0xfff);
+ __ fldcw(Operand(esp, 0));
+ __ fistp32(Operand(esp, 0));
+ __ pop(pri);
+ __ fldcw(Operand(esp, 0));
+ __ addl(esp, 4);
+ }
__ addl(stk, 4);
break;
@@ -1071,10 +1098,15 @@ Compiler::emitOp(OPCODE op)
case OP_FLOATCMP:
{
Label bl, ab, done;
- __ fld32(Operand(edi, 0));
- __ fld32(Operand(edi, 4));
- __ fucomip(st1);
- __ fstp(st0);
+ if (MacroAssemblerX86::Features().sse) {
+ __ movss(xmm0, Operand(stk, 4));
+ __ ucomiss(xmm0, Operand(stk, 0));
+ } else {
+ __ fld32(Operand(stk, 0));
+ __ fld32(Operand(stk, 4));
+ __ fucomip(st1);
+ __ fstp(st0);
+ }
__ j(above, &ab);
__ j(below, &bl);
__ xorl(pri, pri);
@@ -1869,6 +1901,14 @@ bool JITX86::InitializeJIT()
if (!m_pJitGenArray)
return false;
+ MacroAssemblerX86 masm;
+ MacroAssemblerX86::GenerateFeatureDetection(masm);
+ void *code = LinkCode(masm);
+ if (!code)
+ return false;
+ MacroAssemblerX86::RunFeatureDetection(code);
+ KE_FreeCode(g_pCodeCache, code);
+
return true;
}
diff --git a/sourcepawn/jit/x86/jit_x86.h b/sourcepawn/jit/x86/jit_x86.h
index a59fc8a3..2b4c6d41 100644
--- a/sourcepawn/jit/x86/jit_x86.h
+++ b/sourcepawn/jit/x86/jit_x86.h
@@ -35,7 +35,7 @@
#include
#include
#include
-#include
+#include
#include
#include "jit_shared.h"
#include "BaseRuntime.h"