diff --git a/AMBuildScript b/AMBuildScript index e703820..fc9110f 100644 --- a/AMBuildScript +++ b/AMBuildScript @@ -238,8 +238,11 @@ class DHooksConfig(object): os.path.join(self.sm_root, 'sourcepawn', 'include'), os.path.join(self.sm_root, 'sourcepawn', 'vm'), os.path.join(self.sm_root, 'sourcepawn', 'vm', 'x86'), - os.path.join(self.sm_root, 'public', 'amtl', 'include'), + os.path.join(self.sm_root, 'public', 'amtl', 'include'), os.path.join(self.sm_root, 'public', 'amtl', 'amtl'), + os.path.join(builder.currentSourcePath, 'DynamicHooks', 'thirdparty'), + os.path.join(builder.currentSourcePath, 'DynamicHooks', 'thirdparty', 'AsmJit'), + os.path.join(builder.currentSourcePath, 'DynamicHooks'), ] @@ -256,6 +259,45 @@ program.sources += [ 'natives.cpp', 'vhook.cpp', 'util.cpp', + 'dynhooks_sourcepawn.cpp', +] + +# DynamicHooks +program.sources += [ + os.path.join('DynamicHooks', 'asm.cpp'), + os.path.join('DynamicHooks', 'hook.cpp'), + os.path.join('DynamicHooks', 'manager.cpp'), + os.path.join('DynamicHooks', 'registers.cpp'), + os.path.join('DynamicHooks', 'utilities.cpp'), + os.path.join('DynamicHooks', 'conventions', 'x86MsCdecl.cpp'), + os.path.join('DynamicHooks', 'conventions', 'x86MsStdcall.cpp'), + os.path.join('DynamicHooks', 'conventions', 'x86MsThiscall.cpp'), +] + +# ASMJit +program.sources += [ + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'assembler.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'compiler.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'compilercontext.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'constpool.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'containers.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'cpuinfo.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'globals.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'hlstream.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'logger.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'operand.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'podvector.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'runtime.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'utils.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'vmem.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'base', 'zone.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'x86', 'x86assembler.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'x86', 'x86compiler.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'x86', 'x86compilercontext.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'x86', 'x86compilerfunc.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'x86', 'x86inst.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'x86', 'x86operand.cpp'), + os.path.join('DynamicHooks', 'thirdparty', 'AsmJit', 'x86', 'x86operand_regs.cpp'), ] program.sources += [os.path.join(DHooks.sm_root, 'public', 'smsdk_ext.cpp')] diff --git a/DynamicHooks/asm.cpp b/DynamicHooks/asm.cpp new file mode 100644 index 0000000..2969b8b --- /dev/null +++ b/DynamicHooks/asm.cpp @@ -0,0 +1,457 @@ +/** +* ============================================================================= +* DynamicHooks +* Copyright (C) 2015 Robin Gohmert. All rights reserved. 
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include "asm.h"
+
+#ifndef _WIN32
+
+#include <stdint.h>
+
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif // _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+
+#define REG_EAX 0
+#define REG_ECX 1
+#define REG_EDX 2
+#define REG_EBX 3
+
+#define IA32_MOV_REG_IMM 0xB8 // encoding is +r
+#endif
+
+/**
+* Checks if a call to a -fPIC thunk has just been written into dest.
+* If found, replaces it with a direct mov that sets the required register to
+* the value of pc.
+*
+* @param dest Destination buffer where a call opcode + addr (5 bytes) has just been written.
+* @param pc The program counter value that needs to be set (usually the next address from the source).
+* @noreturn
+*/
+void check_thunks(unsigned char *dest, unsigned char *pc)
+{
+#if defined _WIN32
+	return;
+#else
+	/* Step the write address back 4 bytes to the start of the call's operand */
+	unsigned char *writeaddr = dest - 4;
+	unsigned char *calloffset = *(unsigned char **)writeaddr;
+	unsigned char *calladdr = (unsigned char *)(dest + (intptr_t)calloffset);
+
+	/* Check whether the callee looks like a get-pc thunk */
+	if ((*calladdr == 0x8B) && (*(calladdr+2) == 0x24) && (*(calladdr+3) == 0xC3))
+	{
+		//a thunk maybe?
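+		// A gcc -fPIC get-pc thunk is just "mov reg, [esp]; ret". The byte
+		// patterns matched above (assuming the usual __i686.get_pc_thunk.*
+		// helpers) are:
+		//     8B 04 24 C3  ->  mov eax, [esp]; ret
+		//     8B 0C 24 C3  ->  mov ecx, [esp]; ret
+		//     8B 14 24 C3  ->  mov edx, [esp]; ret
+		//     8B 1C 24 C3  ->  mov ebx, [esp]; ret
+		// The ModRM byte (second byte) selects the register, so the switch
+		// below maps it onto the matching B8+r "mov reg, imm32" encoding.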
+ char movByte = IA32_MOV_REG_IMM; + + /* Calculate the correct mov opcode */ + switch (*(calladdr+1)) + { + case 0x04: + { + movByte += REG_EAX; + break; + } + case 0x1C: + { + movByte += REG_EBX; + break; + } + case 0x0C: + { + movByte += REG_ECX; + break; + } + case 0x14: + { + movByte += REG_EDX; + break; + } + default: + { + break; + } + } + + /* Move our write address back one to where the call opcode was */ + writeaddr--; + + + /* Write our mov */ + *writeaddr = movByte; + writeaddr++; + + /* Write the value - The provided program counter value */ + *(void **)writeaddr = (void *)pc; + writeaddr += 4; + } + + return; +#endif +} + +//if dest is NULL, returns minimum number of bytes needed to be copied +//if dest is not NULL, it will copy the bytes to dest as well as fix CALLs and JMPs +//http://www.devmaster.net/forums/showthread.php?t=2311 +int copy_bytes(unsigned char *func, unsigned char* dest, int required_len) { + int bytecount = 0; + + while(bytecount < required_len && *func != 0xCC) + { + // prefixes F0h, F2h, F3h, 66h, 67h, D8h-DFh, 2Eh, 36h, 3Eh, 26h, 64h and 65h + int operandSize = 4; + int FPU = 0; + int twoByte = 0; + unsigned char opcode = 0x90; + unsigned char modRM = 0xFF; + while(*func == 0xF0 || + *func == 0xF2 || + *func == 0xF3 || + (*func & 0xFC) == 0x64 || + (*func & 0xF8) == 0xD8 || + (*func & 0x7E) == 0x62) + { + if(*func == 0x66) + { + operandSize = 2; + } + else if((*func & 0xF8) == 0xD8) + { + FPU = *func; + if (dest) + *dest++ = *func++; + else + func++; + bytecount++; + break; + } + + if (dest) + *dest++ = *func++; + else + func++; + bytecount++; + } + + // two-byte opcode byte + if(*func == 0x0F) + { + twoByte = 1; + if (dest) + *dest++ = *func++; + else + func++; + bytecount++; + } + + // opcode byte + opcode = *func++; + if (dest) *dest++ = opcode; + bytecount++; + + // mod R/M byte + modRM = 0xFF; + if(FPU) + { + if((opcode & 0xC0) != 0xC0) + { + modRM = opcode; + } + } + else if(!twoByte) + { + if((opcode & 0xC4) == 0x00 || + (opcode & 0xF4) == 0x60 && ((opcode & 0x0A) == 0x02 || (opcode & 0x09) == 0x09) || + (opcode & 0xF0) == 0x80 || + (opcode & 0xF8) == 0xC0 && (opcode & 0x0E) != 0x02 || + (opcode & 0xFC) == 0xD0 || + (opcode & 0xF6) == 0xF6) + { + modRM = *func++; + if (dest) *dest++ = modRM; + bytecount++; + } + } + else + { + if((opcode & 0xF0) == 0x00 && (opcode & 0x0F) >= 0x04 && (opcode & 0x0D) != 0x0D || + (opcode & 0xF0) == 0x30 || + opcode == 0x77 || + (opcode & 0xF0) == 0x80 || + (opcode & 0xF0) == 0xA0 && (opcode & 0x07) <= 0x02 || + (opcode & 0xF8) == 0xC8) + { + // No mod R/M byte + } + else + { + modRM = *func++; + if (dest) *dest++ = modRM; + bytecount++; + } + } + + // SIB + if((modRM & 0x07) == 0x04 && + (modRM & 0xC0) != 0xC0) + { + if (dest) + *dest++ = *func++; //SIB + else + func++; + bytecount++; + } + + // mod R/M displacement + + // Dword displacement, no base + if((modRM & 0xC5) == 0x05) { + if (dest) { + *(unsigned int*)dest = *(unsigned int*)func; + dest += 4; + } + func += 4; + bytecount += 4; + } + + // Byte displacement + if((modRM & 0xC0) == 0x40) { + if (dest) + *dest++ = *func++; + else + func++; + bytecount++; + } + + // Dword displacement + if((modRM & 0xC0) == 0x80) { + if (dest) { + *(unsigned int*)dest = *(unsigned int*)func; + dest += 4; + } + func += 4; + bytecount += 4; + } + + // immediate + if(FPU) + { + // Can't have immediate operand + } + else if(!twoByte) + { + if((opcode & 0xC7) == 0x04 || + (opcode & 0xFE) == 0x6A || // PUSH/POP/IMUL + (opcode & 0xF0) == 0x70 || // Jcc + opcode == 0x80 || + opcode == 
0x83 || + (opcode & 0xFD) == 0xA0 || // MOV + opcode == 0xA8 || // TEST + (opcode & 0xF8) == 0xB0 || // MOV + (opcode & 0xFE) == 0xC0 || // RCL + opcode == 0xC6 || // MOV + opcode == 0xCD || // INT + (opcode & 0xFE) == 0xD4 || // AAD/AAM + (opcode & 0xF8) == 0xE0 || // LOOP/JCXZ + opcode == 0xEB || + opcode == 0xF6 && (modRM & 0x30) == 0x00) // TEST + { + if (dest) + *dest++ = *func++; + else + func++; + bytecount++; + } + else if((opcode & 0xF7) == 0xC2) // RET + { + if (dest) { + *(unsigned short*)dest = *(unsigned short*)func; + dest += 2; + } + func += 2; + bytecount += 2; + } + else if((opcode & 0xFC) == 0x80 || + (opcode & 0xC7) == 0x05 || + (opcode & 0xF8) == 0xB8 || + (opcode & 0xFE) == 0xE8 || // CALL/Jcc + (opcode & 0xFE) == 0x68 || + (opcode & 0xFC) == 0xA0 || + (opcode & 0xEE) == 0xA8 || + opcode == 0xC7 || + opcode == 0xF7 && (modRM & 0x30) == 0x00) + { + if (dest) { + //Fix CALL/JMP offset + if ((opcode & 0xFE) == 0xE8) { + if (operandSize == 4) + { + *(long*)dest = ((func + *(long*)func) - dest); + + //pRED* edit. func is the current address of the call address, +4 is the next instruction, so the value of $pc + check_thunks(dest+4, func+4); + } + else + *(short*)dest = ((func + *(short*)func) - dest); + + } else { + if (operandSize == 4) + *(unsigned long*)dest = *(unsigned long*)func; + else + *(unsigned short*)dest = *(unsigned short*)func; + } + dest += operandSize; + } + func += operandSize; + bytecount += operandSize; + + } + } + else + { + if(opcode == 0xBA || // BT + opcode == 0x0F || // 3DNow! + (opcode & 0xFC) == 0x70 || // PSLLW + (opcode & 0xF7) == 0xA4 || // SHLD + opcode == 0xC2 || + opcode == 0xC4 || + opcode == 0xC5 || + opcode == 0xC6) + { + if (dest) + *dest++ = *func++; + else + func++; + } + else if((opcode & 0xF0) == 0x80) // Jcc -i + { + if (dest) { + if (operandSize == 4) + *(unsigned long*)dest = *(unsigned long*)func; + else + *(unsigned short*)dest = *(unsigned short*)func; + + dest += operandSize; + } + func += operandSize; + bytecount += operandSize; + } + } + } + + return bytecount; +} + +//insert a specific JMP instruction at the given location +void inject_jmp(void* src, void* dest) { + *(unsigned char*)src = OP_JMP; + *(long*)((unsigned char*)src+1) = (long)((unsigned char*)dest - ((unsigned char*)src + OP_JMP_SIZE)); +} + +//fill a given block with NOPs +void fill_nop(void* src, unsigned int len) { + unsigned char* src2 = (unsigned char*)src; + while (len) { + *src2++ = OP_NOP; + --len; + } +} + +void* eval_jump(void* src) { + unsigned char* addr = (unsigned char*)src; + + if (!addr) return 0; + + //import table jump + if (addr[0] == OP_PREFIX && addr[1] == OP_JMP_SEG) { + addr += 2; + addr = *(unsigned char**)addr; + //TODO: if addr points into the IAT + return *(void**)addr; + } + + //8bit offset + else if (addr[0] == OP_JMP_BYTE) { + addr = &addr[OP_JMP_BYTE_SIZE] + *(char*)&addr[1]; + //mangled 32bit jump? 
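+		// (a patched target may chain the short jmp into a full rel32 jmp;
+		// if the byte at the destination is 0xE9, resolve that jump as well)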
+		if (addr[0] == OP_JMP) {
+			addr = &addr[OP_JMP_SIZE] + *(int*)&addr[1];
+		}
+		return addr;
+	}
+	/*
+	//32bit offset
+	else if (addr[0] == OP_JMP) {
+		addr = &addr[OP_JMP_SIZE] + *(int*)&addr[1];
+	}
+	*/
+
+	return addr;
+}
+/*
+from ms detours package
+static bool detour_is_imported(PBYTE pbCode, PBYTE pbAddress)
+{
+MEMORY_BASIC_INFORMATION mbi;
+VirtualQuery((PVOID)pbCode, &mbi, sizeof(mbi));
+__try {
+PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)mbi.AllocationBase;
+if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+return false;
+}
+
+PIMAGE_NT_HEADERS pNtHeader = (PIMAGE_NT_HEADERS)((PBYTE)pDosHeader +
+pDosHeader->e_lfanew);
+if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) {
+return false;
+}
+
+if (pbAddress >= ((PBYTE)pDosHeader +
+pNtHeader->OptionalHeader
+.DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].VirtualAddress) &&
+pbAddress < ((PBYTE)pDosHeader +
+pNtHeader->OptionalHeader
+.DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].VirtualAddress +
+pNtHeader->OptionalHeader
+.DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].Size)) {
+return true;
+}
+return false;
+}
+__except(EXCEPTION_EXECUTE_HANDLER) {
+return false;
+}
+}
+*/
diff --git a/DynamicHooks/asm.h b/DynamicHooks/asm.h
new file mode 100644
index 0000000..8f3b9fd
--- /dev/null
+++ b/DynamicHooks/asm.h
@@ -0,0 +1,70 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/ + +#ifndef __ASM_H__ +#define __ASM_H__ + +#define OP_JMP 0xE9 +#define OP_JMP_SIZE 5 + +#define OP_NOP 0x90 +#define OP_NOP_SIZE 1 + +#define OP_PREFIX 0xFF +#define OP_JMP_SEG 0x25 + +#define OP_JMP_BYTE 0xEB +#define OP_JMP_BYTE_SIZE 2 + +#ifdef __cplusplus +extern "C" { +#endif + + void check_thunks(unsigned char *dest, unsigned char *pc); + + //if dest is NULL, returns minimum number of bytes needed to be copied + //if dest is not NULL, it will copy the bytes to dest as well as fix CALLs and JMPs + //http://www.devmaster.net/forums/showthread.php?t=2311 + int copy_bytes(unsigned char *func, unsigned char* dest, int required_len); + + //insert a specific JMP instruction at the given location + void inject_jmp(void* src, void* dest); + + //fill a given block with NOPs + void fill_nop(void* src, unsigned int len); + + //evaluate a JMP at the target + void* eval_jump(void* src); + +#ifdef __cplusplus +} +#endif + +#endif //__ASM_H__ diff --git a/DynamicHooks/convention.h b/DynamicHooks/convention.h new file mode 100644 index 0000000..2e86018 --- /dev/null +++ b/DynamicHooks/convention.h @@ -0,0 +1,216 @@ +/** +* ============================================================================= +* DynamicHooks +* Copyright (C) 2015 Robin Gohmert. All rights reserved. +* ============================================================================= +* +* This software is provided 'as-is', without any express or implied warranty. +* In no event will the authors be held liable for any damages arising from +* the use of this software. +* +* Permission is granted to anyone to use this software for any purpose, +* including commercial applications, and to alter it and redistribute it +* freely, subject to the following restrictions: +* +* 1. The origin of this software must not be misrepresented; you must not +* claim that you wrote the original software. If you use this software in a +* product, an acknowledgment in the product documentation would be +* appreciated but is not required. +* +* 2. Altered source versions must be plainly marked as such, and must not be +* misrepresented as being the original software. +* +* 3. This notice may not be removed or altered from any source distribution. +* +* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc +* -fPIC thunks correctly +* +* Idea and trampoline code taken from DynDetours (thanks your-name-here). 
+*/
+
+#ifndef _CONVENTION_H
+#define _CONVENTION_H
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include <vector>
+#include <list>
+#include <stdio.h>
+
+#include "registers.h"
+
+// ============================================================================
+// >> DataType_t
+// ============================================================================
+enum DataType_t
+{
+	DATA_TYPE_VOID,
+	DATA_TYPE_BOOL,
+	DATA_TYPE_CHAR,
+	DATA_TYPE_UCHAR,
+	DATA_TYPE_SHORT,
+	DATA_TYPE_USHORT,
+	DATA_TYPE_INT,
+	DATA_TYPE_UINT,
+	DATA_TYPE_LONG,
+	DATA_TYPE_ULONG,
+	DATA_TYPE_LONG_LONG,
+	DATA_TYPE_ULONG_LONG,
+	DATA_TYPE_FLOAT,
+	DATA_TYPE_DOUBLE,
+	DATA_TYPE_POINTER,
+	DATA_TYPE_STRING,
+	DATA_TYPE_OBJECT
+};
+
+typedef struct DataTypeSized_s {
+	DataTypeSized_s()
+	{
+		type = DATA_TYPE_POINTER;
+		size = 0;
+	}
+	DataType_t type;
+	size_t size;
+} DataTypeSized_t;
+
+
+// ============================================================================
+// >> FUNCTIONS
+// ============================================================================
+/*
+Returns the size after applying alignment.
+
+@param <size>:
+The size that should be aligned.
+
+@param <alignment>:
+The alignment that should be used.
+*/
+inline int Align(int size, int alignment)
+{
+	int unaligned = size % alignment;
+	if (unaligned == 0)
+		return size;
+
+	return size + (alignment - unaligned);
+}
+
+/*
+Returns the size of a data type after applying alignment.
+
+@param <type>:
+The data type you would like to get the size of.
+
+@param <iAlignment>:
+The alignment that should be used.
+*/
+inline int GetDataTypeSize(DataTypeSized_t type, int iAlignment=4)
+{
+	switch(type.type)
+	{
+	case DATA_TYPE_VOID: return 0;
+	case DATA_TYPE_BOOL: return Align(sizeof(bool), iAlignment);
+	case DATA_TYPE_CHAR: return Align(sizeof(char), iAlignment);
+	case DATA_TYPE_UCHAR: return Align(sizeof(unsigned char), iAlignment);
+	case DATA_TYPE_SHORT: return Align(sizeof(short), iAlignment);
+	case DATA_TYPE_USHORT: return Align(sizeof(unsigned short), iAlignment);
+	case DATA_TYPE_INT: return Align(sizeof(int), iAlignment);
+	case DATA_TYPE_UINT: return Align(sizeof(unsigned int), iAlignment);
+	case DATA_TYPE_LONG: return Align(sizeof(long), iAlignment);
+	case DATA_TYPE_ULONG: return Align(sizeof(unsigned long), iAlignment);
+	case DATA_TYPE_LONG_LONG: return Align(sizeof(long long), iAlignment);
+	case DATA_TYPE_ULONG_LONG: return Align(sizeof(unsigned long long), iAlignment);
+	case DATA_TYPE_FLOAT: return Align(sizeof(float), iAlignment);
+	case DATA_TYPE_DOUBLE: return Align(sizeof(double), iAlignment);
+	case DATA_TYPE_POINTER: return Align(sizeof(void *), iAlignment);
+	case DATA_TYPE_STRING: return Align(sizeof(char *), iAlignment);
+	case DATA_TYPE_OBJECT: return type.size;
+	default: puts("Unknown data type.");
+	}
+	return 0;
+}
+
+// ============================================================================
+// >> CLASSES
+// ============================================================================
+/*
+This is the base class for every calling convention. Inherit from this class
+to create your own calling convention.
+*/
+class ICallingConvention
+{
+public:
+	/*
+	Initializes the calling convention.
+
+	@param <vecArgTypes>:
+	A list of DataTypeSized_t objects, which define the arguments of the function.
+
+	@param <returnType>:
+	The return type of the function.
+
+	@param <iAlignment>:
+	The alignment used for arguments on the stack (4 bytes by default).
+	*/
+	ICallingConvention(std::vector<DataTypeSized_t> vecArgTypes, DataTypeSized_t returnType, int iAlignment=4)
+	{
+		m_vecArgTypes = vecArgTypes;
+		std::vector<DataTypeSized_t>::iterator it = m_vecArgTypes.begin();
+		for (; it != m_vecArgTypes.end(); it++)
+		{
+			DataTypeSized_t &type = *it;
+			if (!type.size)
+				type.size = GetDataTypeSize(type);
+		}
+		m_returnType = returnType;
+		if (!m_returnType.size)
+			m_returnType.size = GetDataTypeSize(m_returnType);
+		m_iAlignment = iAlignment;
+	}
+
+	/*
+	This should return a list of Register_t values. These registers will be
+	saved for later access.
+	*/
+	virtual std::list<Register_t> GetRegisters() = 0;
+
+	/*
+	Returns the number of bytes that should be added to the stack to clean up.
+	*/
+	virtual int GetPopSize() = 0;
+
+	virtual int GetArgStackSize() = 0;
+	virtual void** GetStackArgumentPtr(CRegisters* pRegisters) = 0;
+
+	/*
+	Returns a pointer to the argument at the given index.
+
+	@param <iIndex>:
+	The index of the argument.
+
+	@param <pRegisters>:
+	A snapshot of all saved registers.
+	*/
+	virtual void* GetArgumentPtr(int iIndex, CRegisters* pRegisters) = 0;
+
+	/*
+	Called when the argument pointer at the given index was changed by a
+	hook handler.
+	*/
+	virtual void ArgumentPtrChanged(int iIndex, CRegisters* pRegisters, void* pArgumentPtr) = 0;
+
+	/*
+	Returns a pointer to the return value.
+
+	@param <pRegisters>:
+	A snapshot of all saved registers.
+	*/
+	virtual void* GetReturnPtr(CRegisters* pRegisters) = 0;
+
+	/*
+	Called when the pointer to the return value was changed by a hook handler.
+	*/
+	virtual void ReturnPtrChanged(CRegisters* pRegisters, void* pReturnPtr) = 0;
+
+public:
+	std::vector<DataTypeSized_t> m_vecArgTypes;
+	DataTypeSized_t m_returnType;
+	int m_iAlignment;
+};
+
+#endif // _CONVENTION_H
\ No newline at end of file
diff --git a/DynamicHooks/conventions/x86GccCdecl.h b/DynamicHooks/conventions/x86GccCdecl.h
new file mode 100644
index 0000000..7a5867b
--- /dev/null
+++ b/DynamicHooks/conventions/x86GccCdecl.h
@@ -0,0 +1,46 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/ + +#ifndef _X86_GCC_CDECL_H +#define _X86_GCC_CDECL_H + +// ============================================================================ +// >> INCLUDES +// ============================================================================ +#include "x86MsCdecl.h" + + +// ============================================================================ +// >> CLASSES +// ============================================================================ +typedef x86MsCdecl x86GccCdecl; + + +#endif // _X86_GCC_CDECL_H \ No newline at end of file diff --git a/DynamicHooks/conventions/x86GccThiscall.h b/DynamicHooks/conventions/x86GccThiscall.h new file mode 100644 index 0000000..d754dbc --- /dev/null +++ b/DynamicHooks/conventions/x86GccThiscall.h @@ -0,0 +1,46 @@ +/** +* ============================================================================= +* DynamicHooks +* Copyright (C) 2015 Robin Gohmert. All rights reserved. +* ============================================================================= +* +* This software is provided 'as-is', without any express or implied warranty. +* In no event will the authors be held liable for any damages arising from +* the use of this software. +* +* Permission is granted to anyone to use this software for any purpose, +* including commercial applications, and to alter it and redistribute it +* freely, subject to the following restrictions: +* +* 1. The origin of this software must not be misrepresented; you must not +* claim that you wrote the original software. If you use this software in a +* product, an acknowledgment in the product documentation would be +* appreciated but is not required. +* +* 2. Altered source versions must be plainly marked as such, and must not be +* misrepresented as being the original software. +* +* 3. This notice may not be removed or altered from any source distribution. +* +* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc +* -fPIC thunks correctly +* +* Idea and trampoline code taken from DynDetours (thanks your-name-here). +*/ + +#ifndef _X86_GCC_THISCALL_H +#define _X86_GCC_THISCALL_H + +// ============================================================================ +// >> INCLUDES +// ============================================================================ +#include "x86GccCdecl.h" + + +// ============================================================================ +// >> CLASSES +// ============================================================================ +typedef x86GccCdecl x86GccThiscall; + + +#endif // _X86_GCC_THISCALL_H \ No newline at end of file diff --git a/DynamicHooks/conventions/x86MsCdecl.cpp b/DynamicHooks/conventions/x86MsCdecl.cpp new file mode 100644 index 0000000..7729408 --- /dev/null +++ b/DynamicHooks/conventions/x86MsCdecl.cpp @@ -0,0 +1,145 @@ +/** +* ============================================================================= +* DynamicHooks +* Copyright (C) 2015 Robin Gohmert. All rights reserved. +* ============================================================================= +* +* This software is provided 'as-is', without any express or implied warranty. +* In no event will the authors be held liable for any damages arising from +* the use of this software. +* +* Permission is granted to anyone to use this software for any purpose, +* including commercial applications, and to alter it and redistribute it +* freely, subject to the following restrictions: +* +* 1. 
The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include "x86MsCdecl.h"
+#include <string.h>
+#include <stdlib.h>
+
+
+// ============================================================================
+// >> x86MsCdecl
+// ============================================================================
+x86MsCdecl::x86MsCdecl(std::vector<DataTypeSized_t> vecArgTypes, DataTypeSized_t returnType, int iAlignment) :
+	ICallingConvention(vecArgTypes, returnType, iAlignment)
+{
+	if (m_returnType.size > 4)
+	{
+		m_pReturnBuffer = malloc(m_returnType.size);
+	}
+	else
+	{
+		m_pReturnBuffer = NULL;
+	}
+}
+
+x86MsCdecl::~x86MsCdecl()
+{
+	if (m_pReturnBuffer)
+	{
+		free(m_pReturnBuffer);
+	}
+}
+
+std::list<Register_t> x86MsCdecl::GetRegisters()
+{
+	std::list<Register_t> registers;
+
+	registers.push_back(ESP);
+
+	if (m_returnType.type == DATA_TYPE_FLOAT || m_returnType.type == DATA_TYPE_DOUBLE)
+	{
+		registers.push_back(ST0);
+	}
+	else
+	{
+		registers.push_back(EAX);
+		if (m_pReturnBuffer)
+		{
+			registers.push_back(EDX);
+		}
+	}
+
+	return registers;
+}
+
+int x86MsCdecl::GetPopSize()
+{
+	return 0;
+}
+
+int x86MsCdecl::GetArgStackSize()
+{
+	int iArgStackSize = 0;
+
+	for (unsigned int i = 0; i < m_vecArgTypes.size(); i++)
+	{
+		iArgStackSize += m_vecArgTypes[i].size;
+	}
+
+	return iArgStackSize;
+}
+
+void** x86MsCdecl::GetStackArgumentPtr(CRegisters* pRegisters)
+{
+	return (void **)(pRegisters->m_esp->GetValue() + 4);
+}
+
+void* x86MsCdecl::GetArgumentPtr(int iIndex, CRegisters* pRegisters)
+{
+	int iOffset = 4;
+	for(int i=0; i < iIndex; i++)
+	{
+		iOffset += m_vecArgTypes[i].size;
+	}
+
+	return (void *) (pRegisters->m_esp->GetValue() + iOffset);
+}
+
+void x86MsCdecl::ArgumentPtrChanged(int iIndex, CRegisters* pRegisters, void* pArgumentPtr)
+{
+}
+
+void* x86MsCdecl::GetReturnPtr(CRegisters* pRegisters)
+{
+	if (m_returnType.type == DATA_TYPE_FLOAT || m_returnType.type == DATA_TYPE_DOUBLE)
+		return pRegisters->m_st0->m_pAddress;
+
+	if (m_pReturnBuffer)
+	{
+		// First half in eax, second half in edx
+		memcpy(m_pReturnBuffer, pRegisters->m_eax, 4);
+		memcpy((void *) ((unsigned long) m_pReturnBuffer + 4), pRegisters->m_edx, 4);
+		return m_pReturnBuffer;
+	}
+
+	return pRegisters->m_eax->m_pAddress;
+}
+
+void x86MsCdecl::ReturnPtrChanged(CRegisters* pRegisters, void* pReturnPtr)
+{
+	if (m_pReturnBuffer)
+	{
+		// First half in eax, second half in edx
+		memcpy(pRegisters->m_eax, m_pReturnBuffer, 4);
+		memcpy(pRegisters->m_edx, (void *) ((unsigned long) m_pReturnBuffer + 4), 4);
+	}
+}
\ No newline at end of file
diff --git a/DynamicHooks/conventions/x86MsCdecl.h b/DynamicHooks/conventions/x86MsCdecl.h
new file mode 100644
index 0000000..cfda15e
--- /dev/null
+++ b/DynamicHooks/conventions/x86MsCdecl.h
@@ -0,0 +1,84 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+#ifndef _X86_MS_CDECL_H
+#define _X86_MS_CDECL_H
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include "../convention.h"
+
+
+// ============================================================================
+// >> CLASSES
+// ============================================================================
+/*
+Source: DynCall manual and Windows docs
+
+Registers:
+	- eax = return value
+	- edx = return value
+	- esp = stack pointer
+	- st0 = floating point return value
+
+Parameter passing:
+	- stack parameter order: right-to-left
+	- caller cleans up the stack
+	- all arguments are pushed onto the stack
+	- alignment: 4 bytes
+
+Return values:
+	- return values of pointer or integral type (<= 32 bits) are returned via the eax register
+	- integers > 32 bits are returned via the eax and edx registers
+	- floating point types are returned via the st0 register
+*/
+class x86MsCdecl: public ICallingConvention
+{
+public:
+	x86MsCdecl(std::vector<DataTypeSized_t> vecArgTypes, DataTypeSized_t returnType, int iAlignment=4);
+	~x86MsCdecl();
+
+	virtual std::list<Register_t> GetRegisters();
+	virtual int GetPopSize();
+	virtual int GetArgStackSize();
+	virtual void** GetStackArgumentPtr(CRegisters* pRegisters);
+
+	virtual void* GetArgumentPtr(int iIndex, CRegisters* pRegisters);
+	virtual void ArgumentPtrChanged(int iIndex, CRegisters* pRegisters, void* pArgumentPtr);
+
+	virtual void* GetReturnPtr(CRegisters* pRegisters);
+	virtual void ReturnPtrChanged(CRegisters* pRegisters, void* pReturnPtr);
+
+private:
+	void* m_pReturnBuffer;
+};
+
+#endif // _X86_MS_CDECL_H
\ No newline at end of file
diff --git a/DynamicHooks/conventions/x86MsStdcall.cpp b/DynamicHooks/conventions/x86MsStdcall.cpp
new file mode 100644
index 0000000..3db8236
--- /dev/null
+++ b/DynamicHooks/conventions/x86MsStdcall.cpp
@@ -0,0 +1,152 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include "x86MsStdcall.h"
+#include <string.h>
+#include <stdlib.h>
+
+
+// ============================================================================
+// >> x86MsStdcall
+// ============================================================================
+x86MsStdcall::x86MsStdcall(std::vector<DataTypeSized_t> vecArgTypes, DataTypeSized_t returnType, int iAlignment) :
+	ICallingConvention(vecArgTypes, returnType, iAlignment)
+{
+	if (m_returnType.size > 4)
+	{
+		m_pReturnBuffer = malloc(m_returnType.size);
+	}
+	else
+	{
+		m_pReturnBuffer = NULL;
+	}
+}
+
+x86MsStdcall::~x86MsStdcall()
+{
+	if (m_pReturnBuffer)
+	{
+		free(m_pReturnBuffer);
+	}
+}
+
+std::list<Register_t> x86MsStdcall::GetRegisters()
+{
+	std::list<Register_t> registers;
+
+	registers.push_back(ESP);
+
+	if (m_returnType.type == DATA_TYPE_FLOAT || m_returnType.type == DATA_TYPE_DOUBLE)
+	{
+		registers.push_back(ST0);
+	}
+	else
+	{
+		registers.push_back(EAX);
+		if (m_pReturnBuffer)
+		{
+			registers.push_back(EDX);
+		}
+	}
+
+	return registers;
+}
+
+int x86MsStdcall::GetPopSize()
+{
+	int iPopSize = 0;
+
+	for(unsigned int i=0; i < m_vecArgTypes.size(); i++)
+	{
+		iPopSize += m_vecArgTypes[i].size;
+	}
+
+	return iPopSize;
+}
+
+int x86MsStdcall::GetArgStackSize()
+{
+	int iArgStackSize = 0;
+
+	for (unsigned int i = 0; i < m_vecArgTypes.size(); i++)
+	{
+		iArgStackSize += m_vecArgTypes[i].size;
+	}
+
+	return iArgStackSize;
+}
+
+void** x86MsStdcall::GetStackArgumentPtr(CRegisters* pRegisters)
+{
+	return (void **)(pRegisters->m_esp->GetValue() + 4);
+}
+
+void* x86MsStdcall::GetArgumentPtr(int iIndex, CRegisters* pRegisters)
+{
+	int iOffset = 4;
+	for(int i=0; i < iIndex; i++)
+	{
+		iOffset += m_vecArgTypes[i].size;
+	}
+
+	return (void *) (pRegisters->m_esp->GetValue() + iOffset);
+}
+
+void x86MsStdcall::ArgumentPtrChanged(int iIndex, CRegisters* pRegisters, void* pArgumentPtr)
+{
+}
+
+void* x86MsStdcall::GetReturnPtr(CRegisters* pRegisters)
+{
+	if (m_returnType.type == DATA_TYPE_FLOAT || m_returnType.type == DATA_TYPE_DOUBLE)
+		return pRegisters->m_st0->m_pAddress;
+
+	if (m_pReturnBuffer)
+	{
+		// First half in eax, second half in edx
+		memcpy(m_pReturnBuffer, pRegisters->m_eax, 4);
+		memcpy((void *) ((unsigned long) m_pReturnBuffer + 4), pRegisters->m_edx, 4);
+		return m_pReturnBuffer;
+	}
+
+	return pRegisters->m_eax->m_pAddress;
+}
+
+void x86MsStdcall::ReturnPtrChanged(CRegisters* pRegisters, void* pReturnPtr)
+{
+	if (m_pReturnBuffer)
+	{
+		// First half in eax, second half in edx
+		memcpy(pRegisters->m_eax, m_pReturnBuffer, 4);
+		memcpy(pRegisters->m_edx, (void *) ((unsigned long) m_pReturnBuffer + 4), 4);
+	}
+}
\ No newline at end of file
diff --git a/DynamicHooks/conventions/x86MsStdcall.h b/DynamicHooks/conventions/x86MsStdcall.h
new file mode 100644
index 0000000..51cb42f
--- /dev/null
+++ b/DynamicHooks/conventions/x86MsStdcall.h
@@ -0,0 +1,84 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+#ifndef _X86_MS_STDCALL_H
+#define _X86_MS_STDCALL_H
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include "../convention.h"
+
+
+// ============================================================================
+// >> CLASSES
+// ============================================================================
+/*
+Source: DynCall manual and Windows docs
+
+Registers:
+	- eax = return value
+	- edx = return value
+	- esp = stack pointer
+	- st0 = floating point return value
+
+Parameter passing:
+	- stack parameter order: right-to-left
+	- callee cleans up the stack
+	- all arguments are pushed onto the stack
+	- alignment: 4 bytes
+
+Return values:
+	- return values of pointer or integral type (<= 32 bits) are returned via the eax register
+	- integers > 32 bits are returned via the eax and edx registers
+	- floating point types are returned via the st0 register
+*/
+class x86MsStdcall: public ICallingConvention
+{
+public:
+	x86MsStdcall(std::vector<DataTypeSized_t> vecArgTypes, DataTypeSized_t returnType, int iAlignment=4);
+	~x86MsStdcall();
+
+	virtual std::list<Register_t> GetRegisters();
+	virtual int GetPopSize();
+	virtual int GetArgStackSize();
+	virtual void** GetStackArgumentPtr(CRegisters* pRegisters);
+
+	virtual void* GetArgumentPtr(int iIndex, CRegisters* pRegisters);
+	virtual void ArgumentPtrChanged(int iIndex, CRegisters* pRegisters, void* pArgumentPtr);
+
+	virtual void* GetReturnPtr(CRegisters* pRegisters);
+	virtual void ReturnPtrChanged(CRegisters* pRegisters, void* pReturnPtr);
+
+private:
+	void* m_pReturnBuffer;
+};
+
+#endif // _X86_MS_STDCALL_H
\ No newline at end of file
diff --git a/DynamicHooks/conventions/x86MsThiscall.cpp b/DynamicHooks/conventions/x86MsThiscall.cpp
new file mode 100644
index 0000000..c599db9
--- /dev/null
+++ b/DynamicHooks/conventions/x86MsThiscall.cpp
@@ -0,0 +1,161 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include "x86MsThiscall.h"
+#include <string.h>
+#include <stdlib.h>
+
+
+// ============================================================================
+// >> x86MsThiscall
+// ============================================================================
+x86MsThiscall::x86MsThiscall(std::vector<DataTypeSized_t> vecArgTypes, DataTypeSized_t returnType, int iAlignment) :
+	ICallingConvention(vecArgTypes, returnType, iAlignment)
+{
+	if (m_returnType.size > 4)
+	{
+		m_pReturnBuffer = malloc(m_returnType.size);
+	}
+	else
+	{
+		m_pReturnBuffer = NULL;
+	}
+}
+
+x86MsThiscall::~x86MsThiscall()
+{
+	if (m_pReturnBuffer)
+	{
+		free(m_pReturnBuffer);
+	}
+}
+
+std::list<Register_t> x86MsThiscall::GetRegisters()
+{
+	std::list<Register_t> registers;
+
+	registers.push_back(ESP);
+	registers.push_back(ECX);
+
+	if (m_returnType.type == DATA_TYPE_FLOAT || m_returnType.type == DATA_TYPE_DOUBLE)
+	{
+		registers.push_back(ST0);
+	}
+	else
+	{
+		registers.push_back(EAX);
+		if (m_pReturnBuffer)
+		{
+			registers.push_back(EDX);
+		}
+	}
+
+	return registers;
+}
+
+int x86MsThiscall::GetPopSize()
+{
+	// This pointer.
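+	// (MSVC thiscall passes |this| in ecx, so it never contributes to the pop
+	// size; a GCC-style thiscall would pass it as a hidden first stack
+	// argument instead, which is what the FIXME below is about.)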
+ // FIXME LINUX + //int iPopSize = GetDataTypeSize(DATA_TYPE_POINTER, m_iAlignment); + int iPopSize = 0; + + for(unsigned int i=0; i < m_vecArgTypes.size(); i++) + { + iPopSize += m_vecArgTypes[i].size; + } + + return iPopSize; +} + +int x86MsThiscall::GetArgStackSize() +{ + int iArgStackSize = 0; + + for (unsigned int i = 0; i < m_vecArgTypes.size(); i++) + { + iArgStackSize += m_vecArgTypes[i].size; + } + + return iArgStackSize; +} + +void** x86MsThiscall::GetStackArgumentPtr(CRegisters* pRegisters) +{ + return (void **)(pRegisters->m_esp->GetValue() + 4); +} + +void* x86MsThiscall::GetArgumentPtr(int iIndex, CRegisters* pRegisters) +{ + if (iIndex == 0) + { + return pRegisters->m_ecx->m_pAddress; + } + + int iOffset = 4; + for(int i=0; i < iIndex-1; i++) + { + iOffset += m_vecArgTypes[i].size; + } + + return (void *) (pRegisters->m_esp->GetValue() + iOffset); +} + +void x86MsThiscall::ArgumentPtrChanged(int iIndex, CRegisters* pRegisters, void* pArgumentPtr) +{ +} + +void* x86MsThiscall::GetReturnPtr(CRegisters* pRegisters) +{ + if (m_returnType.type == DATA_TYPE_FLOAT || m_returnType.type == DATA_TYPE_DOUBLE) + return pRegisters->m_st0->m_pAddress; + + if (m_pReturnBuffer) + { + // First half in eax, second half in edx + memcpy(m_pReturnBuffer, pRegisters->m_eax, 4); + memcpy((void *) ((unsigned long) m_pReturnBuffer + 4), pRegisters->m_edx, 4); + return m_pReturnBuffer; + } + + return pRegisters->m_eax->m_pAddress; +} + +void x86MsThiscall::ReturnPtrChanged(CRegisters* pRegisters, void* pReturnPtr) +{ + if (m_pReturnBuffer) + { + // First half in eax, second half in edx + memcpy(pRegisters->m_eax, m_pReturnBuffer, 4); + memcpy(pRegisters->m_edx, (void *) ((unsigned long) m_pReturnBuffer + 4), 4); + } +} \ No newline at end of file diff --git a/DynamicHooks/conventions/x86MsThiscall.h b/DynamicHooks/conventions/x86MsThiscall.h new file mode 100644 index 0000000..1c5490b --- /dev/null +++ b/DynamicHooks/conventions/x86MsThiscall.h @@ -0,0 +1,85 @@ +/** +* ============================================================================= +* DynamicHooks +* Copyright (C) 2015 Robin Gohmert. All rights reserved. +* ============================================================================= +* +* This software is provided 'as-is', without any express or implied warranty. +* In no event will the authors be held liable for any damages arising from +* the use of this software. +* +* Permission is granted to anyone to use this software for any purpose, +* including commercial applications, and to alter it and redistribute it +* freely, subject to the following restrictions: +* +* 1. The origin of this software must not be misrepresented; you must not +* claim that you wrote the original software. If you use this software in a +* product, an acknowledgment in the product documentation would be +* appreciated but is not required. +* +* 2. Altered source versions must be plainly marked as such, and must not be +* misrepresented as being the original software. +* +* 3. This notice may not be removed or altered from any source distribution. +* +* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc +* -fPIC thunks correctly +* +* Idea and trampoline code taken from DynDetours (thanks your-name-here). 
+*/
+
+#ifndef _X86_MS_THISCALL_H
+#define _X86_MS_THISCALL_H
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include "../convention.h"
+
+
+// ============================================================================
+// >> CLASSES
+// ============================================================================
+/*
+Source: DynCall manual and Windows docs
+
+Registers:
+	- eax = return value
+	- ecx = this pointer
+	- edx = return value
+	- esp = stack pointer
+	- st0 = floating point return value
+
+Parameter passing:
+	- stack parameter order: right-to-left
+	- callee cleans up the stack
+	- all other arguments are pushed onto the stack
+	- alignment: 4 bytes
+
+Return values:
+	- return values of pointer or integral type (<= 32 bits) are returned via the eax register
+	- integers > 32 bits are returned via the eax and edx registers
+	- floating point types are returned via the st0 register
+*/
+class x86MsThiscall: public ICallingConvention
+{
+public:
+	x86MsThiscall(std::vector<DataTypeSized_t> vecArgTypes, DataTypeSized_t returnType, int iAlignment=4);
+	~x86MsThiscall();
+
+	virtual std::list<Register_t> GetRegisters();
+	virtual int GetPopSize();
+	virtual int GetArgStackSize();
+	virtual void** GetStackArgumentPtr(CRegisters* pRegisters);
+
+	virtual void* GetArgumentPtr(int iIndex, CRegisters* pRegisters);
+	virtual void ArgumentPtrChanged(int iIndex, CRegisters* pRegisters, void* pArgumentPtr);
+
+	virtual void* GetReturnPtr(CRegisters* pRegisters);
+	virtual void ReturnPtrChanged(CRegisters* pRegisters, void* pReturnPtr);
+
+private:
+	void* m_pReturnBuffer;
+};
+
+#endif // _X86_MS_THISCALL_H
\ No newline at end of file
diff --git a/DynamicHooks/hook.cpp b/DynamicHooks/hook.cpp
new file mode 100644
index 0000000..20ced8c
--- /dev/null
+++ b/DynamicHooks/hook.cpp
@@ -0,0 +1,634 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include "hook.h"
+#include "utilities.h"
+#include "asm.h"
+
+
+using namespace asmjit;
+using namespace asmjit::x86;
+
+// ============================================================================
+// >> DEFINITIONS
+// ============================================================================
+#define JMP_SIZE 6
+
+
+// ============================================================================
+// >> CHook
+// ============================================================================
+CHook::CHook(void* pFunc, ICallingConvention* pConvention)
+{
+	m_pFunc = pFunc;
+	m_pRegisters = new CRegisters(pConvention->GetRegisters());
+	m_pCallingConvention = pConvention;
+
+	unsigned char* pTarget = (unsigned char *) pFunc;
+
+	// Determine the number of bytes we need to copy
+	int iBytesToCopy = copy_bytes(pTarget, NULL, JMP_SIZE);
+
+	// Create an array for the bytes to copy + a jump to the rest of the
+	// function.
+	unsigned char* pCopiedBytes = new unsigned char[iBytesToCopy + JMP_SIZE];
+
+	// Fill the array with NOP instructions
+	memset(pCopiedBytes, 0x90, iBytesToCopy + JMP_SIZE);
+
+	// Copy the required bytes to our array
+	SetMemPatchable(pCopiedBytes, iBytesToCopy + JMP_SIZE);
+	copy_bytes(pTarget, pCopiedBytes, JMP_SIZE);
+
+	// After the copied bytes, write a jump back into the original function,
+	// past the bytes we copied
+	WriteJMP(pCopiedBytes + iBytesToCopy, pTarget + iBytesToCopy);
+
+	// Save the trampoline
+	m_pTrampoline = (void *) pCopiedBytes;
+
+	// Create the bridge function
+	m_pBridge = CreateBridge();
+
+	// Write a jump to the bridge
+	WriteJMP((unsigned char *) pFunc, m_pBridge);
+}
+
+CHook::~CHook()
+{
+	// Copy back the previously copied bytes
+	copy_bytes((unsigned char *) m_pTrampoline, (unsigned char *) m_pFunc, JMP_SIZE);
+
+	// Free the trampoline array (it was allocated with new[] above)
+	delete [] (unsigned char *) m_pTrampoline;
+
+	// Free the asm bridge and new return address
+	m_Runtime.release(m_pBridge);
+	m_Runtime.release(m_pNewRetAddr);
+
+	delete m_pRegisters;
+	delete m_pCallingConvention;
+}
+
+void CHook::AddCallback(HookType_t eHookType, HookHandlerFn* pCallback)
+{
+	if (!pCallback)
+		return;
+
+	if (!IsCallbackRegistered(eHookType, pCallback))
+		m_hookHandler[eHookType].push_back(pCallback);
+}
+
+void CHook::RemoveCallback(HookType_t eHookType, HookHandlerFn* pCallback)
+{
+	if (IsCallbackRegistered(eHookType, pCallback))
+		m_hookHandler[eHookType].remove(pCallback);
+}
+
+bool CHook::IsCallbackRegistered(HookType_t eHookType, HookHandlerFn* pCallback)
+{
+	std::list<HookHandlerFn *> callbacks = m_hookHandler[eHookType];
+	for(std::list<HookHandlerFn *>::iterator it=callbacks.begin(); it != callbacks.end(); it++)
+	{
+		if (*it == pCallback)
+			return true;
+	}
+	return false;
+}
+
+bool CHook::AreCallbacksRegistered()
+{
+	return !m_hookHandler[HOOKTYPE_PRE].empty() || !m_hookHandler[HOOKTYPE_POST].empty();
+}
+
+bool CHook::HookHandler(HookType_t eHookType)
+{
+	bool bOverride = false;
+	std::list<HookHandlerFn *> callbacks = this->m_hookHandler[eHookType];
+	for(std::list<HookHandlerFn *>::iterator it=callbacks.begin(); it != callbacks.end(); it++)
+	{
+		bool result = ((HookHandlerFn) *it)(eHookType, this);
+		if (result)
+			bOverride = true;
+	}
+	return bOverride;
+}
+
+void* __cdecl CHook::GetReturnAddress(void* pESP)
+{
+	if (m_RetAddr.count(pESP) == 0)
+		puts("ESP not present.");
+
+	return m_RetAddr[pESP];
+}
+
+void __cdecl CHook::SetReturnAddress(void* pRetAddr, void* pESP)
+{ + m_RetAddr[pESP] = pRetAddr; +} + +void* CHook::CreateBridge() +{ + X86Assembler a(&m_Runtime); + Label label_supercede = a.newLabel(); + + // Write a redirect to the post-hook code + Write_ModifyReturnAddress(a); + + // Call the pre-hook handler and jump to label_supercede if true was returned + Write_CallHandler(a, HOOKTYPE_PRE); + a.cmp(eax.r8(), true); + + // Restore the previously saved registers, so any changes will be applied + Write_RestoreRegisters(a); + + a.je(label_supercede); + + // Jump to the trampoline + a.jmp(Ptr(m_pTrampoline)); + + // This code will be executed if a pre-hook returns true + a.bind(label_supercede); + + // Finally, return to the caller + // This will still call post hooks, but will skip the original function. + a.ret(imm(m_pCallingConvention->GetPopSize())); + + return a.make(); +} + +void CHook::Write_ModifyReturnAddress(X86Assembler& a) +{ + // Save scratch registers that are used by SetReturnAddress + static void* pEAX = NULL; + static void* pECX = NULL; + static void* pEDX = NULL; + a.mov(dword_ptr_abs(Ptr(&pEAX)), eax); + a.mov(dword_ptr_abs(Ptr(&pECX)), ecx); + a.mov(dword_ptr_abs(Ptr(&pEDX)), edx); + + // Store the return address in eax + a.mov(eax, dword_ptr(esp)); + + // Save the original return address by using the current esp as the key. + // This should be unique until we have returned to the original caller. + void (__cdecl CHook::*SetReturnAddress)(void*, void*) = &CHook::SetReturnAddress; + a.push(esp); + a.push(eax); + a.push(imm_ptr(this)); + a.call(imm_ptr((void *&)SetReturnAddress)); + a.add(esp, 12); + + // Restore scratch registers + a.mov(eax, dword_ptr_abs(Ptr(&pEAX))); + a.mov(ecx, dword_ptr_abs(Ptr(&pECX))); + a.mov(edx, dword_ptr_abs(Ptr(&pEDX))); + + // Override the return address. This is a redirect to our post-hook code + m_pNewRetAddr = CreatePostCallback(); + a.mov(dword_ptr(esp), imm_ptr(m_pNewRetAddr)); +} + +void* CHook::CreatePostCallback() +{ + X86Assembler a(&m_Runtime); + + int iPopSize = m_pCallingConvention->GetPopSize(); + + // Subtract the previously added bytes (stack size + return address), so + // that we can access the arguments again + a.sub(esp, imm(iPopSize+4)); + + // Call the post-hook handler + Write_CallHandler(a, HOOKTYPE_POST); + + // Restore the previously saved registers, so any changes will be applied + Write_RestoreRegisters(a); + + // Save scratch registers that are used by GetReturnAddress + static void* pEAX = NULL; + static void* pECX = NULL; + static void* pEDX = NULL; + a.mov(dword_ptr_abs(Ptr(&pEAX)), eax); + a.mov(dword_ptr_abs(Ptr(&pECX)), ecx); + a.mov(dword_ptr_abs(Ptr(&pEDX)), edx); + + // Get the original return address + void* (__cdecl CHook::*GetReturnAddress)(void*) = &CHook::GetReturnAddress; + a.push(esp); + a.push(imm_ptr(this)); + a.call(imm_ptr((void *&)GetReturnAddress)); + a.add(esp, 8); + + // Save the original return address + static void* pRetAddr = NULL; + a.mov(dword_ptr_abs(Ptr(&pRetAddr)), eax); + + // Restore scratch registers + a.mov(eax, dword_ptr_abs(Ptr(&pEAX))); + a.mov(ecx, dword_ptr_abs(Ptr(&pECX))); + a.mov(edx, dword_ptr_abs(Ptr(&pEDX))); + + // Add the bytes again to the stack (stack size + return address), so we + // don't corrupt the stack. 
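+	// (After the earlier "sub esp, iPopSize + 4", [esp] again held the slot of
+	// the consumed return address and [esp + 4] the first stack argument;
+	// undoing that here hands esp back to the original caller untouched.)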
+	a.add(esp, imm(iPopSize+4));
+
+	// Jump to the original return address
+	a.jmp(dword_ptr_abs(Ptr(&pRetAddr)));
+
+	// Generate the code
+	return a.make();
+}
+
+void CHook::Write_CallHandler(X86Assembler& a, HookType_t type)
+{
+	bool (__cdecl CHook::*HookHandler)(HookType_t) = &CHook::HookHandler;
+
+	// Save the registers so that we can access them in our handlers
+	Write_SaveRegisters(a);
+
+	// Call the global hook handler
+	a.push(type);
+	a.push(imm_ptr(this));
+	a.call(imm_ptr((void *&)HookHandler));
+	a.add(esp, 8);
+}
+
+void CHook::Write_SaveRegisters(X86Assembler& a)
+{
+	std::list<Register_t> vecRegistersToSave = m_pCallingConvention->GetRegisters();
+	for(std::list<Register_t>::iterator it=vecRegistersToSave.begin(); it != vecRegistersToSave.end(); it++)
+	{
+		switch(*it)
+		{
+		// ========================================================================
+		// >> 8-bit General purpose registers
+		// ========================================================================
+		case AL: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_al->m_pAddress)), al); break;
+		case CL: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_cl->m_pAddress)), cl); break;
+		case DL: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_dl->m_pAddress)), dl); break;
+		case BL: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_bl->m_pAddress)), bl); break;
+
+#if defined(ASMJIT_X64)
+		// 64-bit mode only
+		case SPL: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_spl->m_pAddress)), spl); break;
+		case BPL: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_bpl->m_pAddress)), bpl); break;
+		case SIL: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_sil->m_pAddress)), sil); break;
+		case DIL: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_dil->m_pAddress)), dil); break;
+		case R8B: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_r8b->m_pAddress)), r8b); break;
+		case R9B: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_r9b->m_pAddress)), r9b); break;
+		case R10B: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_r10b->m_pAddress)), r10b); break;
+		case R11B: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_r11b->m_pAddress)), r11b); break;
+		case R12B: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_r12b->m_pAddress)), r12b); break;
+		case R13B: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_r13b->m_pAddress)), r13b); break;
+		case R14B: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_r14b->m_pAddress)), r14b); break;
+		case R15B: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_r15b->m_pAddress)), r15b); break;
+#endif // ASMJIT_X64
+
+		case AH: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_ah->m_pAddress)), ah); break;
+		case CH: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_ch->m_pAddress)), ch); break;
+		case DH: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_dh->m_pAddress)), dh); break;
+		case BH: a.mov(byte_ptr_abs(Ptr(m_pRegisters->m_bh->m_pAddress)), bh); break;
+
+		// ========================================================================
+		// >> 16-bit General purpose registers
+		// ========================================================================
+		case AX: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_ax->m_pAddress)), ax); break;
+		case CX: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_cx->m_pAddress)), cx); break;
+		case DX: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_dx->m_pAddress)), dx); break;
+		case BX: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_bx->m_pAddress)), bx); break;
+		case SP: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_sp->m_pAddress)), x86::sp); break;
+		case BP: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_bp->m_pAddress)), bp); break;
+		case SI: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_si->m_pAddress)), si); break;
+		case DI: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_di->m_pAddress)), di); break;
+
+#if 
defined(ASMJIT_X64) + // 64-bit mode only + case R8W: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_r8w->m_pAddress)), r8w); break; + case R9W: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_r9w->m_pAddress)), r9w); break; + case R10W: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_r10w->m_pAddress)), r10w); break; + case R11W: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_r11w->m_pAddress)), r11w); break; + case R12W: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_r12w->m_pAddress)), r12w); break; + case R13W: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_r13w->m_pAddress)), r13w); break; + case R14W: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_r14w->m_pAddress)), r14w); break; + case R15W: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_r15w->m_pAddress)), r15w); break; +#endif // ASMJIT_X64 + + // ======================================================================== + // >> 32-bit General purpose registers + // ======================================================================== + case EAX: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_eax->m_pAddress)), eax); break; + case ECX: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_ecx->m_pAddress)), ecx); break; + case EDX: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_edx->m_pAddress)), edx); break; + case EBX: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_ebx->m_pAddress)), ebx); break; + case ESP: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_esp->m_pAddress)), esp); break; + case EBP: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_ebp->m_pAddress)), ebp); break; + case ESI: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_esi->m_pAddress)), esi); break; + case EDI: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_edi->m_pAddress)), edi); break; + +#if defined(ASMJIT_X64) + // 64-bit mode only + case R8D: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_r8d->m_pAddress)), r8d); break; + case R9D: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_r9d->m_pAddress)), r9d); break; + case R10D: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_r10d->m_pAddress)), r10d); break; + case R11D: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_r11d->m_pAddress)), r11d); break; + case R12D: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_r12d->m_pAddress)), r12d); break; + case R13D: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_r13d->m_pAddress)), r13d); break; + case R14D: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_r14d->m_pAddress)), r14d); break; + case R15D: a.mov(dword_ptr_abs(Ptr(m_pRegisters->m_r15d->m_pAddress)), r15d); break; +#endif // ASMJIT_X64 + + // ======================================================================== + // >> 64-bit General purpose registers + // ======================================================================== +#if defined(ASMJIT_X64) + // 64-bit mode only + case RAX: a.mov(qword_ptr_abs((m_pRegisters->m_rax->m_pAddress)), rax); break; + case RCX: a.mov(qword_ptr_abs((m_pRegisters->m_rcx->m_pAddress)), rcx); break; + case RDX: a.mov(qword_ptr_abs((m_pRegisters->m_rdx->m_pAddress)), rdx); break; + case RBX: a.mov(qword_ptr_abs((m_pRegisters->m_rbx->m_pAddress)), rbx); break; + case RSP: a.mov(qword_ptr_abs((m_pRegisters->m_rsp->m_pAddress)), rsp); break; + case RBP: a.mov(qword_ptr_abs((m_pRegisters->m_rbp->m_pAddress)), rbp); break; + case RSI: a.mov(qword_ptr_abs((m_pRegisters->m_rsi->m_pAddress)), rsi); break; + case RDI: a.mov(qword_ptr_abs((m_pRegisters->m_rdi->m_pAddress)), rdi); break; +#endif // ASMJIT_X64 + +#if defined(ASMJIT_X64) + // 64-bit mode only + case R8: a.mov(qword_ptr_abs(Ptr(m_pRegisters->m_r8->m_pAddress)), r8); break; + case R9: a.mov(qword_ptr_abs(Ptr(m_pRegisters->m_r9->m_pAddress)), r9); break; + case R10: 
a.mov(qword_ptr_abs(Ptr(m_pRegisters->m_r10->m_pAddress)), r10); break; + case R11: a.mov(qword_ptr_abs(Ptr(m_pRegisters->m_r11->m_pAddress)), r11); break; + case R12: a.mov(qword_ptr_abs(Ptr(m_pRegisters->m_r12->m_pAddress)), r12); break; + case R13: a.mov(qword_ptr_abs(Ptr(m_pRegisters->m_r13->m_pAddress)), r13); break; + case R14: a.mov(qword_ptr_abs(Ptr(m_pRegisters->m_r14->m_pAddress)), r14); break; + case R15: a.mov(qword_ptr_abs(Ptr(m_pRegisters->m_r15->m_pAddress)), r15); break; +#endif // ASMJIT_X64 + + // ======================================================================== + // >> 64-bit MM (MMX) registers + // ======================================================================== + case MM0: a.movq(qword_ptr_abs(Ptr(m_pRegisters->m_mm0->m_pAddress)), mm0); break; + case MM1: a.movq(qword_ptr_abs(Ptr(m_pRegisters->m_mm1->m_pAddress)), mm1); break; + case MM2: a.movq(qword_ptr_abs(Ptr(m_pRegisters->m_mm2->m_pAddress)), mm2); break; + case MM3: a.movq(qword_ptr_abs(Ptr(m_pRegisters->m_mm3->m_pAddress)), mm3); break; + case MM4: a.movq(qword_ptr_abs(Ptr(m_pRegisters->m_mm4->m_pAddress)), mm4); break; + case MM5: a.movq(qword_ptr_abs(Ptr(m_pRegisters->m_mm5->m_pAddress)), mm5); break; + case MM6: a.movq(qword_ptr_abs(Ptr(m_pRegisters->m_mm6->m_pAddress)), mm6); break; + case MM7: a.movq(qword_ptr_abs(Ptr(m_pRegisters->m_mm7->m_pAddress)), mm7); break; + + // ======================================================================== + // >> 128-bit XMM registers + // ======================================================================== + // TODO: Also provide movups? + case XMM0: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm0->m_pAddress)), xmm0); break; + case XMM1: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm1->m_pAddress)), xmm1); break; + case XMM2: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm2->m_pAddress)), xmm2); break; + case XMM3: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm3->m_pAddress)), xmm3); break; + case XMM4: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm4->m_pAddress)), xmm4); break; + case XMM5: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm5->m_pAddress)), xmm5); break; + case XMM6: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm6->m_pAddress)), xmm6); break; + case XMM7: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm7->m_pAddress)), xmm7); break; + +#if defined(ASMJIT_X64) + // 64-bit mode only + case XMM8: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm8->m_pAddress)), xmm8); break; + case XMM9: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm9->m_pAddress)), xmm9); break; + case XMM10: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm10->m_pAddress)), xmm10); break; + case XMM11: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm11->m_pAddress)), xmm11); break; + case XMM12: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm12->m_pAddress)), xmm12); break; + case XMM13: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm13->m_pAddress)), xmm13); break; + case XMM14: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm14->m_pAddress)), xmm14); break; + case XMM15: a.movaps(qword_ptr_abs(Ptr(m_pRegisters->m_xmm15->m_pAddress)), xmm15); break; +#endif // ASMJIT_X64 + + // ======================================================================== + // >> 16-bit Segment registers + // ======================================================================== + case CS: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_cs->m_pAddress)), cs); break; + case SS: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_ss->m_pAddress)), ss); break; + case DS: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_ds->m_pAddress)), 
ds); break;
+		case ES: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_es->m_pAddress)), es); break;
+		case FS: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_fs->m_pAddress)), fs); break;
+		case GS: a.mov(word_ptr_abs(Ptr(m_pRegisters->m_gs->m_pAddress)), gs); break;
+
+		// ========================================================================
+		// >> 80-bit FPU registers
+		// ========================================================================
+		case ST0: a.fst(dword_ptr_abs(Ptr(m_pRegisters->m_st0->m_pAddress))); break;
+		//case ST1: a.mov(tword_ptr_abs(Ptr(m_pRegisters->m_st1->m_pAddress)), st1); break;
+		//case ST2: a.mov(tword_ptr_abs(Ptr(m_pRegisters->m_st2->m_pAddress)), st2); break;
+		//case ST3: a.mov(tword_ptr_abs(Ptr(m_pRegisters->m_st3->m_pAddress)), st3); break;
+		//case ST4: a.mov(tword_ptr_abs(Ptr(m_pRegisters->m_st4->m_pAddress)), st4); break;
+		//case ST5: a.mov(tword_ptr_abs(Ptr(m_pRegisters->m_st5->m_pAddress)), st5); break;
+		//case ST6: a.mov(tword_ptr_abs(Ptr(m_pRegisters->m_st6->m_pAddress)), st6); break;
+		//case ST7: a.mov(tword_ptr_abs(Ptr(m_pRegisters->m_st7->m_pAddress)), st7); break;
+
+		default: puts("Unsupported register.");
+		}
+	}
+}
+
+void CHook::Write_RestoreRegisters(X86Assembler& a)
+{
+	std::list<Register_t> vecRegistersToSave = m_pCallingConvention->GetRegisters();
+	for(std::list<Register_t>::iterator it=vecRegistersToSave.begin(); it != vecRegistersToSave.end(); it++)
+	{
+		switch(*it)
+		{
+		// ========================================================================
+		// >> 8-bit General purpose registers
+		// ========================================================================
+		case AL: a.mov(al, byte_ptr_abs(Ptr(m_pRegisters->m_al->m_pAddress))); break;
+		case CL: a.mov(cl, byte_ptr_abs(Ptr(m_pRegisters->m_cl->m_pAddress))); break;
+		case DL: a.mov(dl, byte_ptr_abs(Ptr(m_pRegisters->m_dl->m_pAddress))); break;
+		case BL: a.mov(bl, byte_ptr_abs(Ptr(m_pRegisters->m_bl->m_pAddress))); break;
+
+#if defined(ASMJIT_X64)
+		// 64-bit mode only
+		case SPL: a.mov(spl, byte_ptr_abs(Ptr(m_pRegisters->m_spl->m_pAddress))); break;
+		case BPL: a.mov(bpl, byte_ptr_abs(Ptr(m_pRegisters->m_bpl->m_pAddress))); break;
+		case SIL: a.mov(sil, byte_ptr_abs(Ptr(m_pRegisters->m_sil->m_pAddress))); break;
+		case DIL: a.mov(dil, byte_ptr_abs(Ptr(m_pRegisters->m_dil->m_pAddress))); break;
+		case R8B: a.mov(r8b, byte_ptr_abs(Ptr(m_pRegisters->m_r8b->m_pAddress))); break;
+		case R9B: a.mov(r9b, byte_ptr_abs(Ptr(m_pRegisters->m_r9b->m_pAddress))); break;
+		case R10B: a.mov(r10b, byte_ptr_abs(Ptr(m_pRegisters->m_r10b->m_pAddress))); break;
+		case R11B: a.mov(r11b, byte_ptr_abs(Ptr(m_pRegisters->m_r11b->m_pAddress))); break;
+		case R12B: a.mov(r12b, byte_ptr_abs(Ptr(m_pRegisters->m_r12b->m_pAddress))); break;
+		case R13B: a.mov(r13b, byte_ptr_abs(Ptr(m_pRegisters->m_r13b->m_pAddress))); break;
+		case R14B: a.mov(r14b, byte_ptr_abs(Ptr(m_pRegisters->m_r14b->m_pAddress))); break;
+		case R15B: a.mov(r15b, byte_ptr_abs(Ptr(m_pRegisters->m_r15b->m_pAddress))); break;
+#endif // ASMJIT_X64
+
+		case AH: a.mov(ah, byte_ptr_abs(Ptr(m_pRegisters->m_ah->m_pAddress))); break;
+		case CH: a.mov(ch, byte_ptr_abs(Ptr(m_pRegisters->m_ch->m_pAddress))); break;
+		case DH: a.mov(dh, byte_ptr_abs(Ptr(m_pRegisters->m_dh->m_pAddress))); break;
+		case BH: a.mov(bh, byte_ptr_abs(Ptr(m_pRegisters->m_bh->m_pAddress))); break;
+
+		// ========================================================================
+		// >> 16-bit General purpose registers
+		// ========================================================================
+		case AX: 
a.mov(ax, word_ptr_abs(Ptr(m_pRegisters->m_ax->m_pAddress))); break; + case CX: a.mov(cx, word_ptr_abs(Ptr(m_pRegisters->m_cx->m_pAddress))); break; + case DX: a.mov(dx, word_ptr_abs(Ptr(m_pRegisters->m_dx->m_pAddress))); break; + case BX: a.mov(bx, word_ptr_abs(Ptr(m_pRegisters->m_bx->m_pAddress))); break; + case SP: a.mov(x86::sp, word_ptr_abs(Ptr(m_pRegisters->m_sp->m_pAddress))); break; + case BP: a.mov(bp, word_ptr_abs(Ptr(m_pRegisters->m_bp->m_pAddress))); break; + case SI: a.mov(si, word_ptr_abs(Ptr(m_pRegisters->m_si->m_pAddress))); break; + case DI: a.mov(di, word_ptr_abs(Ptr(m_pRegisters->m_di->m_pAddress))); break; + +#if defined(ASMJIT_X64) + // 64-bit mode only + case R8W: a.mov(r8w, word_ptr_abs(Ptr(m_pRegisters->m_r8w->m_pAddress))); break; + case R9W: a.mov(r9w, word_ptr_abs(Ptr(m_pRegisters->m_r9w->m_pAddress))); break; + case R10W: a.mov(r10w, word_ptr_abs(Ptr(m_pRegisters->m_r10w->m_pAddress))); break; + case R11W: a.mov(r11w, word_ptr_abs(Ptr(m_pRegisters->m_r11w->m_pAddress))); break; + case R12W: a.mov(r12w, word_ptr_abs(Ptr(m_pRegisters->m_r12w->m_pAddress))); break; + case R13W: a.mov(r13w, word_ptr_abs(Ptr(m_pRegisters->m_r13w->m_pAddress))); break; + case R14W: a.mov(r14w, word_ptr_abs(Ptr(m_pRegisters->m_r14w->m_pAddress))); break; + case R15W: a.mov(r15w, word_ptr_abs(Ptr(m_pRegisters->m_r15w->m_pAddress))); break; +#endif // ASMJIT_X64 + + // ======================================================================== + // >> 32-bit General purpose registers + // ======================================================================== + case EAX: a.mov(eax, dword_ptr_abs(Ptr(m_pRegisters->m_eax->m_pAddress))); break; + case ECX: a.mov(ecx, dword_ptr_abs(Ptr(m_pRegisters->m_ecx->m_pAddress))); break; + case EDX: a.mov(edx, dword_ptr_abs(Ptr(m_pRegisters->m_edx->m_pAddress))); break; + case EBX: a.mov(ebx, dword_ptr_abs(Ptr(m_pRegisters->m_ebx->m_pAddress))); break; + case ESP: a.mov(esp, dword_ptr_abs(Ptr(m_pRegisters->m_esp->m_pAddress))); break; + case EBP: a.mov(ebp, dword_ptr_abs(Ptr(m_pRegisters->m_ebp->m_pAddress))); break; + case ESI: a.mov(esi, dword_ptr_abs(Ptr(m_pRegisters->m_esi->m_pAddress))); break; + case EDI: a.mov(edi, dword_ptr_abs(Ptr(m_pRegisters->m_edi->m_pAddress))); break; + +#if defined(ASMJIT_X64) + // 64-bit mode only + case R8D: a.mov(r8d, qword_ptr_abs(Ptr(m_pRegisters->m_r8d->m_pAddress))); break; + case R9D: a.mov(r9d, qword_ptr_abs(Ptr(m_pRegisters->m_r9d->m_pAddress))); break; + case R10D: a.mov(r10d, qword_ptr_abs(Ptr(m_pRegisters->m_r10d->m_pAddress))); break; + case R11D: a.mov(r11d, qword_ptr_abs(Ptr(m_pRegisters->m_r11d->m_pAddress))); break; + case R12D: a.mov(r12d, qword_ptr_abs(Ptr(m_pRegisters->m_r12d->m_pAddress))); break; + case R13D: a.mov(r13d, qword_ptr_abs(Ptr(m_pRegisters->m_r13d->m_pAddress))); break; + case R14D: a.mov(r14d, qword_ptr_abs(Ptr(m_pRegisters->m_r14d->m_pAddress))); break; + case R15D: a.mov(r15d, qword_ptr_abs(Ptr(m_pRegisters->m_r15d->m_pAddress))); break; +#endif // ASMJIT_X64 + + // ======================================================================== + // >> 64-bit General purpose registers + // ======================================================================== +#if defined(ASMJIT_X64) + // 64-bit mode only + case RAX: a.mov(rax, qword_ptr_abs(Ptr(m_pRegisters->m_rax->m_pAddress))); break; + case RCX: a.mov(rcx, qword_ptr_abs(Ptr(m_pRegisters->m_rcx->m_pAddress))); break; + case RDX: a.mov(rdx, qword_ptr_abs(Ptr(m_pRegisters->m_rdx->m_pAddress))); break; + case RBX: a.mov(rbx, 
qword_ptr_abs(Ptr(m_pRegisters->m_rbx->m_pAddress))); break; + case RSP: a.mov(rsp, qword_ptr_abs(Ptr(m_pRegisters->m_rsp->m_pAddress))); break; + case RBP: a.mov(rbp, qword_ptr_abs(Ptr(m_pRegisters->m_rbp->m_pAddress))); break; + case RSI: a.mov(rsi, qword_ptr_abs(Ptr(m_pRegisters->m_rsi->m_pAddress))); break; + case RDI: a.mov(rdi, qword_ptr_abs(Ptr(m_pRegisters->m_rdi->m_pAddress))); break; +#endif // ASMJIT_X64 + +#if defined(ASMJIT_X64) + // 64-bit mode only + case R8: a.mov(r8, qword_ptr_abs(Ptr(m_pRegisters->m_r8->m_pAddress))); break; + case R9: a.mov(r9, qword_ptr_abs(Ptr(m_pRegisters->m_r9->m_pAddress))); break; + case R10: a.mov(r10, qword_ptr_abs(Ptr(m_pRegisters->m_r10->m_pAddress))); break; + case R11: a.mov(r11, qword_ptr_abs(Ptr(m_pRegisters->m_r11->m_pAddress))); break; + case R12: a.mov(r12, qword_ptr_abs(Ptr(m_pRegisters->m_r12->m_pAddress))); break; + case R13: a.mov(r13, qword_ptr_abs(Ptr(m_pRegisters->m_r13->m_pAddress))); break; + case R14: a.mov(r14, qword_ptr_abs(Ptr(m_pRegisters->m_r14->m_pAddress))); break; + case R15: a.mov(r15, qword_ptr_abs(Ptr(m_pRegisters->m_r15->m_pAddress))); break; +#endif // ASMJIT_X64 + + // ======================================================================== + // >> 64-bit MM (MMX) registers + // ======================================================================== + case MM0: a.movq(mm0, qword_ptr_abs(Ptr(m_pRegisters->m_mm0->m_pAddress))); break; + case MM1: a.movq(mm1, qword_ptr_abs(Ptr(m_pRegisters->m_mm1->m_pAddress))); break; + case MM2: a.movq(mm2, qword_ptr_abs(Ptr(m_pRegisters->m_mm2->m_pAddress))); break; + case MM3: a.movq(mm3, qword_ptr_abs(Ptr(m_pRegisters->m_mm3->m_pAddress))); break; + case MM4: a.movq(mm4, qword_ptr_abs(Ptr(m_pRegisters->m_mm4->m_pAddress))); break; + case MM5: a.movq(mm5, qword_ptr_abs(Ptr(m_pRegisters->m_mm5->m_pAddress))); break; + case MM6: a.movq(mm6, qword_ptr_abs(Ptr(m_pRegisters->m_mm6->m_pAddress))); break; + case MM7: a.movq(mm7, qword_ptr_abs(Ptr(m_pRegisters->m_mm7->m_pAddress))); break; + + // ======================================================================== + // >> 128-bit XMM registers + // ======================================================================== + // TODO: Also provide movups? 
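+		// NOTE: movaps assumes the backing storage is 16-byte aligned;
+		// CRegister allocates it with plain malloc(), which does not give
+		// that guarantee, hence the movups TODO above.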
+ case XMM0: a.movaps(xmm0, oword_ptr_abs(Ptr(m_pRegisters->m_xmm0->m_pAddress))); break; + case XMM1: a.movaps(xmm1, oword_ptr_abs(Ptr(m_pRegisters->m_xmm1->m_pAddress))); break; + case XMM2: a.movaps(xmm2, oword_ptr_abs(Ptr(m_pRegisters->m_xmm2->m_pAddress))); break; + case XMM3: a.movaps(xmm3, oword_ptr_abs(Ptr(m_pRegisters->m_xmm3->m_pAddress))); break; + case XMM4: a.movaps(xmm4, oword_ptr_abs(Ptr(m_pRegisters->m_xmm4->m_pAddress))); break; + case XMM5: a.movaps(xmm5, oword_ptr_abs(Ptr(m_pRegisters->m_xmm5->m_pAddress))); break; + case XMM6: a.movaps(xmm6, oword_ptr_abs(Ptr(m_pRegisters->m_xmm6->m_pAddress))); break; + case XMM7: a.movaps(xmm7, oword_ptr_abs(Ptr(m_pRegisters->m_xmm7->m_pAddress))); break; + +#if defined(ASMJIT_X64) + // 64-bit mode only + case XMM8: a.movaps(xmm8, qword_ptr_abs(Ptr(m_pRegisters->m_xmm8->m_pAddress))); break; + case XMM9: a.movaps(xmm9, qword_ptr_abs(Ptr(m_pRegisters->m_xmm9->m_pAddress))); break; + case XMM10: a.movaps(xmm10, qword_ptr_abs(Ptr(m_pRegisters->m_xmm10->m_pAddress))); break; + case XMM11: a.movaps(xmm11, qword_ptr_abs(Ptr(m_pRegisters->m_xmm11->m_pAddress))); break; + case XMM12: a.movaps(xmm12, qword_ptr_abs(Ptr(m_pRegisters->m_xmm12->m_pAddress))); break; + case XMM13: a.movaps(xmm13, qword_ptr_abs(Ptr(m_pRegisters->m_xmm13->m_pAddress))); break; + case XMM14: a.movaps(xmm14, qword_ptr_abs(Ptr(m_pRegisters->m_xmm14->m_pAddress))); break; + case XMM15: a.movaps(xmm15, qword_ptr_abs(Ptr(m_pRegisters->m_xmm15->m_pAddress))); break; +#endif // ASMJIT_X64 + + // ======================================================================== + // >> 16-bit Segment registers + // ======================================================================== + case CS: a.mov(cs, word_ptr_abs(Ptr(m_pRegisters->m_cs->m_pAddress))); break; + case SS: a.mov(ss, word_ptr_abs(Ptr(m_pRegisters->m_ss->m_pAddress))); break; + case DS: a.mov(ds, word_ptr_abs(Ptr(m_pRegisters->m_ds->m_pAddress))); break; + case ES: a.mov(es, word_ptr_abs(Ptr(m_pRegisters->m_es->m_pAddress))); break; + case FS: a.mov(fs, word_ptr_abs(Ptr(m_pRegisters->m_fs->m_pAddress))); break; + case GS: a.mov(gs, word_ptr_abs(Ptr(m_pRegisters->m_gs->m_pAddress))); break; + + // ======================================================================== + // >> 80-bit FPU registers + // ======================================================================== + case ST0: a.fld(dword_ptr_abs(Ptr(m_pRegisters->m_st0->m_pAddress))); break; + //case ST1: a.mov(st1, tword_ptr_abs(Ptr(m_pRegisters->m_st1->m_pAddress))); break; + //case ST2: a.mov(st2, tword_ptr_abs(Ptr(m_pRegisters->m_st2->m_pAddress))); break; + //case ST3: a.mov(st3, tword_ptr_abs(Ptr(m_pRegisters->m_st3->m_pAddress))); break; + //case ST4: a.mov(st4, tword_ptr_abs(Ptr(m_pRegisters->m_st4->m_pAddress))); break; + //case ST5: a.mov(st5, tword_ptr_abs(Ptr(m_pRegisters->m_st5->m_pAddress))); break; + //case ST6: a.mov(st6, tword_ptr_abs(Ptr(m_pRegisters->m_st6->m_pAddress))); break; + //case ST7: a.mov(st7, tword_ptr_abs(Ptr(m_pRegisters->m_st7->m_pAddress))); break; + + default: puts("Unsupported register."); + } + } +} \ No newline at end of file diff --git a/DynamicHooks/hook.h b/DynamicHooks/hook.h new file mode 100644 index 0000000..954fbfb --- /dev/null +++ b/DynamicHooks/hook.h @@ -0,0 +1,187 @@ +/** +* ============================================================================= +* DynamicHooks +* Copyright (C) 2015 Robin Gohmert. All rights reserved. 
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+#ifndef _HOOK_H
+#define _HOOK_H
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include <list>
+#include <map>
+
+#include "registers.h"
+#include "convention.h"
+#include "AsmJit/asmjit.h"
+
+// ============================================================================
+// >> HookType_t
+// ============================================================================
+enum HookType_t
+{
+	// Callback will be executed before the original function.
+	HOOKTYPE_PRE,
+
+	// Callback will be executed after the original function.
+	HOOKTYPE_POST
+};
+
+
+// ============================================================================
+// >> TYPEDEFS
+// ============================================================================
+class CHook;
+typedef bool (*HookHandlerFn)(HookType_t, CHook*);
+
+#ifdef __linux__
+#define __cdecl
+#endif
+
+
+// ============================================================================
+// >> CLASSES
+// ============================================================================
+
+class CHook
+{
+private:
+	friend class CHookManager;
+
+	/*
+	Creates a new function hook.
+
+	@param <pFunc>:
+	The address of the function to hook
+
+	@param <pConvention>:
+	The calling convention of <pFunc>.
+	*/
+	CHook(void* pFunc, ICallingConvention* pConvention);
+	~CHook();
+
+public:
+	/*
+	Adds a hook handler to the hook.
+
+	@param type The hook type.
+	@param pFunc The hook handler that should be added.
+	*/
+	void AddCallback(HookType_t type, HookHandlerFn* pFunc);
+
+	/*
+	Removes a hook handler from the hook.
+
+	@param type The hook type.
+	@param pFunc The hook handler that should be removed.
+	*/
+	void RemoveCallback(HookType_t type, HookHandlerFn* pFunc);
+
+	/*
+	Checks if a hook handler is already added.
+
+	@param type The hook type.
+	@param pFunc The hook handler that should be checked.
+	*/
+	bool IsCallbackRegistered(HookType_t type, HookHandlerFn* pFunc);
+
+	/*
+	Checks if there are any hook handlers added to this hook.
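+
+	Returns true if at least one pre- or post-hook handler is registered.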
+	*/
+	bool AreCallbacksRegistered();
+
+	template<class T>
+	T GetArgument(int iIndex)
+	{
+		return *(T *) m_pCallingConvention->GetArgumentPtr(iIndex, m_pRegisters);
+	}
+
+	template<class T>
+	void SetArgument(int iIndex, T value)
+	{
+		void* pPtr = m_pCallingConvention->GetArgumentPtr(iIndex, m_pRegisters);
+		*(T *) pPtr = value;
+		m_pCallingConvention->ArgumentPtrChanged(iIndex, m_pRegisters, pPtr);
+	}
+
+	template<class T>
+	T GetReturnValue()
+	{
+		return *(T *) m_pCallingConvention->GetReturnPtr(m_pRegisters);
+	}
+
+	template<class T>
+	void SetReturnValue(T value)
+	{
+		void* pPtr = m_pCallingConvention->GetReturnPtr(m_pRegisters);
+		*(T *) pPtr = value;
+		m_pCallingConvention->ReturnPtrChanged(m_pRegisters, pPtr);
+	}
+
+private:
+	void* CreateBridge();
+
+	void Write_ModifyReturnAddress(asmjit::X86Assembler& a);
+	void Write_CallHandler(asmjit::X86Assembler& a, HookType_t type);
+	void Write_SaveRegisters(asmjit::X86Assembler& a);
+	void Write_RestoreRegisters(asmjit::X86Assembler& a);
+
+	void* CreatePostCallback();
+
+	bool __cdecl HookHandler(HookType_t type);
+
+	void* __cdecl GetReturnAddress(void* pESP);
+	void __cdecl SetReturnAddress(void* pRetAddr, void* pESP);
+
+public:
+	std::map<HookType_t, std::list<HookHandlerFn *> > m_hookHandler;
+
+	// Address of the original function
+	void* m_pFunc;
+
+	asmjit::JitRuntime m_Runtime;
+
+	ICallingConvention* m_pCallingConvention;
+
+	// Address of the bridge
+	void* m_pBridge;
+
+	// Address of the trampoline
+	void* m_pTrampoline;
+
+	// Register storage
+	CRegisters* m_pRegisters;
+
+	// New return address
+	void* m_pNewRetAddr;
+
+	std::map<void *, void *> m_RetAddr;
+};
+
+#endif // _HOOK_H
\ No newline at end of file
diff --git a/DynamicHooks/manager.cpp b/DynamicHooks/manager.cpp
new file mode 100644
index 0000000..b82ac73
--- /dev/null
+++ b/DynamicHooks/manager.cpp
@@ -0,0 +1,97 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include "manager.h"
+
+
+// ============================================================================
+// >> CHookManager
+// ============================================================================
+CHook* CHookManager::HookFunction(void* pFunc, ICallingConvention* pConvention)
+{
+	if (!pFunc)
+		return NULL;
+
+	CHook* pHook = FindHook(pFunc);
+	if (pHook)
+	{
+		delete pConvention;
+		return pHook;
+	}
+
+	pHook = new CHook(pFunc, pConvention);
+	m_Hooks.push_back(pHook);
+	return pHook;
+}
+
+void CHookManager::UnhookFunction(void* pFunc)
+{
+	CHook* pHook = FindHook(pFunc);
+	if (pHook)
+	{
+		m_Hooks.remove(pHook);
+		delete pHook;
+	}
+}
+
+CHook* CHookManager::FindHook(void* pFunc)
+{
+	if (!pFunc)
+		return NULL;
+
+	for(std::list<CHook *>::iterator it=m_Hooks.begin(); it != m_Hooks.end(); it++)
+	{
+		CHook* pHook = *it;
+		if (pHook->m_pFunc == pFunc)
+			return pHook;
+	}
+	return NULL;
+}
+
+void CHookManager::UnhookAllFunctions()
+{
+	for(std::list<CHook *>::iterator it=m_Hooks.begin(); it != m_Hooks.end(); it++)
+		delete *it;
+
+	m_Hooks.clear();
+}
+
+
+// ============================================================================
+// >> GetHookManager
+// ============================================================================
+CHookManager* GetHookManager()
+{
+	static CHookManager* s_pManager = new CHookManager;
+	return s_pManager;
+}
\ No newline at end of file
diff --git a/DynamicHooks/manager.h b/DynamicHooks/manager.h
new file mode 100644
index 0000000..6899b62
--- /dev/null
+++ b/DynamicHooks/manager.h
@@ -0,0 +1,83 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+#ifndef _MANAGER_H
+#define _MANAGER_H
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include <list>
+#include "hook.h"
+#include "convention.h"
+
+
+// ============================================================================
+// >> CHookManager
+// ============================================================================
+class CHookManager
+{
+public:
+	/*
+	Hooks the given function and returns a new CHook instance. If the
+	function was already hooked, the existing CHook instance will be
+	returned.
+	*/
+	CHook* HookFunction(void* pFunc, ICallingConvention* pConvention);
+
+	/*
+	Removes all callbacks and restores the original function.
+	*/
+	void UnhookFunction(void* pFunc);
+
+	/*
+	Returns either NULL or the found CHook instance.
+	*/
+	CHook* FindHook(void* pFunc);
+
+	/*
+	Removes all callbacks and restores all functions.
+	*/
+	void UnhookAllFunctions();
+
+public:
+	std::list<CHook *> m_Hooks;
+};
+
+
+// ============================================================================
+// >> GetHookManager
+// ============================================================================
+/*
+Returns a pointer to a static CHookManager object.
+*/
+CHookManager* GetHookManager();
+
+#endif // _MANAGER_H
\ No newline at end of file
diff --git a/DynamicHooks/registers.cpp b/DynamicHooks/registers.cpp
new file mode 100644
index 0000000..04812fc
--- /dev/null
+++ b/DynamicHooks/registers.cpp
@@ -0,0 +1,381 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+#include "registers.h"
+
+CRegisters::CRegisters(std::list<Register_t> registers)
+{
+	// ========================================================================
+	// >> 8-bit General purpose registers
+	// ========================================================================
+	m_al = CreateRegister(registers, AL, 1);
+	m_cl = CreateRegister(registers, CL, 1);
+	m_dl = CreateRegister(registers, DL, 1);
+	m_bl = CreateRegister(registers, BL, 1);
+
+	// 64-bit mode only
+	/*
+	m_spl = CreateRegister(registers, SPL, 1);
+	m_bpl = CreateRegister(registers, BPL, 1);
+	m_sil = CreateRegister(registers, SIL, 1);
+	m_dil = CreateRegister(registers, DIL, 1);
+	m_r8b = CreateRegister(registers, R8B, 1);
+	m_r9b = CreateRegister(registers, R9B, 1);
+	m_r10b = CreateRegister(registers, R10B, 1);
+	m_r11b = CreateRegister(registers, R11B, 1);
+	m_r12b = CreateRegister(registers, R12B, 1);
+	m_r13b = CreateRegister(registers, R13B, 1);
+	m_r14b = CreateRegister(registers, R14B, 1);
+	m_r15b = CreateRegister(registers, R15B, 1);
+	*/
+
+	m_ah = CreateRegister(registers, AH, 1);
+	m_ch = CreateRegister(registers, CH, 1);
+	m_dh = CreateRegister(registers, DH, 1);
+	m_bh = CreateRegister(registers, BH, 1);
+
+	// ========================================================================
+	// >> 16-bit General purpose registers
+	// ========================================================================
+	m_ax = CreateRegister(registers, AX, 2);
+	m_cx = CreateRegister(registers, CX, 2);
+	m_dx = CreateRegister(registers, DX, 2);
+	m_bx = CreateRegister(registers, BX, 2);
+	m_sp = CreateRegister(registers, SP, 2);
+	m_bp = CreateRegister(registers, BP, 2);
+	m_si = CreateRegister(registers, SI, 2);
+	m_di = CreateRegister(registers, DI, 2);
+
+	// 64-bit mode only
+	/*
+	m_r8w = CreateRegister(registers, R8W, 2);
+	m_r9w = CreateRegister(registers, R9W, 2);
+	m_r10w = CreateRegister(registers, R10W, 2);
+	m_r11w = CreateRegister(registers, R11W, 2);
+	m_r12w = CreateRegister(registers, R12W, 2);
+	m_r13w = CreateRegister(registers, R13W, 2);
+	m_r14w = CreateRegister(registers, R14W, 2);
+	m_r15w = CreateRegister(registers, R15W, 2);
+	*/
+
+	// ========================================================================
+	// >> 32-bit General purpose registers
+	// ========================================================================
+	m_eax = CreateRegister(registers, EAX, 4);
+	m_ecx = CreateRegister(registers, ECX, 4);
+	m_edx = CreateRegister(registers, EDX, 4);
+	m_ebx = CreateRegister(registers, EBX, 4);
+	m_esp = CreateRegister(registers, ESP, 4);
+	m_ebp = CreateRegister(registers, EBP, 4);
+	m_esi = CreateRegister(registers, ESI, 4);
+	m_edi = CreateRegister(registers, EDI, 4);
+
+	// 64-bit mode only
+	/*
+	m_r8d = CreateRegister(registers, R8D, 4);
+	m_r9d = CreateRegister(registers, R9D, 4);
+	m_r10d = CreateRegister(registers, R10D, 4);
+	m_r11d = CreateRegister(registers, R11D, 4);
+	m_r12d = CreateRegister(registers, R12D, 4);
+	m_r13d = CreateRegister(registers, R13D, 4);
+	m_r14d = CreateRegister(registers, R14D, 4);
+	m_r15d = CreateRegister(registers, R15D, 4);
+	*/
+
+	// ========================================================================
+	// >> 64-bit General purpose registers
+	// ========================================================================
+	// 64-bit mode only
+	/*
+	m_rax = CreateRegister(registers, RAX, 8);
+	m_rcx = CreateRegister(registers, RCX, 8);
+	m_rdx = CreateRegister(registers, RDX, 8);
+	m_rbx = CreateRegister(registers, RBX, 8);
+	m_rsp = 
CreateRegister(registers, RSP, 8); + m_rbp = CreateRegister(registers, RBP, 8); + m_rsi = CreateRegister(registers, RSI, 8); + m_rdi = CreateRegister(registers, RDI, 8); + */ + + // 64-bit mode only + /* + m_r8 = CreateRegister(registers, R8, 8); + m_r9 = CreateRegister(registers, R9, 8); + m_r10 = CreateRegister(registers, R10, 8); + m_r11 = CreateRegister(registers, R11, 8); + m_r12 = CreateRegister(registers, R12, 8); + m_r13 = CreateRegister(registers, R13, 8); + m_r14 = CreateRegister(registers, R14, 8); + m_r15 = CreateRegister(registers, R15, 8); + */ + + // ======================================================================== + // >> 64-bit MM (MMX) registers + // ======================================================================== + m_mm0 = CreateRegister(registers, MM0, 8); + m_mm1 = CreateRegister(registers, MM1, 8); + m_mm2 = CreateRegister(registers, MM2, 8); + m_mm3 = CreateRegister(registers, MM3, 8); + m_mm4 = CreateRegister(registers, MM4, 8); + m_mm5 = CreateRegister(registers, MM5, 8); + m_mm6 = CreateRegister(registers, MM6, 8); + m_mm7 = CreateRegister(registers, MM7, 8); + + // ======================================================================== + // >> 128-bit XMM registers + // ======================================================================== + m_xmm0 = CreateRegister(registers, XMM0, 16); + m_xmm1 = CreateRegister(registers, XMM1, 16); + m_xmm2 = CreateRegister(registers, XMM2, 16); + m_xmm3 = CreateRegister(registers, XMM3, 16); + m_xmm4 = CreateRegister(registers, XMM4, 16); + m_xmm5 = CreateRegister(registers, XMM5, 16); + m_xmm6 = CreateRegister(registers, XMM6, 16); + m_xmm7 = CreateRegister(registers, XMM7, 16); + + // 64-bit mode only + /* + m_xmm8 = CreateRegister(registers, XMM8, 16); + m_xmm9 = CreateRegister(registers, XMM9, 16); + m_xmm10 = CreateRegister(registers, XMM10, 16); + m_xmm11 = CreateRegister(registers, XMM11, 16); + m_xmm12 = CreateRegister(registers, XMM12, 16); + m_xmm13 = CreateRegister(registers, XMM13, 16); + m_xmm14 = CreateRegister(registers, XMM14, 16); + m_xmm15 = CreateRegister(registers, XMM15, 16); + */ + + // ======================================================================== + // >> 16-bit Segment registers + // ======================================================================== + m_cs = CreateRegister(registers, CS, 2); + m_ss = CreateRegister(registers, SS, 2); + m_ds = CreateRegister(registers, DS, 2); + m_es = CreateRegister(registers, ES, 2); + m_fs = CreateRegister(registers, FS, 2); + m_gs = CreateRegister(registers, GS, 2); + + // ======================================================================== + // >> 80-bit FPU registers + // ======================================================================== + m_st0 = CreateRegister(registers, ST0, 10); + m_st1 = CreateRegister(registers, ST1, 10); + m_st2 = CreateRegister(registers, ST2, 10); + m_st3 = CreateRegister(registers, ST3, 10); + m_st4 = CreateRegister(registers, ST4, 10); + m_st5 = CreateRegister(registers, ST5, 10); + m_st6 = CreateRegister(registers, ST6, 10); + m_st7 = CreateRegister(registers, ST7, 10); +} + +CRegisters::~CRegisters() +{ + // ======================================================================== + // >> 8-bit General purpose registers + // ======================================================================== + DeleteRegister(m_al); + DeleteRegister(m_cl); + DeleteRegister(m_dl); + DeleteRegister(m_bl); + + // 64-bit mode only + /* + DeleteRegister(m_spl); + DeleteRegister(m_bpl); + DeleteRegister(m_sil); 
+	DeleteRegister(m_dil);
+	DeleteRegister(m_r8b);
+	DeleteRegister(m_r9b);
+	DeleteRegister(m_r10b);
+	DeleteRegister(m_r11b);
+	DeleteRegister(m_r12b);
+	DeleteRegister(m_r13b);
+	DeleteRegister(m_r14b);
+	DeleteRegister(m_r15b);
+	*/
+
+	DeleteRegister(m_ah);
+	DeleteRegister(m_ch);
+	DeleteRegister(m_dh);
+	DeleteRegister(m_bh);
+
+	// ========================================================================
+	// >> 16-bit General purpose registers
+	// ========================================================================
+	DeleteRegister(m_ax);
+	DeleteRegister(m_cx);
+	DeleteRegister(m_dx);
+	DeleteRegister(m_bx);
+	DeleteRegister(m_sp);
+	DeleteRegister(m_bp);
+	DeleteRegister(m_si);
+	DeleteRegister(m_di);
+
+	// 64-bit mode only
+	/*
+	DeleteRegister(m_r8w);
+	DeleteRegister(m_r9w);
+	DeleteRegister(m_r10w);
+	DeleteRegister(m_r11w);
+	DeleteRegister(m_r12w);
+	DeleteRegister(m_r13w);
+	DeleteRegister(m_r14w);
+	DeleteRegister(m_r15w);
+	*/
+
+	// ========================================================================
+	// >> 32-bit General purpose registers
+	// ========================================================================
+	DeleteRegister(m_eax);
+	DeleteRegister(m_ecx);
+	DeleteRegister(m_edx);
+	DeleteRegister(m_ebx);
+	DeleteRegister(m_esp);
+	DeleteRegister(m_ebp);
+	DeleteRegister(m_esi);
+	DeleteRegister(m_edi);
+
+	// 64-bit mode only
+	/*
+	DeleteRegister(m_r8d);
+	DeleteRegister(m_r9d);
+	DeleteRegister(m_r10d);
+	DeleteRegister(m_r11d);
+	DeleteRegister(m_r12d);
+	DeleteRegister(m_r13d);
+	DeleteRegister(m_r14d);
+	DeleteRegister(m_r15d);
+	*/
+
+	// ========================================================================
+	// >> 64-bit General purpose registers
+	// ========================================================================
+	// 64-bit mode only
+	/*
+	DeleteRegister(m_rax);
+	DeleteRegister(m_rcx);
+	DeleteRegister(m_rdx);
+	DeleteRegister(m_rbx);
+	DeleteRegister(m_rsp);
+	DeleteRegister(m_rbp);
+	DeleteRegister(m_rsi);
+	DeleteRegister(m_rdi);
+	*/
+
+	// 64-bit mode only
+	/*
+	DeleteRegister(m_r8);
+	DeleteRegister(m_r9);
+	DeleteRegister(m_r10);
+	DeleteRegister(m_r11);
+	DeleteRegister(m_r12);
+	DeleteRegister(m_r13);
+	DeleteRegister(m_r14);
+	DeleteRegister(m_r15);
+	*/
+
+	// ========================================================================
+	// >> 64-bit MM (MMX) registers
+	// ========================================================================
+	DeleteRegister(m_mm0);
+	DeleteRegister(m_mm1);
+	DeleteRegister(m_mm2);
+	DeleteRegister(m_mm3);
+	DeleteRegister(m_mm4);
+	DeleteRegister(m_mm5);
+	DeleteRegister(m_mm6);
+	DeleteRegister(m_mm7);
+
+	// ========================================================================
+	// >> 128-bit XMM registers
+	// ========================================================================
+	DeleteRegister(m_xmm0);
+	DeleteRegister(m_xmm1);
+	DeleteRegister(m_xmm2);
+	DeleteRegister(m_xmm3);
+	DeleteRegister(m_xmm4);
+	DeleteRegister(m_xmm5);
+	DeleteRegister(m_xmm6);
+	DeleteRegister(m_xmm7);
+
+	// 64-bit mode only
+	/*
+	DeleteRegister(m_xmm8);
+	DeleteRegister(m_xmm9);
+	DeleteRegister(m_xmm10);
+	DeleteRegister(m_xmm11);
+	DeleteRegister(m_xmm12);
+	DeleteRegister(m_xmm13);
+	DeleteRegister(m_xmm14);
+	DeleteRegister(m_xmm15);
+	*/
+
+	// ========================================================================
+	// >> 16-bit Segment registers
+	// ========================================================================
+	DeleteRegister(m_cs);
+	DeleteRegister(m_ss);
+	DeleteRegister(m_ds);
+	DeleteRegister(m_es);
+	DeleteRegister(m_fs);
+	DeleteRegister(m_gs);
+
+	// ========================================================================
+	// >> 80-bit FPU registers
+	// ========================================================================
+	DeleteRegister(m_st0);
+	DeleteRegister(m_st1);
+	DeleteRegister(m_st2);
+	DeleteRegister(m_st3);
+	DeleteRegister(m_st4);
+	DeleteRegister(m_st5);
+	DeleteRegister(m_st6);
+	DeleteRegister(m_st7);
+}
+
+CRegister* CRegisters::CreateRegister(std::list<Register_t>& registers, Register_t reg, int iSize)
+{
+	for(std::list<Register_t>::iterator it=registers.begin(); it != registers.end(); it++)
+	{
+		if ((*it) == reg)
+		{
+			return new CRegister(iSize);
+		}
+	}
+	return NULL;
+}
+
+void CRegisters::DeleteRegister(CRegister* pRegister)
+{
+	if (pRegister)
+	{
+		delete pRegister;
+	}
+}
\ No newline at end of file
diff --git a/DynamicHooks/registers.h b/DynamicHooks/registers.h
new file mode 100644
index 0000000..08083bb
--- /dev/null
+++ b/DynamicHooks/registers.h
@@ -0,0 +1,435 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+#ifndef _REGISTERS_H
+#define _REGISTERS_H
+
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#include <list>
+#include <stdlib.h>
+
+
+// ============================================================================
+// >> Register_t
+// ============================================================================
+enum Register_t
+{
+	// ========================================================================
+	// >> 8-bit General purpose registers
+	// ========================================================================
+	AL,
+	CL,
+	DL,
+	BL,
+
+	// 64-bit mode only
+	/*
+	SPL,
+	BPL,
+	SIL,
+	DIL,
+	R8B,
+	R9B,
+	R10B,
+	R11B,
+	R12B,
+	R13B,
+	R14B,
+	R15B,
+	*/
+
+	AH,
+	CH,
+	DH,
+	BH,
+
+	// ========================================================================
+	// >> 16-bit General purpose registers
+	// ========================================================================
+	AX,
+	CX,
+	DX,
+	BX,
+	SP,
+	BP,
+	SI,
+	DI,
+
+	// 64-bit mode only
+	/*
+	R8W,
+	R9W,
+	R10W,
+	R11W,
+	R12W,
+	R13W,
+	R14W,
+	R15W,
+	*/
+
+	// ========================================================================
+	// >> 32-bit General purpose registers
+	// ========================================================================
+	EAX,
+	ECX,
+	EDX,
+	EBX,
+	ESP,
+	EBP,
+	ESI,
+	EDI,
+
+	// 64-bit mode only
+	/*
+	R8D,
+	R9D,
+	R10D,
+	R11D,
+	R12D,
+	R13D,
+	R14D,
+	R15D,
+	*/
+
+	// ========================================================================
+	// >> 64-bit General purpose registers
+	// ========================================================================
+	// 64-bit mode only
+	/*
+	RAX,
+	RCX,
+	RDX,
+	RBX,
+	RSP,
+	RBP,
+	RSI,
+	RDI,
+	*/
+
+	// 64-bit mode only
+	/*
+	R8,
+	R9,
+	R10,
+	R11,
+	R12,
+	R13,
+	R14,
+	R15,
+	*/
+
+	// ========================================================================
+	// >> 64-bit MM (MMX) registers
+	// ========================================================================
+	MM0,
+	MM1,
+	MM2,
+	MM3,
+	MM4,
+	MM5,
+	MM6,
+	MM7,
+
+	// ========================================================================
+	// >> 128-bit XMM registers
+	// ========================================================================
+	XMM0,
+	XMM1,
+	XMM2,
+	XMM3,
+	XMM4,
+	XMM5,
+	XMM6,
+	XMM7,
+
+	// 64-bit mode only
+	/*
+	XMM8,
+	XMM9,
+	XMM10,
+	XMM11,
+	XMM12,
+	XMM13,
+	XMM14,
+	XMM15,
+	*/
+
+	// ========================================================================
+	// >> 16-bit Segment registers
+	// ========================================================================
+	CS,
+	SS,
+	DS,
+	ES,
+	FS,
+	GS,
+
+	// ========================================================================
+	// >> 80-bit FPU registers
+	// ========================================================================
+	ST0,
+	ST1,
+	ST2,
+	ST3,
+	ST4,
+	ST5,
+	ST6,
+	ST7,
+};
+
+
+// ============================================================================
+// >> CRegister
+// ============================================================================
+class CRegister
+{
+public:
+	CRegister(int iSize)
+	{
+		m_iSize = iSize;
+		m_pAddress = malloc(iSize);
+	}
+
+	~CRegister()
+	{
+		free(m_pAddress);
+	}
+
+	template<class T>
+	T GetValue()
+	{
+		return *(T *) m_pAddress;
+	}
+
+	template<class T>
+	T GetPointerValue(int iOffset=0)
+	{
+		return *(T *) (GetValue<unsigned long>() + iOffset);
+	}
+
+	template<class T>
+	void SetValue(T value)
+	{
+		*(T *) m_pAddress = value;
+	}
+
+	template<class T>
+	void SetPointerValue(T value, int iOffset=0)
+	{
+		*(T *) (GetValue<unsigned long>() + iOffset) = value;
+	}
+
+public:
+	int m_iSize;
+	void* m_pAddress;
+};
+
+
+// ============================================================================
+// >> CRegisters
+// ============================================================================
+class CRegisters
+{
+public:
+	CRegisters(std::list<Register_t> registers);
+	~CRegisters();
+
+private:
+	CRegister* CreateRegister(std::list<Register_t>& registers, Register_t reg, int iSize);
+	void DeleteRegister(CRegister* pRegister);
+
+public:
+	// ========================================================================
+	// >> 8-bit General purpose registers
+	// ========================================================================
+	CRegister* m_al;
+	CRegister* m_cl;
+	CRegister* m_dl;
+	CRegister* m_bl;
+
+	// 64-bit mode only
+	/*
+	CRegister* m_spl;
+	CRegister* m_bpl;
+	CRegister* m_sil;
+	CRegister* m_dil;
+	CRegister* m_r8b;
+	CRegister* m_r9b;
+	CRegister* m_r10b;
+	CRegister* m_r11b;
+	CRegister* m_r12b;
+	CRegister* m_r13b;
+	CRegister* m_r14b;
+	CRegister* m_r15b;
+	*/
+
+	CRegister* m_ah;
+	CRegister* m_ch;
+	CRegister* m_dh;
+	CRegister* m_bh;
+
+	// ========================================================================
+	// >> 16-bit General purpose registers
+	// ========================================================================
+	CRegister* m_ax;
+	CRegister* m_cx;
+	CRegister* m_dx;
+	CRegister* m_bx;
+	CRegister* m_sp;
+	CRegister* m_bp;
+	CRegister* m_si;
+	CRegister* m_di;
+
+	// 64-bit mode only
+	/*
+	CRegister* m_r8w;
+	CRegister* m_r9w;
+	CRegister* m_r10w;
+	CRegister* m_r11w;
+	CRegister* m_r12w;
+	CRegister* m_r13w;
+	CRegister* m_r14w;
+	CRegister* m_r15w;
+	*/
+
+	// ========================================================================
+	// >> 32-bit General purpose registers
+	// ========================================================================
+	CRegister* m_eax;
+	CRegister* m_ecx;
+	CRegister* m_edx;
+	CRegister* m_ebx;
+	CRegister* m_esp;
+	CRegister* m_ebp;
+	CRegister* m_esi;
+	CRegister* m_edi;
+
+	// 64-bit mode only
+	/*
+	CRegister* m_r8d;
+	CRegister* m_r9d;
+	CRegister* m_r10d;
+	CRegister* m_r11d;
+	CRegister* m_r12d;
+	CRegister* m_r13d;
+	CRegister* m_r14d;
+	CRegister* m_r15d;
+	*/
+
+	// ========================================================================
+	// >> 64-bit General purpose registers
+	// ========================================================================
+	// 64-bit mode only
+	/*
+	CRegister* m_rax;
+	CRegister* m_rcx;
+	CRegister* m_rdx;
+	CRegister* m_rbx;
+	CRegister* m_rsp;
+	CRegister* m_rbp;
+	CRegister* m_rsi;
+	CRegister* m_rdi;
+	*/
+
+	// 64-bit mode only
+	/*
+	CRegister* m_r8;
+	CRegister* m_r9;
+	CRegister* m_r10;
+	CRegister* m_r11;
+	CRegister* m_r12;
+	CRegister* m_r13;
+	CRegister* m_r14;
+	CRegister* m_r15;
+	*/
+
+	// ========================================================================
+	// >> 64-bit MM (MMX) registers
+	// ========================================================================
+	CRegister* m_mm0;
+	CRegister* m_mm1;
+	CRegister* m_mm2;
+	CRegister* m_mm3;
+	CRegister* m_mm4;
+	CRegister* m_mm5;
+	CRegister* m_mm6;
+	CRegister* m_mm7;
+
+	// ========================================================================
+	// >> 128-bit XMM registers
+	// ========================================================================
+	CRegister* m_xmm0;
+	CRegister* m_xmm1;
+	CRegister* m_xmm2;
+	CRegister* m_xmm3;
+	CRegister* m_xmm4;
+	CRegister* 
m_xmm5; + CRegister* m_xmm6; + CRegister* m_xmm7; + + // 64-bit mode only + /* + CRegister* m_xmm8; + CRegister* m_xmm9; + CRegister* m_xmm10; + CRegister* m_xmm11; + CRegister* m_xmm12; + CRegister* m_xmm13; + CRegister* m_xmm14; + CRegister* m_xmm15; + */ + + // ======================================================================== + // >> 16-bit Segment registers + // ======================================================================== + CRegister* m_cs; + CRegister* m_ss; + CRegister* m_ds; + CRegister* m_es; + CRegister* m_fs; + CRegister* m_gs; + + // ======================================================================== + // >> 80-bit FPU registers + // ======================================================================== + CRegister* m_st0; + CRegister* m_st1; + CRegister* m_st2; + CRegister* m_st3; + CRegister* m_st4; + CRegister* m_st5; + CRegister* m_st6; + CRegister* m_st7; +}; + +#endif // _REGISTERS_H \ No newline at end of file diff --git a/DynamicHooks/thirdparty/AsmJit/apibegin.h b/DynamicHooks/thirdparty/AsmJit/apibegin.h new file mode 100644 index 0000000..ccb1157 --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/apibegin.h @@ -0,0 +1,76 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Dependencies] +#if !defined(_ASMJIT_BUILD_H) +#include "./build.h" +#endif // !_ASMJIT_BUILD_H + +// [Guard] +#if !defined(ASMJIT_API_SCOPE) +# define ASMJIT_API_SCOPE +#else +# error "[asmjit] Api-Scope is already active, previous scope not closed by apiend.h?" +#endif // ASMJIT_API_SCOPE + +// [NoExcept] +#if !ASMJIT_CC_HAS_NOEXCEPT && !defined(noexcept) +# define noexcept ASMJIT_NOEXCEPT +# define ASMJIT_UNDEF_NOEXCEPT +#endif // !ASMJIT_CC_HAS_NOEXCEPT && !noexcept + +// [NullPtr] +#if !ASMJIT_CC_HAS_NULLPTR && !defined(nullptr) +# define nullptr NULL +# define ASMJIT_UNDEF_NULLPTR +#endif // !ASMJIT_CC_HAS_NULLPTR && !nullptr + +// [Override] +#if !ASMJIT_CC_HAS_OVERRIDE && !defined(override) +# define override +# define ASMJIT_UNDEF_OVERRIDE +#endif // !ASMJIT_CC_HAS_OVERRIDE && !override + +// [CLang] +#if ASMJIT_CC_CLANG +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wunnamed-type-template-args" +#endif // ASMJIT_CC_CLANG + +// [GCC] +#if ASMJIT_CC_GCC +# pragma GCC diagnostic push +# pragma GCC diagnostic warning "-Winline" +#endif // ASMJIT_CC_GCC + +// [MSC] +#if ASMJIT_CC_MSC + +# pragma warning(push) +# pragma warning(disable: 4127) // conditional expression is constant +# pragma warning(disable: 4201) // nameless struct/union +# pragma warning(disable: 4244) // '+=' : conversion from 'int' to 'x', possible + // loss of data +# pragma warning(disable: 4251) // struct needs to have dll-interface to be used + // by clients of struct ... +# pragma warning(disable: 4275) // non dll-interface struct ... used as base for + // dll-interface struct +# pragma warning(disable: 4355) // this used in base member initializer list +# pragma warning(disable: 4480) // specifying underlying type for enum +# pragma warning(disable: 4800) // forcing value to bool 'true' or 'false' + +// TODO: Check if these defines are needed and for which version of MSC. There are +// news about these as they are part of C99. 
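+// (For reference: MSVC only ships a conforming snprintf/vsnprintf pair since
+// VS2015; older versions expose the underscore-prefixed variants aliased below.)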
diff --git a/DynamicHooks/thirdparty/AsmJit/apiend.h b/DynamicHooks/thirdparty/AsmJit/apiend.h
new file mode 100644
index 0000000..39979d9
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/apiend.h
@@ -0,0 +1,53 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#if defined(ASMJIT_API_SCOPE)
+# undef ASMJIT_API_SCOPE
+#else
+# error "[asmjit] Api-Scope not active, forgot to include apibegin.h?"
+#endif // ASMJIT_API_SCOPE
+
+// [NoExcept]
+#if defined(ASMJIT_UNDEF_NOEXCEPT)
+# undef noexcept
+# undef ASMJIT_UNDEF_NOEXCEPT
+#endif // ASMJIT_UNDEF_NOEXCEPT
+
+// [NullPtr]
+#if defined(ASMJIT_UNDEF_NULLPTR)
+# undef nullptr
+# undef ASMJIT_UNDEF_NULLPTR
+#endif // ASMJIT_UNDEF_NULLPTR
+
+// [Override]
+#if defined(ASMJIT_UNDEF_OVERRIDE)
+# undef override
+# undef ASMJIT_UNDEF_OVERRIDE
+#endif // ASMJIT_UNDEF_OVERRIDE
+
+// [CLang]
+#if ASMJIT_CC_CLANG
+# pragma clang diagnostic pop
+#endif // ASMJIT_CC_CLANG
+
+// [GCC]
+#if ASMJIT_CC_GCC
+# pragma GCC diagnostic pop
+#endif // ASMJIT_CC_GCC
+
+// [MSC]
+#if ASMJIT_CC_MSC
+# pragma warning(pop)
+# if defined(ASMJIT_UNDEF_VSNPRINTF)
+# undef vsnprintf
+# undef ASMJIT_UNDEF_VSNPRINTF
+# endif // ASMJIT_UNDEF_VSNPRINTF
+# if defined(ASMJIT_UNDEF_SNPRINTF)
+# undef snprintf
+# undef ASMJIT_UNDEF_SNPRINTF
+# endif // ASMJIT_UNDEF_SNPRINTF
+#endif // ASMJIT_CC_MSC
diff --git a/DynamicHooks/thirdparty/AsmJit/arm.h b/DynamicHooks/thirdparty/AsmJit/arm.h
new file mode 100644
index 0000000..271c7f6
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/arm.h
@@ -0,0 +1,20 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_ARM_H
+#define _ASMJIT_ARM_H
+
+// [Dependencies]
+#include "./base.h"
+
+#include "./arm/armassembler.h"
+#include "./arm/armcompiler.h"
+#include "./arm/arminst.h"
+#include "./arm/armoperand.h"
+
+// [Guard]
+#endif // _ASMJIT_ARM_H
diff --git a/DynamicHooks/thirdparty/AsmJit/asmjit.h b/DynamicHooks/thirdparty/AsmJit/asmjit.h
new file mode 100644
index 0000000..b94f788
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/asmjit.h
@@ -0,0 +1,360 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_ASMJIT_H
+#define _ASMJIT_ASMJIT_H
+
+// ============================================================================
+// [asmjit_mainpage]
+// ============================================================================
+
+//! \mainpage
+//!
+//! AsmJit - Complete x86/x64 JIT and Remote Assembler for C++.
+//!
+//! A complete JIT and remote assembler for the C++ language. It can generate
+//! native code for the x86 and x64 architectures and supports the whole
+//! x86/x64 instruction set - from legacy MMX to the newest AVX2. It has a
+//! type-safe API that allows the C++ compiler to do semantic checks at
+//! compile-time even before the assembled code is generated and executed.
+//!
+//! AsmJit is not a virtual machine (VM). It doesn't have functionality to
+//! implement a VM out of the box; however, it can be used as a JIT backend
+//! of your own VM.
+//! The usage of AsmJit is not limited at all; it's suitable for multimedia,
+//! VM backends, remote code generation, and many other tasks.
+//!
+//! \section AsmJit_Main_Concepts Code Generation Concepts
+//!
+//! AsmJit has two completely different code generation concepts. The
+//! difference is in how the code is generated. The first concept, also
+//! referred to as a low-level concept, is called `Assembler` and it's the
+//! same as writing RAW assembly by inserting instructions that use physical
+//! registers directly. In this case AsmJit does only instruction encoding,
+//! verification and final code relocation.
+//!
+//! The second concept, also referred to as a high-level concept, is called
+//! `Compiler`. Compiler lets you use a virtually unlimited number of
+//! registers (it calls them variables), which significantly simplifies the
+//! code generation process. Compiler allocates these virtual registers to
+//! physical registers after the code generation is done. This requires some
+//! extra effort - Compiler has to generate information for each node
+//! (instruction, function declaration, function call, etc...) in the code,
+//! perform a variable liveness analysis and translate the code using
+//! variables to a code that uses only physical registers.
+//!
+//! In addition, Compiler understands functions and their calling conventions.
+//! It has been designed in a way that the code generated is always a function
+//! having a prototype like in a real programming language. By having a
+//! function prototype the Compiler is able to insert prolog and epilog
+//! sequences into the function being generated, and it's also able to
+//! generate the necessary code to call other functions from your own code.
+//!
+//! There is no conclusion on which concept is better. `Assembler` brings full
+//! control and the best performance, while `Compiler` makes the
+//! code-generation more fun and more portable.
+//!
+//! \section AsmJit_Main_Sections Documentation Sections
+//!
+//! AsmJit documentation is structured into the following sections:
+//! - \ref asmjit_base "Base" - Base API (architecture independent).
+//! - \ref asmjit_x86 "X86/X64" - X86/X64 API.
+//!
+//! \section AsmJit_Main_HomePage AsmJit Homepage
+//!
+//! - https://github.com/kobalicek/asmjit
+
+// ============================================================================
+// [asmjit_base]
+// ============================================================================
+
+//! \defgroup asmjit_base AsmJit Base API (architecture independent)
+//!
+//! \brief Base API.
+//!
+//! Base API contains all classes that are platform and architecture
+//! independent.
+//!
+//! Code-Generation and Operands
+//! ----------------------------
+//!
+//! List of the most useful code-generation and operand classes:
+//! - \ref asmjit::Assembler - Low-level code-generation.
+//! - \ref asmjit::ExternalTool - An external tool that can serialize to `Assembler`:
+//!   - \ref asmjit::Compiler - High-level code-generation.
+//! - \ref asmjit::Runtime - Describes where the code is stored and how it's executed:
+//!   - \ref asmjit::HostRuntime - Runtime that runs on the host machine:
+//!     - \ref asmjit::JitRuntime - Runtime designed for JIT code generation and execution.
+//!     - \ref asmjit::StaticRuntime - Runtime for code that starts at a specific address.
+//! - \ref asmjit::Stream - Stream is a list of \ref HLNode objects stored as a
+//!   doubly linked list:
+//!   - \ref asmjit::HLNode - Base node interface:
+//!     - \ref asmjit::HLInst - Instruction node.
+//!     - \ref asmjit::HLData - Data node.
+//!     - \ref asmjit::HLAlign - Align directive node.
+//!     - \ref asmjit::HLLabel - Label node.
+//!     - \ref asmjit::HLComment - Comment node.
+//!     - \ref asmjit::HLSentinel - Sentinel node.
+//!     - \ref asmjit::HLHint - Hint node.
+//!     - \ref asmjit::HLFunc - Function declaration node.
+//!     - \ref asmjit::HLRet - Function return node.
+//!     - \ref asmjit::HLCall - Function call node.
+//!     - \ref asmjit::HLCallArg - Function call argument node.
+//! - \ref asmjit::Operand - Base class for all operands:
+//!   - \ref asmjit::Reg - Register operand (`Assembler` only).
+//!   - \ref asmjit::Var - Variable operand (`Compiler` only).
+//!   - \ref asmjit::Mem - Memory operand.
+//!   - \ref asmjit::Imm - Immediate operand.
+//!   - \ref asmjit::Label - Label operand.
+//!
+//! The following snippet shows how to set up a basic JIT code generation:
+//!
+//! ~~~
+//! using namespace asmjit;
+//!
+//! int main(int argc, char* argv[]) {
+//!   // JIT runtime is designed for JIT code generation and execution.
+//!   JitRuntime runtime;
+//!
+//!   // The Assembler instance requires a runtime in order to function.
+//!   X86Assembler a(&runtime);
+//!
+//!   // Compiler (if you intend to use it) requires an assembler instance.
+//!   X86Compiler c(&a);
+//!
+//!   return 0;
+//! }
+//! ~~~
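+//!
+//! A minimal end-to-end sketch building on the snippet above (assuming the
+//! X86-specific `mov`/`ret` serializers documented in the \ref asmjit_x86
+//! section):
+//!
+//! ~~~
+//! using namespace asmjit;
+//!
+//! JitRuntime runtime;
+//! X86Assembler a(&runtime);
+//!
+//! // Emit 'mov eax, 42' followed by 'ret'.
+//! a.mov(x86::eax, 42);
+//! a.ret();
+//!
+//! // Ask the runtime to turn the code-buffer into executable memory.
+//! typedef int (*Fn)(void);
+//! Fn fn = asmjit_cast<Fn>(a.make());
+//! // Calling fn() now returns 42.
+//! ~~~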
+//!
+//! Logging and Error Handling
+//! --------------------------
+//!
+//! AsmJit contains a robust interface that can be used to log the generated
+//! code and to handle possible errors. The base logging interface is provided
+//! by \ref Logger, which is abstract and can be used as a base for your own
+//! logger. AsmJit also implements some trivial logging concepts out of the
+//! box to simplify the development. \ref FileLogger logs into a C `FILE*`
+//! stream and \ref StringLogger concatenates all log messages into a single
+//! string.
+//!
+//! The following snippet shows how to set up a basic logger and error handler:
+//!
+//! ~~~
+//! using namespace asmjit;
+//!
+//! struct MyErrorHandler : public ErrorHandler {
+//!   virtual bool handleError(Error code, const char* message, void* origin) {
+//!     printf("Error 0x%0.8X: %s\n", code, message);
+//!
+//!     // True  - error handled and code generation can continue.
+//!     // False - error not handled, code generation should stop.
+//!     return false;
+//!   }
+//! };
+//!
+//! int main(int argc, char* argv[]) {
+//!   JitRuntime runtime;
+//!   FileLogger logger(stderr);
+//!   MyErrorHandler eh;
+//!
+//!   X86Assembler a(&runtime);
+//!   a.setLogger(&logger);
+//!   a.setErrorHandler(&eh);
+//!
+//!   ...
+//!
+//!   return 0;
+//! }
+//! ~~~
+//!
+//! AsmJit also contains an \ref ErrorHandler, which is an abstract class that
+//! can be used to implement your own error handling. It can be associated with
+//! \ref Assembler and used to report all errors. It's a very convenient way to
+//! be aware of any error that happens during the code generation without
+//! making the error handling complicated.
+//!
+//! List of the most useful logging and error handling classes:
+//! - \ref asmjit::Logger - Abstract logging interface:
+//!   - \ref asmjit::FileLogger - A logger that logs to `FILE*`.
+//!   - \ref asmjit::StringLogger - A logger that concatenates to a single string.
+//! - \ref asmjit::ErrorHandler - Easy way to handle \ref Assembler and
+//!   \ref Compiler errors.
+//!
+//! Zone Memory Allocator
+//! ---------------------
+//!
+//! Zone memory allocator is an incremental memory allocator that can be used
+//! to allocate data of short life-time. It has much better performance
+//! characteristics than general-purpose allocators, because the only thing it
+//! needs to do is to increment a pointer and return its previous address. See
+//! \ref Zone for more details.
+//!
+//! The whole AsmJit library is based on zone memory allocation for
+//! performance reasons. It has many other benefits, but the performance was
+//! the main one when designing the library.
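+//!
+//! A small sketch of how such an allocator is used (the constructor's
+//! block-size argument matches `Assembler`'s internal usage; `alloc()` and
+//! `reset()` are assumed here):
+//!
+//! ~~~
+//! using namespace asmjit;
+//!
+//! // 8kB blocks; each alloc() just bumps a pointer inside the current block.
+//! Zone zone(8192 - Zone::kZoneOverhead);
+//!
+//! void* p = zone.alloc(64);  // Cheap; no per-allocation bookkeeping.
+//!
+//! // There is no per-object free(); everything is released at once.
+//! zone.reset(true);
+//! ~~~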
+//!
+//! POD Containers
+//! --------------
+//!
+//! POD containers are used by AsmJit to manage its own data structures. The
+//! following classes can be used by AsmJit consumers:
+//!
+//! - \ref asmjit::BitArray - A fixed bit-array that is used internally.
+//! - \ref asmjit::PodVector - A simple array-like container for storing
+//!   POD data.
+//! - \ref asmjit::PodList - A single linked list.
+//! - \ref asmjit::StringBuilder - A string builder that can append strings
+//!   and integers.
+//!
+//! Utility Functions
+//! -----------------
+//!
+//! Utility functions are implemented in the static class \ref Utils. There
+//! are utilities for bit manipulation and bit counting, utilities to get an
+//! integer minimum / maximum and various other helpers required to perform
+//! alignment checks and binary casting from float to integer and vice versa.
+//!
+//! String utilities are also implemented by the static class \ref Utils. They
+//! are mostly used by AsmJit internals and not really important to end users.
+//!
+//! SIMD Utilities
+//! --------------
+//!
+//! SIMD code generation often requires to embed constants after each function
+//! or at the end of the whole code block. AsmJit contains `Vec64`, `Vec128`
+//! and `Vec256` classes that can be used to prepare data useful when
+//! generating SIMD code.
+//!
+//! X86/X64 code generators contain member functions `dmm`, `dxmm`, and `dymm`,
+//! which can be used to embed 64-bit, 128-bit and 256-bit data structures into
+//! the machine code.
+
+// ============================================================================
+// [asmjit_x86]
+// ============================================================================
+
+//! \defgroup asmjit_x86 AsmJit X86/X64 API
+//!
+//! \brief X86/X64 API
+//!
+//! X86/X64 Code Generation
+//! -----------------------
+//!
+//! X86/X64 code generation is realized through:
+//! - \ref X86Assembler - low-level code generation.
+//! - \ref X86Compiler - high-level code generation.
+//!
+//! X86/X64 Registers
+//! -----------------
+//!
+//! There are static objects that represent X86 and X64 registers. They can
+//! be used directly (like `eax`, `mm`, `xmm`, ...) or created through
+//! these functions:
+//!
+//! - `asmjit::x86::gpb_lo()` - Get an 8-bit low GPB register.
+//! - `asmjit::x86::gpb_hi()` - Get an 8-bit high GPB register.
+//! - `asmjit::x86::gpw()` - Get a 16-bit GPW register.
+//! - `asmjit::x86::gpd()` - Get a 32-bit GPD register.
+//! - `asmjit::x86::gpq()` - Get a 64-bit GPQ register.
+//! - `asmjit::x86::gpz()` - Get a 32-bit or 64-bit GPD/GPQ register.
+//! - `asmjit::x86::fp()` - Get an 80-bit FPU register.
+//! - `asmjit::x86::mm()` - Get a 64-bit MMX register.
+//! - `asmjit::x86::xmm()` - Get a 128-bit XMM register.
+//! - `asmjit::x86::ymm()` - Get a 256-bit YMM register.
+//! - `asmjit::x86::zmm()` - Get a 512-bit ZMM register.
+//!
+//! X86/X64 Addressing
+//! ------------------
+//!
+//! The X86 and X64 architectures contain several addressing modes, and most
+//! of them are available in the AsmJit library. Memory operands are
+//! represented by the `BaseMem` class. These functions are used to make
+//! operands that represent memory addresses:
+//!
+//! - `asmjit::x86::ptr()` - Address size not specified.
+//! - `asmjit::x86::byte_ptr()` - 1 byte.
+//! - `asmjit::x86::word_ptr()` - 2 bytes (GPW size).
+//! - `asmjit::x86::dword_ptr()` - 4 bytes (GPD size).
+//! - `asmjit::x86::qword_ptr()` - 8 bytes (GPQ/MMX size).
+//! - `asmjit::x86::tword_ptr()` - 10 bytes (FPU size).
+//! - `asmjit::x86::dqword_ptr()` - 16 bytes (XMM size).
+//! - `asmjit::x86::yword_ptr()` - 32 bytes (YMM size).
+//! - `asmjit::x86::zword_ptr()` - 64 bytes (ZMM size).
+//!
+//! The most useful function for making a pointer is `asmjit::x86::ptr()`. It
+//! creates a pointer to the target with an unspecified size. An unspecified
+//! size works in all intrinsics where registers are used (this means that the
+//! size is specified by the register operand or by the instruction itself).
+//! For example `asmjit::x86::ptr()` can't be used with the `Assembler::inc()`
+//! instruction; in this case the size must be specified, and it's also the
+//! reason to differentiate between pointer sizes.
+//!
+//! X86 and X64 support simple address forms like `[base + displacement]` and
+//! also complex address forms like `[base + index * scale + displacement]`.
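+//!
+//! For illustration, a short sketch (assuming an `X86Assembler a` as in the
+//! snippets above; the third argument of the complex form is the scale
+//! expressed as a shift):
+//!
+//! ~~~
+//! using namespace asmjit;
+//!
+//! // mov eax, [esp + 4] - operand size implied by the 'eax' register.
+//! a.mov(x86::eax, x86::ptr(x86::esp, 4));
+//!
+//! // inc dword [eax + ebx * 4 + 8] - the size must be explicit here.
+//! a.inc(x86::dword_ptr(x86::eax, x86::ebx, 2, 8));
+//! ~~~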
+//!
+//! X86/X64 Immediates
+//! ------------------
+//!
+//! Immediate values are constants that are passed directly after the
+//! instruction opcode. To create such a value, use the `asmjit::imm()` or
+//! `asmjit::imm_u()` functions to create a signed or unsigned immediate
+//! value.
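+//!
+//! For example (a sketch; `and_()` follows AsmJit's convention of suffixing
+//! instruction names that clash with C++ keywords):
+//!
+//! ~~~
+//! using namespace asmjit;
+//!
+//! a.mov(x86::eax, imm(-1));       // Signed immediate.
+//! a.and_(x86::eax, imm_u(0x80));  // Unsigned immediate.
+//! ~~~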
+//!
+//! X86/X64 CPU Information
+//! -----------------------
+//!
+//! The CPUID instruction can be used to get exhaustive information about the
+//! host X86/X64 processor. AsmJit contains utilities that can get the most
+//! important information related to the features supported by the CPU and the
+//! host operating system, in addition to the host processor name and number
+//! of cores. The `CpuInfo` class provides generic information about a host or
+//! target processor and also contains specific X86/X64 information.
+//!
+//! By default AsmJit queries the CPU information after the library is loaded
+//! and the queried information is reused by all instances of `JitRuntime`.
+//! The global instance of `CpuInfo` can't be changed, because changing it
+//! would affect the code generation of all `Runtime`s. If there is a need for
+//! specific CPU information that contains modified features or a different
+//! processor vendor, it's possible to create a new instance of `CpuInfo` and
+//! set up its members.
+//!
+//! CPU detection is important when generating JIT code that may or may not
+//! use certain CPU features. For example there used to be SSE/SSE2 detection
+//! in the past and today there is often AVX/AVX2 detection.
+//!
+//! The example below shows how to detect the SSE4.1 instruction set:
+//!
+//! ~~~
+//! using namespace asmjit;
+//!
+//! const CpuInfo& cpuInfo = CpuInfo::getHost();
+//!
+//! if (cpuInfo.hasFeature(CpuInfo::kX86FeatureSSE4_1)) {
+//!   // Processor has SSE4.1.
+//! }
+//! else if (cpuInfo.hasFeature(CpuInfo::kX86FeatureSSE2)) {
+//!   // Processor doesn't have SSE4.1, but has SSE2.
+//! }
+//! else {
+//!   // Processor is archaic; it's a wonder AsmJit works here!
+//! }
+//! ~~~
+
+// [Dependencies]
+#include "./base.h"
+
+// [ARM/ARM64]
+#if defined(ASMJIT_BUILD_ARM32) || defined(ASMJIT_BUILD_ARM64)
+#include "./arm.h"
+#endif // ASMJIT_BUILD_ARM32 || ASMJIT_BUILD_ARM64
+
+// [X86/X64]
+#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)
+#include "./x86.h"
+#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64
+
+// [Host]
+#include "./host.h"
+
+// [Guard]
+#endif // _ASMJIT_ASMJIT_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base.h b/DynamicHooks/thirdparty/AsmJit/base.h
new file mode 100644
index 0000000..7d8661e
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base.h
@@ -0,0 +1,35 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_H
+#define _ASMJIT_BASE_H
+
+// [Dependencies]
+#include "./build.h"
+
+#include "./base/assembler.h"
+#include "./base/constpool.h"
+#include "./base/containers.h"
+#include "./base/cpuinfo.h"
+#include "./base/globals.h"
+#include "./base/logger.h"
+#include "./base/operand.h"
+#include "./base/podvector.h"
+#include "./base/runtime.h"
+#include "./base/utils.h"
+#include "./base/vectypes.h"
+#include "./base/vmem.h"
+#include "./base/zone.h"
+
+#if !defined(ASMJIT_DISABLE_COMPILER)
+#include "./base/compiler.h"
+#include "./base/compilerfunc.h"
+#include "./base/hlstream.h"
+#endif // !ASMJIT_DISABLE_COMPILER
+
+// [Guard]
+#endif // _ASMJIT_BASE_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base/assembler.cpp b/DynamicHooks/thirdparty/AsmJit/base/assembler.cpp
new file mode 100644
index 0000000..9162ec9
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/assembler.cpp
@@ -0,0 +1,503 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies]
+#include "../base/assembler.h"
+#include "../base/utils.h"
+#include "../base/vmem.h"
+#include
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::ErrorHandler]
+// ============================================================================
+
+ErrorHandler::ErrorHandler() noexcept {}
+ErrorHandler::~ErrorHandler() noexcept {}
+
+ErrorHandler* ErrorHandler::addRef() const noexcept {
+  return const_cast<ErrorHandler*>(this);
+}
+void ErrorHandler::release() noexcept {}
+
+// ============================================================================
+// [asmjit::ExternalTool]
+// ============================================================================
+
+ExternalTool::ExternalTool() noexcept
+  : _assembler(nullptr),
+    _exId(0),
+    _arch(kArchNone),
+    _regSize(0),
+    _finalized(false),
+    _reserved(0),
+    _lastError(kErrorNotInitialized) {}
+ExternalTool::~ExternalTool() noexcept {}
+
+Error ExternalTool::setLastError(Error error, const char* message) noexcept {
+  // Special case, reset the last error if the error is `kErrorOk`.
+  if (error == kErrorOk) {
+    _lastError = kErrorOk;
+    return kErrorOk;
+  }
+
+  // Don't do anything if the code-generator doesn't have an associated assembler.
+  Assembler* assembler = getAssembler();
+  if (assembler == nullptr)
+    return error;
+
+  if (message == nullptr)
+    message = DebugUtils::errorAsString(error);
+
+  // Logging is skipped if the error is handled by `ErrorHandler`.
+  ErrorHandler* eh = assembler->getErrorHandler();
+  ASMJIT_TLOG("[ERROR (ExternalTool)] %s (0x%0.8u) %s\n", message,
+    static_cast<unsigned int>(error),
+    !eh ? "(Possibly unhandled?)" : "");
+
+  if (eh != nullptr && eh->handleError(error, message, this))
+    return error;
+
+#if !defined(ASMJIT_DISABLE_LOGGER)
+  Logger* logger = assembler->getLogger();
+  if (logger != nullptr)
+    logger->logFormat(Logger::kStyleComment,
+      "*** ERROR (ExternalTool): %s (0x%0.8u).\n", message,
+      static_cast<unsigned int>(error));
+#endif // !ASMJIT_DISABLE_LOGGER
+
+  // The handler->handleError() function may throw an exception or longjmp()
+  // to terminate the execution of `setLastError()`. This is the reason why
+  // we have delayed changing the `_error` member until now.
+  _lastError = error;
+  return error;
+}
+
+// ============================================================================
+// [asmjit::Assembler - Construction / Destruction]
+// ============================================================================
+
+Assembler::Assembler(Runtime* runtime) noexcept
+  : _runtime(runtime),
+    _logger(nullptr),
+    _errorHandler(nullptr),
+    _arch(kArchNone),
+    _regSize(0),
+    _reserved(0),
+    _asmOptions(0),
+    _instOptions(0),
+    _lastError(runtime ? kErrorOk : kErrorNotInitialized),
+    _exIdGenerator(0),
+    _exCountAttached(0),
+    _zoneAllocator(8192 - Zone::kZoneOverhead),
+    _buffer(nullptr),
+    _end(nullptr),
+    _cursor(nullptr),
+    _trampolinesSize(0),
+    _comment(nullptr),
+    _unusedLinks(nullptr),
+    _labels(),
+    _relocations() {}
+
+Assembler::~Assembler() noexcept {
+  reset(true);
+
+  if (_errorHandler != nullptr)
+    _errorHandler->release();
+}
+
+// ============================================================================
+// [asmjit::Assembler - Reset]
+// ============================================================================
+
+void Assembler::reset(bool releaseMemory) noexcept {
+  _asmOptions = 0;
+  _instOptions = 0;
+  _lastError = kErrorOk;
+  _exIdGenerator = 0;
+  _exCountAttached = 0;
+
+  _zoneAllocator.reset(releaseMemory);
+
+  if (releaseMemory && _buffer != nullptr) {
+    ASMJIT_FREE(_buffer);
+    _buffer = nullptr;
+    _end = nullptr;
+  }
+
+  _cursor = _buffer;
+  _trampolinesSize = 0;
+
+  _comment = nullptr;
+  _unusedLinks = nullptr;
+
+  _sections.reset(releaseMemory);
+  _labels.reset(releaseMemory);
+  _relocations.reset(releaseMemory);
+}
+
+// ============================================================================
+// [asmjit::Assembler - Logging & Error Handling]
+// ============================================================================
+
+Error Assembler::setLastError(Error error, const char* message) noexcept {
+  // Special case, reset the last error if the error is `kErrorOk`.
+  if (error == kErrorOk) {
+    _lastError = kErrorOk;
+    return kErrorOk;
+  }
+
+  if (message == nullptr)
+    message = DebugUtils::errorAsString(error);
+
+  // Logging is skipped if the error is handled by `ErrorHandler`.
+  ErrorHandler* eh = _errorHandler;
+  ASMJIT_TLOG("[ERROR (Assembler)] %s (0x%0.8u) %s\n", message,
+    static_cast<unsigned int>(error),
+    !eh ? "(Possibly unhandled?)" : "");
+
+  if (eh != nullptr && eh->handleError(error, message, this))
+    return error;
+
+#if !defined(ASMJIT_DISABLE_LOGGER)
+  Logger* logger = _logger;
+  if (logger != nullptr)
+    logger->logFormat(Logger::kStyleComment,
+      "*** ERROR (Assembler): %s (0x%0.8u).\n", message,
+      static_cast<unsigned int>(error));
+#endif // !ASMJIT_DISABLE_LOGGER
+
+  // The handler->handleError() function may throw an exception or longjmp()
+  // to terminate the execution of `setLastError()`. This is the reason why
This is the reason why + // we have delayed changing the `_error` member until now. + _lastError = error; + return error; +} + +Error Assembler::setErrorHandler(ErrorHandler* handler) noexcept { + ErrorHandler* oldHandler = _errorHandler; + + if (oldHandler != nullptr) + oldHandler->release(); + + if (handler != nullptr) + handler = handler->addRef(); + + _errorHandler = handler; + return kErrorOk; +} + +// ============================================================================ +// [asmjit::Assembler - Buffer] +// ============================================================================ + +Error Assembler::_grow(size_t n) noexcept { + size_t capacity = getCapacity(); + size_t after = getOffset() + n; + + // Overflow. + if (n > IntTraits::maxValue() - capacity) + return setLastError(kErrorNoHeapMemory); + + // Grow is called when allocation is needed, so it shouldn't happen, but on + // the other hand it is simple to catch and it's not an error. + if (after <= capacity) + return kErrorOk; + + if (capacity < kMemAllocOverhead) + capacity = kMemAllocOverhead; + else + capacity += kMemAllocOverhead; + + do { + size_t oldCapacity = capacity; + + if (capacity < kMemAllocGrowMax) + capacity *= 2; + else + capacity += kMemAllocGrowMax; + + // Overflow. + if (oldCapacity > capacity) + return setLastError(kErrorNoHeapMemory); + } while (capacity - kMemAllocOverhead < after); + + capacity -= kMemAllocOverhead; + return _reserve(capacity); +} + +Error Assembler::_reserve(size_t n) noexcept { + size_t capacity = getCapacity(); + if (n <= capacity) + return kErrorOk; + + uint8_t* newBuffer; + if (_buffer == nullptr) + newBuffer = static_cast(ASMJIT_ALLOC(n)); + else + newBuffer = static_cast(ASMJIT_REALLOC(_buffer, n)); + + if (newBuffer == nullptr) + return setLastError(kErrorNoHeapMemory); + + size_t offset = getOffset(); + + _buffer = newBuffer; + _end = _buffer + n; + _cursor = newBuffer + offset; + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::Assembler - Label] +// ============================================================================ + +Error Assembler::_newLabelId() noexcept { + LabelData* data = _zoneAllocator.allocT(); + + data->offset = -1; + data->links = nullptr; + data->exId = 0; + data->exData = nullptr; + + uint32_t id = OperandUtil::makeLabelId(static_cast(_labels.getLength())); + Error error = _labels.append(data); + + if (error != kErrorOk) { + setLastError(kErrorNoHeapMemory); + return kInvalidValue; + } + + return id; +} + +LabelLink* Assembler::_newLabelLink() noexcept { + LabelLink* link = _unusedLinks; + + if (link) { + _unusedLinks = link->prev; + } + else { + link = _zoneAllocator.allocT(); + if (link == nullptr) + return nullptr; + } + + link->prev = nullptr; + link->offset = 0; + link->displacement = 0; + link->relocId = -1; + + return link; +} + +Error Assembler::bind(const Label& label) noexcept { + // Get label data based on label id. + uint32_t index = label.getId(); + LabelData* data = getLabelData(index); + + // Label can be bound only once. 
+  if (data->offset != -1)
+    return setLastError(kErrorLabelAlreadyBound);
+
+#if !defined(ASMJIT_DISABLE_LOGGER)
+  if (_logger) {
+    StringBuilderTmp<256> sb;
+    sb.setFormat("L%u:", index);
+
+    size_t binSize = 0;
+    if (!_logger->hasOption(Logger::kOptionBinaryForm))
+      binSize = kInvalidIndex;
+
+    LogUtil::formatLine(sb, nullptr, binSize, 0, 0, _comment);
+    _logger->logString(Logger::kStyleLabel, sb.getData(), sb.getLength());
+  }
+#endif // !ASMJIT_DISABLE_LOGGER
+
+  Error error = kErrorOk;
+  size_t pos = getOffset();
+
+  LabelLink* link = data->links;
+  LabelLink* prev = nullptr;
+
+  while (link) {
+    intptr_t offset = link->offset;
+
+    if (link->relocId != -1) {
+      // Handle RelocData - We have to update RelocData information instead of
+      // patching the displacement in LabelData.
+      _relocations[link->relocId].data += static_cast<Ptr>(pos);
+    }
+    else {
+      // Not using relocId, this means that we are overwriting a real
+      // displacement in the binary stream.
+      int32_t patchedValue = static_cast<int32_t>(
+        static_cast<intptr_t>(pos) - offset + link->displacement);
+
+      // Size of the value we are going to patch. Only BYTE/DWORD is allowed.
+      uint32_t size = readU8At(offset);
+      ASMJIT_ASSERT(size == 1 || size == 4);
+
+      if (size == 4) {
+        writeI32At(offset, patchedValue);
+      }
+      else {
+        ASMJIT_ASSERT(size == 1);
+        if (Utils::isInt8(patchedValue))
+          writeU8At(offset, static_cast<uint32_t>(patchedValue) & 0xFF);
+        else
+          error = kErrorIllegalDisplacement;
+      }
+    }
+
+    prev = link->prev;
+    link = prev;
+  }
+
+  // Chain unused links.
+  link = data->links;
+  if (link) {
+    if (prev == nullptr)
+      prev = link;
+
+    prev->prev = _unusedLinks;
+    _unusedLinks = link;
+  }
+
+  // Set as bound (offset is zero or greater and no links).
+  data->offset = pos;
+  data->links = nullptr;
+
+  if (error != kErrorOk)
+    return setLastError(error);
+
+  _comment = nullptr;
+  return error;
+}
+
+// ============================================================================
+// [asmjit::Assembler - Embed]
+// ============================================================================
+
+Error Assembler::embed(const void* data, uint32_t size) noexcept {
+  if (getRemainingSpace() < size) {
+    Error error = _grow(size);
+    if (error != kErrorOk)
+      return setLastError(error);
+  }
+
+  uint8_t* cursor = getCursor();
+  ::memcpy(cursor, data, size);
+  setCursor(cursor + size);
+
+#if !defined(ASMJIT_DISABLE_LOGGER)
+  if (_logger)
+    _logger->logBinary(Logger::kStyleData, data, size);
+#endif // !ASMJIT_DISABLE_LOGGER
+
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::Assembler - Reloc]
+// ============================================================================
+
+size_t Assembler::relocCode(void* dst, Ptr baseAddress) const noexcept {
+  if (baseAddress == kNoBaseAddress)
+    baseAddress = static_cast<Ptr>((uintptr_t)dst);
+  return _relocCode(dst, baseAddress);
+}
+
+// ============================================================================
+// [asmjit::Assembler - Make]
+// ============================================================================
+
+void* Assembler::make() noexcept {
+  // Do nothing on error condition or if no instruction has been emitted.
+  if (_lastError != kErrorOk || getCodeSize() == 0)
+    return nullptr;
+
+  void* p;
+  Error error = _runtime->add(&p, this);
+
+  if (error != kErrorOk)
+    setLastError(error);
+
+  return p;
+}
+
+// ============================================================================
+// [asmjit::Assembler - Emit (Helpers)]
+// ============================================================================
+
+#define NA noOperand
+
+Error Assembler::emit(uint32_t code) {
+  return _emit(code, NA, NA, NA, NA);
+}
+
+Error Assembler::emit(uint32_t code, const Operand& o0) {
+  return _emit(code, o0, NA, NA, NA);
+}
+
+Error Assembler::emit(uint32_t code, const Operand& o0, const Operand& o1) {
+  return _emit(code, o0, o1, NA, NA);
+}
+
+Error Assembler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2) {
+  return _emit(code, o0, o1, o2, NA);
+}
+
+Error Assembler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) {
+  return _emit(code, o0, o1, o2, o3);
+}
+
+Error Assembler::emit(uint32_t code, int o0) {
+  return _emit(code, Imm(o0), NA, NA, NA);
+}
+
+Error Assembler::emit(uint32_t code, const Operand& o0, int o1) {
+  return _emit(code, o0, Imm(o1), NA, NA);
+}
+
+Error Assembler::emit(uint32_t code, const Operand& o0, const Operand& o1, int o2) {
+  return _emit(code, o0, o1, Imm(o2), NA);
+}
+
+Error Assembler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, int o3) {
+  return _emit(code, o0, o1, o2, Imm(o3));
+}
+
+Error Assembler::emit(uint32_t code, int64_t o0) {
+  return _emit(code, Imm(o0), NA, NA, NA);
+}
+
+Error Assembler::emit(uint32_t code, const Operand& o0, int64_t o1) {
+  return _emit(code, o0, Imm(o1), NA, NA);
+}
+
+Error Assembler::emit(uint32_t code, const Operand& o0, const Operand& o1, int64_t o2) {
+  return _emit(code, o0, o1, Imm(o2), NA);
+}
+
+Error Assembler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, int64_t o3) {
+  return _emit(code, o0, o1, o2, Imm(o3));
+}
+
+#undef NA
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
diff --git a/DynamicHooks/thirdparty/AsmJit/base/assembler.h b/DynamicHooks/thirdparty/AsmJit/base/assembler.h
new file mode 100644
index 0000000..822de3c
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/assembler.h
@@ -0,0 +1,1005 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_ASSEMBLER_H
+#define _ASMJIT_BASE_ASSEMBLER_H
+
+// [Dependencies]
+#include "../base/containers.h"
+#include "../base/logger.h"
+#include "../base/operand.h"
+#include "../base/podvector.h"
+#include "../base/runtime.h"
+#include "../base/zone.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+//! \addtogroup asmjit_base
+//! \{
+
+// ============================================================================
+// [asmjit::InstId]
+// ============================================================================
+
+//! Instruction codes (stub).
+ASMJIT_ENUM(InstId) {
+  //! No instruction.
+  kInstIdNone = 0
+};
+
+// ============================================================================
+// [asmjit::InstOptions]
+// ============================================================================
+
+//! Instruction options.
+ASMJIT_ENUM(InstOptions) {
+  //! No instruction options.
+  kInstOptionNone = 0x00000000,
+
+  //! Emit short form of the instruction (X86/X64 only).
+  //!
+  //! X86/X64 Specific
+  //! ----------------
+  //!
+  //! Short form is mostly related to jmp and jcc instructions, but can be used
+  //! by other instructions supporting 8-bit or 32-bit immediates. This option
+  //! can be dangerous if the short jmp/jcc is required, but not encodable due
+  //! to a large displacement; in such a case an error is reported.
+  kInstOptionShortForm = 0x00000001,
+
+  //! Emit long form of the instruction (X86/X64 only).
+  //!
+  //! X86/X64 Specific
+  //! ----------------
+  //!
+  //! Long form is mostly related to jmp and jcc instructions, but like the
+  //! `kInstOptionShortForm` option it can be used by other instructions
+  //! supporting both 8-bit and 32-bit immediates.
+  kInstOptionLongForm = 0x00000002,
+
+  //! Condition is likely to be taken.
+  //!
+  //! X86/X64 Specific
+  //! ----------------
+  //!
+  //! This option has no effect at the moment. Intel stopped supporting
+  //! conditional hints after P4 and AMD has never supported them.
+  kInstOptionTaken = 0x00000004,
+
+  //! Condition is unlikely to be taken.
+  //!
+  //! X86/X64 Specific
+  //! ----------------
+  //!
+  //! This option has no effect at the moment. Intel stopped supporting
+  //! conditional hints after P4 and AMD has never supported them.
+  kInstOptionNotTaken = 0x00000008,
+
+  //! Don't follow the jump (Compiler only).
+  //!
+  //! Prevents following the jump during compilation.
+  kInstOptionUnfollow = 0x00000010,
+
+  //! Overwrite the destination operand (Compiler only).
+  //!
+  //! Hint that is important for variable liveness analysis. It tells the
+  //! compiler that the destination operand will be overwritten now or by
+  //! adjacent instructions. Compiler knows when a variable is overwritten by
+  //! a single instruction, for example you don't have to mark "movaps" or
+  //! "pxor x, x" instructions, however, if a pair of instructions is used,
+  //! and the first of them doesn't completely overwrite the content of the
+  //! destination, then the compiler fails to mark that variable as dead.
+  //!
+  //! X86/X64 Specific
+  //! ----------------
+  //!
+  //! - All instructions that always overwrite at least the size of the
+  //!   register that the variable uses, for example "mov", "movq", "movaps"
+  //!   don't need the overwrite modifier to be used - conversion, shuffle,
+  //!   and other miscellaneous instructions included.
+  //!
+  //! - All instructions that clear the destination register if all operands
+  //!   are the same, for example "xor x, x", "pcmpeqb", etc...
+  //!
+  //! - Consecutive instructions that partially overwrite the variable until
+  //!   there is no old content require the `overwrite()` to be used. Some
+  //!   examples (not always the best use cases though):
+  //!
+  //!   - `movlps xmm0, ?` followed by `movhps xmm0, ?` and vice versa
+  //!   - `movlpd xmm0, ?` followed by `movhpd xmm0, ?` and vice versa
+  //!   - `mov al, ?` followed by `and ax, 0xFF`
+  //!   - `mov al, ?` followed by `mov ah, al`
+  //!   - `pinsrq xmm0, ?, 0` followed by `pinsrq xmm0, ?, 1`
+  //!
+  //! - If an allocated variable is used temporarily for scalar operations.
+  //!   For example if you allocate a full vector like `X86Compiler::newXmm()`
+  //!   and then use that vector for scalar operations you should use the
+  //!   `overwrite()` directive:
+  //!
+  //!   - `sqrtss x, y` - only the LO element of `x` is changed, so if you
+  //!     don't use the HI elements, use `X86Compiler.overwrite().sqrtss(x, y)`.
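+  //!
+  //! A short sketch of that last case (variable names are illustrative only):
+  //!
+  //! ~~~
+  //! X86XmmVar x = c.newXmm();
+  //! X86XmmVar y = c.newXmm();
+  //!
+  //! // Only the LO element of 'x' is written and its HI elements are never
+  //! // read, so tell the liveness analysis that 'x' is fully overwritten.
+  //! c.overwrite().sqrtss(x, y);
+  //! ~~~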
+  kInstOptionOverwrite = 0x00000020
+};
+
+// ============================================================================
+// [asmjit::AlignMode]
+// ============================================================================
+
+//! Code aligning mode.
+ASMJIT_ENUM(AlignMode) {
+  //! Align by emitting a sequence that can be executed (code).
+  kAlignCode = 0,
+  //! Align by emitting a sequence that shouldn't be executed (data).
+  kAlignData = 1,
+  //! Align by emitting a sequence of zeros.
+  kAlignZero = 2
+};
+
+// ============================================================================
+// [asmjit::RelocMode]
+// ============================================================================
+
+//! Relocation mode.
+ASMJIT_ENUM(RelocMode) {
+  //! Relocate an absolute address to an absolute address.
+  kRelocAbsToAbs = 0,
+  //! Relocate a relative address to an absolute address.
+  kRelocRelToAbs = 1,
+  //! Relocate an absolute address to a relative address.
+  kRelocAbsToRel = 2,
+  //! Relocate an absolute address to a relative address or use trampoline.
+  kRelocTrampoline = 3
+};
+
+// ============================================================================
+// [asmjit::LabelLink]
+// ============================================================================
+
+//! \internal
+//!
+//! Data structure used to link labels.
+struct LabelLink {
+  //! Previous link.
+  LabelLink* prev;
+  //! Offset.
+  intptr_t offset;
+  //! Inlined displacement.
+  intptr_t displacement;
+  //! RelocId in case the link has to be absolute after relocation.
+  intptr_t relocId;
+};
+
+// ============================================================================
+// [asmjit::LabelData]
+// ============================================================================
+
+//! \internal
+//!
+//! Label data.
+struct LabelData {
+  //! Label offset.
+  intptr_t offset;
+  //! Label links chain.
+  LabelLink* links;
+
+  //! External tool ID, if linked to any.
+  uint64_t exId;
+  //! Pointer to a data that `ExternalTool` associated with the label.
+  void* exData;
+};
+
+// ============================================================================
+// [asmjit::RelocData]
+// ============================================================================
+
+//! \internal
+//!
+//! Code relocation data (relative vs. absolute addresses).
+//!
+//! X86/X64 Specific
+//! ----------------
+//!
+//! The X86 architecture uses the 32-bit absolute addressing model for memory
+//! operands, but 64-bit mode uses the relative addressing model (RIP +
+//! displacement). In code we always use the relative addressing model for
+//! referencing labels and embedded data. In 32-bit mode we must patch all
+//! references to absolute addresses before the generated function can be
+//! called.
+struct RelocData {
+  //! Type of relocation.
+  uint32_t type;
+  //! Size of relocation (4 or 8 bytes).
+  uint32_t size;
+
+  //! Offset from the initial code address.
+  Ptr from;
+  //! Relative displacement from the initial code address or from the absolute address.
+  Ptr data;
+};
+
+// ============================================================================
+// [asmjit::ErrorHandler]
+// ============================================================================
+
+//! Error handler.
+//!
+//! Error handler can be used to override the default behavior of `Assembler`
+//! error handling and propagation. See `handleError()` on how to override it.
+//!
+//! Please note that `addRef` and `release` functions are used, but there is
+//! no reference counting implemented by default; reimplement these functions
+//! to change the default behavior.
+class ASMJIT_VIRTAPI ErrorHandler {
+ public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new `ErrorHandler` instance.
+  ASMJIT_API ErrorHandler() noexcept;
+  //! Destroy the `ErrorHandler` instance.
+  ASMJIT_API virtual ~ErrorHandler() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [AddRef / Release]
+  // --------------------------------------------------------------------------
+
+  //! Reference this error handler.
+  //!
+  //! NOTE: This member function is provided for convenience. The default
+  //! implementation does nothing. If you are working in an environment where
+  //! multiple `ErrorHandler` instances are used by different code generators
+  //! you may provide your own functionality for reference counting. In that
+  //! case the `addRef()` and `release()` functions should be overridden.
+  ASMJIT_API virtual ErrorHandler* addRef() const noexcept;
+
+  //! Release this error handler.
+  //!
+  //! NOTE: This member function is provided for convenience. See `addRef()`
+  //! for more detailed information related to reference counting.
+  ASMJIT_API virtual void release() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Handle Error]
+  // --------------------------------------------------------------------------
+
+  //! Error handler (pure).
+  //!
+  //! Error handler is called after an error happened. An error can happen in
+  //! many places, but the error handler is mostly used by `Assembler` to
+  //! report a fatal problem. There are multiple ways the error handler can be
+  //! used:
+  //!
+  //! 1. Returning `true` or `false` from `handleError()`. If `true` is
+  //!    returned it means that the error was reported and AsmJit can continue
+  //!    with code-generation. However, `false` reports to AsmJit that the
+  //!    error cannot be handled; in such case it stores the error in
+  //!    `Assembler` and puts it into an error state. The error is accessible
+  //!    through `Assembler::getLastError()`. Returning `false` is the default
+  //!    behavior when no error handler is used.
+  //!
+  //! 2. AsmJit doesn't use exception handling, so your error handler should
+  //!    also not throw an exception; however, it's possible to use plain old
+  //!    C's `setjmp()` and `longjmp()`. AsmJit always puts `Assembler` and
+  //!    `Compiler` into a consistent state before calling `handleError()`,
+  //!    so you can use `longjmp()` to leave the code-generation if an error
+  //!    happened.
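+  //!
+  //! A sketch of the `longjmp()` approach (the handler type and its jump
+  //! buffer are illustrative, not part of AsmJit; requires `setjmp.h`):
+  //!
+  //! ~~~
+  //! struct LongJmpErrorHandler : public ErrorHandler {
+  //!   virtual bool handleError(Error code, const char* message, void* origin) noexcept {
+  //!     // Never returns; unwinds back to the setjmp() call site below.
+  //!     longjmp(jb, static_cast<int>(code));
+  //!   }
+  //!
+  //!   jmp_buf jb;
+  //! };
+  //!
+  //! // In the code-generation routine:
+  //! //   LongJmpErrorHandler eh;
+  //! //   a.setErrorHandler(&eh);
+  //! //   if (setjmp(eh.jb) == 0) { /* emit code */ }
+  //! //   else { /* recover from the reported error */ }
+  //! ~~~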
+  virtual bool handleError(Error code, const char* message, void* origin) noexcept = 0;
+};
+
+// ============================================================================
+// [asmjit::ExternalTool]
+// ============================================================================
+
+//! An external tool (i.e. `Stream` or `Compiler`) that can serialize to `Assembler`.
+class ASMJIT_VIRTAPI ExternalTool {
+ public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API ExternalTool() noexcept;
+  ASMJIT_API virtual ~ExternalTool() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Attach / Reset]
+  // --------------------------------------------------------------------------
+
+  //! \internal
+  //!
+  //! Called to attach this code generator to the `assembler`.
+  virtual Error attach(Assembler* assembler) noexcept = 0;
+
+  //! Reset the code-generator (also detaches if attached).
+  virtual void reset(bool releaseMemory) noexcept = 0;
+
+  // --------------------------------------------------------------------------
+  // [Finalize]
+  // --------------------------------------------------------------------------
+
+  //! Finalize the code-generation.
+  //!
+  //! The finalization has two passes:
+  //!   - serializes code to the attached assembler.
+  //!   - resets the `ExternalTool` (detaching from the `Assembler` as well)
+  //!     so it can be reused or destroyed.
+  virtual Error finalize() noexcept = 0;
+
+  // --------------------------------------------------------------------------
+  // [Runtime / Assembler]
+  // --------------------------------------------------------------------------
+
+  //! Get the `Runtime` instance that is associated with the code-generator.
+  ASMJIT_INLINE Runtime* getRuntime() const noexcept { return _runtime; }
+  //! Get the `Assembler` instance that is associated with the code-generator.
+  ASMJIT_INLINE Assembler* getAssembler() const noexcept { return _assembler; }
+
+  // --------------------------------------------------------------------------
+  // [Architecture]
+  // --------------------------------------------------------------------------
+
+  //! Get the target architecture.
+  ASMJIT_INLINE uint32_t getArch() const noexcept { return _arch; }
+  //! Get the default register size - 4 or 8 bytes, depends on the target.
+  ASMJIT_INLINE uint32_t getRegSize() const noexcept { return _regSize; }
+
+  // --------------------------------------------------------------------------
+  // [Error Handling]
+  // --------------------------------------------------------------------------
+
+  //! Get the last error code.
+  ASMJIT_INLINE Error getLastError() const noexcept { return _lastError; }
+  //! Set the last error code and propagate it through the error handler.
+  ASMJIT_API Error setLastError(Error error, const char* message = nullptr) noexcept;
+  //! Clear the last error code.
+  ASMJIT_INLINE void resetLastError() noexcept { _lastError = kErrorOk; }
+
+  // --------------------------------------------------------------------------
+  // [ID]
+  // --------------------------------------------------------------------------
+
+  //! Get the tool ID, provided by `Assembler` when attached to it.
+  ASMJIT_INLINE uint64_t getExId() const noexcept { return _exId; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Associated runtime.
+  Runtime* _runtime;
+  //! Associated assembler.
+  Assembler* _assembler;
+
+  //! `ExternalTool` ID, provided by `Assembler`.
+  //!
+  //! If multiple high-level code generators are associated with a single
+  //! assembler, the `_exId` member can be used to distinguish between them
+  //! and to provide a mechanism to check whether the high-level code generator
+  //! is accessing the resource it really owns.
+  uint64_t _exId;
+
+  //! Target's architecture ID.
+  uint8_t _arch;
+  //! Target's architecture GP register size in bytes (4 or 8).
+  uint8_t _regSize;
+  //! The code generator has been finalized.
+  uint8_t _finalized;
+  //! \internal
+  uint8_t _reserved;
+  //! Last error code.
+  uint32_t _lastError;
+};
+
+// ============================================================================
+// [asmjit::Assembler]
+// ============================================================================
+
+//! Base assembler.
+//!
+//! This class implements a base interface that is used by architecture
+//! specific assemblers.
+//!
+//! \sa Compiler.
+class ASMJIT_VIRTAPI Assembler {
+ public:
+  ASMJIT_NO_COPY(Assembler)
+
+  // --------------------------------------------------------------------------
+  // [Options]
+  // --------------------------------------------------------------------------
+
+  //! Assembler options.
+  ASMJIT_ENUM(Options) {
+    //! Emit optimized code-alignment sequences (`Assembler` and `Compiler`).
+    //!
+    //! Default `true`.
+    //!
+    //! X86/X64 Specific
+    //! ----------------
+    //!
+    //! The default align sequence used by the X86/X64 architecture is the
+    //! one-byte 0x90 opcode that is mostly shown by disassemblers as nop.
+    //! However there are more optimized align sequences for 2-11 bytes that
+    //! may execute faster. If this feature is enabled asmjit will generate
+    //! specialized sequences for alignment between 1 and 11 bytes. Also when
+    //! `X86Compiler` is used, it can add REX prefixes into the code to make
+    //! some instructions longer so that no alignment sequence is needed.
+    kOptionOptimizedAlign = 0,
+
+    //! Emit jump-prediction hints (`Assembler` and `Compiler`).
+    //!
+    //! Default `false`.
+    //!
+    //! X86/X64 Specific
+    //! ----------------
+    //!
+    //! Jump prediction is usually based on the direction of the jump. If the
+    //! jump is backward it is usually predicted as taken, and if the jump is
+    //! forward it is usually predicted as not-taken. The reason is that loops
+    //! generally use backward jumps and conditions usually use forward jumps.
+    //! However this behavior can be overridden by using instruction prefixes.
+    //! If this option is enabled these hints will be emitted.
+    //!
+    //! This feature is disabled by default, because the only processor that
+    //! used to take prediction hints into consideration was P4. Newer
+    //! processors implement heuristics for branch prediction that ignore any
+    //! static hints.
+    kOptionPredictedJumps = 1
+  };
+
+  // --------------------------------------------------------------------------
+  // [Buffer]
+  // --------------------------------------------------------------------------
+
+  //! Code or data buffer.
+  struct Buffer {
+    //! Code data.
+    uint8_t* data;
+    //! Total length of `data` in bytes.
+    size_t capacity;
+    //! Number of bytes of `data` used.
+    size_t length;
+    //! Current offset (assembler's cursor) in bytes.
+    size_t offset;
+  };
+
+  // --------------------------------------------------------------------------
+  // [Section]
+  // --------------------------------------------------------------------------
+
+  //! Code or data section.
+  struct Section {
+    //! Section id.
+    uint32_t id;
+    //! Section flags.
+    uint32_t flags;
+    //! Section name (limited to 35 characters, PE allows max 8 chars).
+    char name[36];
+    //! Section alignment requirements (0 if no requirements).
+    uint32_t alignment;
+    //! Section content.
+    Buffer content;
+  };
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new `Assembler` instance.
+  ASMJIT_API Assembler(Runtime* runtime) noexcept;
+  //! Destroy the `Assembler` instance.
+  ASMJIT_API virtual ~Assembler() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  //! Reset the assembler.
+  //!
+  //! If `releaseMemory` is `true`, all buffers will be released to the system.
+  ASMJIT_API void reset(bool releaseMemory = false) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Runtime]
+  // --------------------------------------------------------------------------
+
+  //! Get the runtime associated with the assembler.
+  //!
+  //! NOTE: Runtime is persistent across `reset()` calls.
+  ASMJIT_INLINE Runtime* getRuntime() const noexcept { return _runtime; }
+
+  // --------------------------------------------------------------------------
+  // [Architecture]
+  // --------------------------------------------------------------------------
+
+  //! Get the target architecture.
+  ASMJIT_INLINE uint32_t getArch() const noexcept { return _arch; }
+  //! Get the default register size - 4 or 8 bytes, depends on the target.
+  ASMJIT_INLINE uint32_t getRegSize() const noexcept { return _regSize; }
+
+  // --------------------------------------------------------------------------
+  // [Logging]
+  // --------------------------------------------------------------------------
+
+#if !defined(ASMJIT_DISABLE_LOGGER)
+  //! Get whether the assembler has a logger.
+  ASMJIT_INLINE bool hasLogger() const noexcept { return _logger != nullptr; }
+  //! Get the logger.
+  ASMJIT_INLINE Logger* getLogger() const noexcept { return _logger; }
+  //! Set the logger to `logger`.
+  ASMJIT_INLINE void setLogger(Logger* logger) noexcept { _logger = logger; }
+#endif // !ASMJIT_DISABLE_LOGGER
+
+  // --------------------------------------------------------------------------
+  // [Error Handling]
+  // --------------------------------------------------------------------------
+
+  //! Get the error handler.
+  ASMJIT_INLINE ErrorHandler* getErrorHandler() const noexcept { return _errorHandler; }
+  //! Set the error handler.
+  ASMJIT_API Error setErrorHandler(ErrorHandler* handler) noexcept;
+  //! Clear the error handler.
+  ASMJIT_INLINE Error resetErrorHandler() noexcept { return setErrorHandler(nullptr); }
+
+  //! Get the last error code.
+  ASMJIT_INLINE Error getLastError() const noexcept { return _lastError; }
+  //! Set the last error code and propagate it through the error handler.
+  ASMJIT_API Error setLastError(Error error, const char* message = nullptr) noexcept;
+  //! Clear the last error code.
+  ASMJIT_INLINE void resetLastError() noexcept { _lastError = kErrorOk; }
+
+  // --------------------------------------------------------------------------
+  // [Serializers]
+  // --------------------------------------------------------------------------
+
+  //! \internal
+  //!
+  //! Called after the external tool `exTool` has been attached to the assembler.
+  ASMJIT_INLINE void _attached(ExternalTool* exTool) noexcept {
+    exTool->_runtime = getRuntime();
+    exTool->_assembler = this;
+    exTool->_exId = _nextExId();
+    _exCountAttached++;
+  }
+
+  //! \internal
+  //!
+  //! Called after the external tool `exTool` has been detached from the assembler.
+  ASMJIT_INLINE void _detached(ExternalTool* exTool) noexcept {
+    exTool->_runtime = nullptr;
+    exTool->_assembler = nullptr;
+    exTool->_exId = 0;
+    _exCountAttached--;
+  }
+
+  //! \internal
+  //!
+  //! Return a new code-gen ID (always greater than zero).
+  ASMJIT_INLINE uint64_t _nextExId() noexcept {
+    ASMJIT_ASSERT(_exIdGenerator != ASMJIT_UINT64_C(0xFFFFFFFFFFFFFFFF));
+    return ++_exIdGenerator;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Assembler Options]
+  // --------------------------------------------------------------------------
+
+  //! Get global assembler options.
+  ASMJIT_INLINE uint32_t getAsmOptions() const noexcept {
+    return _asmOptions;
+  }
+  //! Get whether the global assembler `option` is turned on.
+  ASMJIT_INLINE bool hasAsmOption(uint32_t option) const noexcept {
+    return (_asmOptions & option) != 0;
+  }
+  //! Turn on global assembler `options`.
+  ASMJIT_INLINE void addAsmOptions(uint32_t options) noexcept {
+    _asmOptions |= options;
+  }
+  //! Turn off global assembler `options`.
+  ASMJIT_INLINE void clearAsmOptions(uint32_t options) noexcept {
+    _asmOptions &= ~options;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Instruction Options]
+  // --------------------------------------------------------------------------
+
+  //! Get options of the next instruction.
+  ASMJIT_INLINE uint32_t getInstOptions() const noexcept {
+    return _instOptions;
+  }
+  //! Set options of the next instruction.
+  ASMJIT_INLINE void setInstOptions(uint32_t instOptions) noexcept {
+    _instOptions = instOptions;
+  }
+  //! Get options of the next instruction and reset them.
+  ASMJIT_INLINE uint32_t getInstOptionsAndReset() noexcept {
+    uint32_t instOptions = _instOptions;
+    _instOptions = 0;
+    return instOptions;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Code-Buffer]
+  // --------------------------------------------------------------------------
+
+  //! Grow the code-buffer.
+  //!
+  //! The internal code-buffer will grow at least by `n` bytes so `n` bytes can
+  //! be added to it. If `n` is zero or `getOffset() + n` is not greater than
+  //! the current capacity of the code-buffer this function does nothing.
+  ASMJIT_API Error _grow(size_t n) noexcept;
+  //! Reserve the code-buffer to at least `n` bytes.
+  ASMJIT_API Error _reserve(size_t n) noexcept;
+
+  //! Get capacity of the code-buffer.
+  ASMJIT_INLINE size_t getCapacity() const noexcept {
+    return (size_t)(_end - _buffer);
+  }
+  //! Get the number of remaining bytes in the code-buffer.
+  ASMJIT_INLINE size_t getRemainingSpace() const noexcept {
+    return (size_t)(_end - _cursor);
+  }
+
+  //! Get current code size, same as `getOffset() + getTrampolinesSize()`.
+  ASMJIT_INLINE size_t getCodeSize() const noexcept {
+    return getOffset() + getTrampolinesSize();
+  }
+
+  //! Get size of all possible trampolines.
+  //!
+  //! Trampolines are needed to successfully generate relative jumps to
+  //! absolute addresses. This value is only non-zero if jmp or call
+  //! instructions were used with an immediate operand (this means jumping or
+  //! calling an absolute address directly).
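+  //!
+  //! For example, when relocating assembled code manually (a sketch; the
+  //! executable allocation is left to the caller and `allocExec` is
+  //! hypothetical):
+  //!
+  //! ~~~
+  //! size_t codeSize = a.getCodeSize();  // Includes possible trampolines.
+  //! void* p = allocExec(codeSize);      // Hypothetical executable allocator.
+  //! a.relocCode(p, (Ptr)(uintptr_t)p);  // Patch references for address 'p'.
+  //! ~~~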
+  ASMJIT_INLINE size_t getTrampolinesSize() const noexcept { return _trampolinesSize; }
+
+  //! Get code-buffer.
+  ASMJIT_INLINE uint8_t* getBuffer() const noexcept { return _buffer; }
+  //! Get the end of the code-buffer (points to the first byte that is invalid).
+  ASMJIT_INLINE uint8_t* getEnd() const noexcept { return _end; }
+
+  //! Get the current position in the code-buffer.
+  ASMJIT_INLINE uint8_t* getCursor() const noexcept { return _cursor; }
+  //! Set the current position in the buffer.
+  ASMJIT_INLINE void setCursor(uint8_t* cursor) noexcept {
+    ASMJIT_ASSERT(cursor >= _buffer && cursor <= _end);
+    _cursor = cursor;
+  }
+
+  //! Get the current offset in the buffer.
+  ASMJIT_INLINE size_t getOffset() const noexcept { return (size_t)(_cursor - _buffer); }
+  //! Set the current offset in the buffer to `offset` and return the previous value.
+  ASMJIT_INLINE size_t setOffset(size_t offset) noexcept {
+    ASMJIT_ASSERT(offset < getCapacity());
+
+    size_t oldOffset = (size_t)(_cursor - _buffer);
+    _cursor = _buffer + offset;
+    return oldOffset;
+  }
+
+  //! Read `int8_t` at index `pos`.
+  ASMJIT_INLINE int32_t readI8At(size_t pos) const noexcept {
+    ASMJIT_ASSERT(pos + 1 <= (size_t)(_end - _buffer));
+    return Utils::readI8(_buffer + pos);
+  }
+
+  //! Read `uint8_t` at index `pos`.
+  ASMJIT_INLINE uint32_t readU8At(size_t pos) const noexcept {
+    ASMJIT_ASSERT(pos + 1 <= (size_t)(_end - _buffer));
+    return Utils::readU8(_buffer + pos);
+  }
+
+  //! Read `int16_t` at index `pos`.
+  ASMJIT_INLINE int32_t readI16At(size_t pos) const noexcept {
+    ASMJIT_ASSERT(pos + 2 <= (size_t)(_end - _buffer));
+    return Utils::readI16u(_buffer + pos);
+  }
+
+  //! Read `uint16_t` at index `pos`.
+  ASMJIT_INLINE uint32_t readU16At(size_t pos) const noexcept {
+    ASMJIT_ASSERT(pos + 2 <= (size_t)(_end - _buffer));
+    return Utils::readU16u(_buffer + pos);
+  }
+
+  //! Read `int32_t` at index `pos`.
+  ASMJIT_INLINE int32_t readI32At(size_t pos) const noexcept {
+    ASMJIT_ASSERT(pos + 4 <= (size_t)(_end - _buffer));
+    return Utils::readI32u(_buffer + pos);
+  }
+
+  //! Read `uint32_t` at index `pos`.
+  ASMJIT_INLINE uint32_t readU32At(size_t pos) const noexcept {
+    ASMJIT_ASSERT(pos + 4 <= (size_t)(_end - _buffer));
+    return Utils::readU32u(_buffer + pos);
+  }
+
+  //! Read `int64_t` at index `pos`.
+  ASMJIT_INLINE int64_t readI64At(size_t pos) const noexcept {
+    ASMJIT_ASSERT(pos + 8 <= (size_t)(_end - _buffer));
+    return Utils::readI64u(_buffer + pos);
+  }
+
+  //! Read `uint64_t` at index `pos`.
+  ASMJIT_INLINE uint64_t readU64At(size_t pos) const noexcept {
+    ASMJIT_ASSERT(pos + 8 <= (size_t)(_end - _buffer));
+    return Utils::readU64u(_buffer + pos);
+  }
+
+  //! Write `int8_t` at index `pos`.
+  ASMJIT_INLINE void writeI8At(size_t pos, int32_t x) noexcept {
+    ASMJIT_ASSERT(pos + 1 <= (size_t)(_end - _buffer));
+    Utils::writeI8(_buffer + pos, x);
+  }
+
+  //! Write `uint8_t` at index `pos`.
+  ASMJIT_INLINE void writeU8At(size_t pos, uint32_t x) noexcept {
+    ASMJIT_ASSERT(pos + 1 <= (size_t)(_end - _buffer));
+    Utils::writeU8(_buffer + pos, x);
+  }
+
+  //! Write `int16_t` at index `pos`.
+  ASMJIT_INLINE void writeI16At(size_t pos, int32_t x) noexcept {
+    ASMJIT_ASSERT(pos + 2 <= (size_t)(_end - _buffer));
+    Utils::writeI16u(_buffer + pos, x);
+  }
+
+  //! Write `uint16_t` at index `pos`.
+  ASMJIT_INLINE void writeU16At(size_t pos, uint32_t x) noexcept {
+    ASMJIT_ASSERT(pos + 2 <= (size_t)(_end - _buffer));
+    Utils::writeU16u(_buffer + pos, x);
+  }
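
The `read*At`/`write*At` helpers above are the patching primitives used once an offset is known. A hedged sketch of the usual fixup pattern; `a` is a concrete assembler and the placeholder emission is assumed to happen in the elided part:

```cpp
size_t fixup = a.getOffset();   // start of a 4-byte rel32 placeholder
// ... emit the placeholder and the code that follows it ...
int32_t rel = static_cast<int32_t>(a.getOffset() - (fixup + 4));
a.writeI32At(fixup, rel);       // patch the displacement in place
```
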
+
+  //! Write `int32_t` at index `pos`.
+  ASMJIT_INLINE void writeI32At(size_t pos, int32_t x) noexcept {
+    ASMJIT_ASSERT(pos + 4 <= (size_t)(_end - _buffer));
+    Utils::writeI32u(_buffer + pos, x);
+  }
+
+  //! Write `uint32_t` at index `pos`.
+  ASMJIT_INLINE void writeU32At(size_t pos, uint32_t x) noexcept {
+    ASMJIT_ASSERT(pos + 4 <= (size_t)(_end - _buffer));
+    Utils::writeU32u(_buffer + pos, x);
+  }
+
+  //! Write `int64_t` at index `pos`.
+  ASMJIT_INLINE void writeI64At(size_t pos, int64_t x) noexcept {
+    ASMJIT_ASSERT(pos + 8 <= (size_t)(_end - _buffer));
+    Utils::writeI64u(_buffer + pos, x);
+  }
+
+  //! Write `uint64_t` at index `pos`.
+  ASMJIT_INLINE void writeU64At(size_t pos, uint64_t x) noexcept {
+    ASMJIT_ASSERT(pos + 8 <= (size_t)(_end - _buffer));
+    Utils::writeU64u(_buffer + pos, x);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Embed]
+  // --------------------------------------------------------------------------
+
+  //! Embed raw data into the code-buffer.
+  ASMJIT_API virtual Error embed(const void* data, uint32_t size) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Align]
+  // --------------------------------------------------------------------------
+
+  //! Align target buffer to the `offset` specified.
+  //!
+  //! The sequence that is used to fill the gap between the aligned location
+  //! and the current one depends on `alignMode`, see \ref AlignMode.
+  virtual Error align(uint32_t alignMode, uint32_t offset) noexcept = 0;
+
+  // --------------------------------------------------------------------------
+  // [Label]
+  // --------------------------------------------------------------------------
+
+  //! Get number of labels created.
+  ASMJIT_INLINE size_t getLabelsCount() const noexcept {
+    return _labels.getLength();
+  }
+
+  //! Get whether the `label` is valid (i.e. registered).
+  ASMJIT_INLINE bool isLabelValid(const Label& label) const noexcept {
+    return isLabelValid(label.getId());
+  }
+  //! Get whether the label `id` is valid (i.e. registered).
+  ASMJIT_INLINE bool isLabelValid(uint32_t id) const noexcept {
+    return static_cast<size_t>(id) < _labels.getLength();
+  }
+
+  //! Get whether the `label` is bound.
+  //!
+  //! NOTE: It's an error to pass a label that is not valid. Check the validity
+  //! of the label by using the `isLabelValid()` method before the bound check
+  //! if you are not sure about its validity, otherwise you may hit an
+  //! assertion failure in debug mode, and undefined behavior in release mode.
+  ASMJIT_INLINE bool isLabelBound(const Label& label) const noexcept {
+    return isLabelBound(label.getId());
+  }
+  //! \overload
+  ASMJIT_INLINE bool isLabelBound(uint32_t id) const noexcept {
+    ASMJIT_ASSERT(isLabelValid(id));
+    return _labels[id]->offset != -1;
+  }
+
+  //! Get a `label` offset or -1 if the label is not yet bound.
+  ASMJIT_INLINE intptr_t getLabelOffset(const Label& label) const noexcept {
+    return getLabelOffset(label.getId());
+  }
+  //! \overload
+  ASMJIT_INLINE intptr_t getLabelOffset(uint32_t id) const noexcept {
+    ASMJIT_ASSERT(isLabelValid(id));
+    return _labels[id]->offset;
+  }
+
+  //! Get `LabelData` by `label`.
+  ASMJIT_INLINE LabelData* getLabelData(const Label& label) const noexcept {
+    return getLabelData(label.getId());
+  }
+  //! \overload
+  ASMJIT_INLINE LabelData* getLabelData(uint32_t id) const noexcept {
+    ASMJIT_ASSERT(isLabelValid(id));
+    return const_cast<LabelData*>(_labels[id]);
+  }
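
Taken together, the label queries support the following lifecycle, shown as a small sketch (`a` is a concrete assembler; `newLabel()` and `bind()` are declared just below):

```cpp
Label L = a.newLabel();            // valid as soon as it is created
ASMJIT_ASSERT(a.isLabelValid(L));
ASMJIT_ASSERT(!a.isLabelBound(L)); // offset is still -1
// ... emit forward jumps that target L ...
a.bind(L);                         // L now refers to the current offset
ASMJIT_ASSERT(a.getLabelOffset(L) != -1);
```
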
+
+  //! \internal
+  //!
+  //! Create a new label and return its ID.
+  ASMJIT_API uint32_t _newLabelId() noexcept;
+
+  //! \internal
+  //!
+  //! New LabelLink instance.
+  ASMJIT_API LabelLink* _newLabelLink() noexcept;
+
+  //! Create and return a new `Label`.
+  ASMJIT_INLINE Label newLabel() noexcept { return Label(_newLabelId()); }
+
+  //! Bind the `label` to the current offset.
+  //!
+  //! NOTE: A label can be bound only once!
+  ASMJIT_API virtual Error bind(const Label& label) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Reloc]
+  // --------------------------------------------------------------------------
+
+  //! Relocate the code to `baseAddress` and copy it to `dst`.
+  //!
+  //! \param dst Contains the location where the relocated code should be
+  //! copied. The pointer can be an address returned by a virtual memory
+  //! allocator or any other address that has sufficient space.
+  //!
+  //! \param baseAddress Base address used for relocation. The `JitRuntime`
+  //! always sets the `baseAddress` address to be the same as `dst`, but other
+  //! runtimes, for example `StaticRuntime`, do not have to follow this rule.
+  //!
+  //! \retval The number of bytes actually used. If the code generator reserved
+  //! space for possible trampolines, but didn't use it, the number of bytes
+  //! used can actually be less than the expected worst case. The virtual
+  //! memory allocator can then shrink the memory it allocated the first time.
+  //!
+  //! The given buffer will be overwritten; to get the number of bytes
+  //! required, use `getCodeSize()`.
+  ASMJIT_API size_t relocCode(void* dst, Ptr baseAddress = kNoBaseAddress) const noexcept;
+
+  //! \internal
+  //!
+  //! Reloc code.
+  virtual size_t _relocCode(void* dst, Ptr baseAddress) const noexcept = 0;
+
+  // --------------------------------------------------------------------------
+  // [Make]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual void* make() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+  //! Emit an instruction (virtual).
+  virtual Error _emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) = 0;
+
+  //! Emit an instruction.
+  ASMJIT_API Error emit(uint32_t code);
+  //! \overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0);
+  //! \overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, const Operand& o1);
+  //! \overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2);
+  //! \overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3);
+
+  //! Emit an instruction that has an immediate operand.
+  ASMJIT_API Error emit(uint32_t code, int o0);
+  //! \overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, int o1);
+  //! \overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, const Operand& o1, int o2);
+  //! \overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, int o3);
+
+  //! \overload
+  ASMJIT_API Error emit(uint32_t code, int64_t o0);
+  //! \overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, int64_t o1);
+  //! \overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, const Operand& o1, int64_t o2);
+  //! \overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, int64_t o3);
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Associated runtime.
+  Runtime* _runtime;
+  //! Associated logger.
+  Logger* _logger;
+  //! Associated error handler, triggered by \ref setLastError().
+  ErrorHandler* _errorHandler;
+
+  //! Target architecture ID.
+  uint8_t _arch;
+  //! Target architecture GP register size in bytes (4 or 8).
+  uint8_t _regSize;
+  //! \internal
+  uint16_t _reserved;
+
+  //! Assembler options, used by \ref getAsmOptions() and \ref hasAsmOption().
+  uint32_t _asmOptions;
+  //! Instruction options, affect the next instruction that will be emitted.
+  uint32_t _instOptions;
+  //! Last error code.
+  uint32_t _lastError;
+
+  //! External tool ID generator.
+  uint64_t _exIdGenerator;
+  //! Count of external tools currently attached.
+  size_t _exCountAttached;
+
+  //! General purpose zone allocator.
+  Zone _zoneAllocator;
+
+  //! Start of the code-buffer of the current section.
+  uint8_t* _buffer;
+  //! End of the code-buffer of the current section (points to the first invalid byte).
+  uint8_t* _end;
+  //! The current position in `_buffer` of the current section.
+  uint8_t* _cursor;
+
+  //! Size of all possible trampolines.
+  uint32_t _trampolinesSize;
+
+  //! Inline comment that will be logged by the next instruction and set to nullptr.
+  const char* _comment;
+  //! Unused `LabelLink` structures pool.
+  LabelLink* _unusedLinks;
+
+  //! Assembler sections.
+  PodVectorTmp<SectionData*, 4> _sections;
+  //! Assembler labels.
+  PodVectorTmp<LabelData*, 16> _labels;
+  //! Table of relocations.
+  PodVector<RelocData> _relocations;
+};
+
+//! \}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_ASSEMBLER_H
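
Before the next file, a quick sketch of how the sizing and relocation members fit together at a call site. This is hypothetical glue code: `allocExecutable` stands in for whatever executable-memory provider is used (the bundled `JitRuntime` does this internally and passes `dst` itself as the base address):

```cpp
size_t worst = a.getCodeSize();          // offset + worst-case trampolines
void* dst = allocExecutable(worst);      // hypothetical allocation
size_t used = a.relocCode(dst, (Ptr)(uintptr_t)dst);
// `used` may be smaller than `worst` when reserved trampolines go unused.
```
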
diff --git a/DynamicHooks/thirdparty/AsmJit/base/compiler.cpp b/DynamicHooks/thirdparty/AsmJit/base/compiler.cpp
new file mode 100644
index 0000000..8b7116d
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/compiler.cpp
@@ -0,0 +1,630 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Guard]
+#include "../build.h"
+#if !defined(ASMJIT_DISABLE_COMPILER)
+
+// [Dependencies]
+#include "../base/assembler.h"
+#include "../base/compiler.h"
+#include "../base/compilercontext_p.h"
+#include "../base/cpuinfo.h"
+#include "../base/logger.h"
+#include "../base/utils.h"
+#include <stdarg.h>
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [Constants]
+// ============================================================================
+
+static const char noName[1] = { '\0' };
+enum { kCompilerDefaultLookAhead = 64 };
+
+// ============================================================================
+// [asmjit::Compiler - Construction / Destruction]
+// ============================================================================
+
+Compiler::Compiler() noexcept
+  : _features(0),
+    _maxLookAhead(kCompilerDefaultLookAhead),
+    _instOptions(0),
+    _tokenGenerator(0),
+    _nodeFlowId(0),
+    _nodeFlags(0),
+    _targetVarMapping(nullptr),
+    _firstNode(nullptr),
+    _lastNode(nullptr),
+    _cursor(nullptr),
+    _func(nullptr),
+    _zoneAllocator(8192 - Zone::kZoneOverhead),
+    _varAllocator(4096 - Zone::kZoneOverhead),
+    _stringAllocator(4096 - Zone::kZoneOverhead),
+    _constAllocator(4096 - Zone::kZoneOverhead),
+    _localConstPool(&_constAllocator),
+    _globalConstPool(&_zoneAllocator) {}
+Compiler::~Compiler() noexcept {}
+
+// ============================================================================
+// [asmjit::Compiler - Attach / Reset]
+// ============================================================================
+
+void Compiler::reset(bool releaseMemory) noexcept {
+  Assembler* assembler = getAssembler();
+  if (assembler != nullptr)
+    assembler->_detached(this);
+
+  _arch = kArchNone;
+  _regSize = 0;
+  _finalized = false;
+  _lastError = kErrorNotInitialized;
+
+  _features = 0;
+  _maxLookAhead = kCompilerDefaultLookAhead;
+
+  _instOptions = 0;
+  _tokenGenerator = 0;
+
+  _nodeFlowId = 0;
+  _nodeFlags = 0;
+
+  _firstNode = nullptr;
+  _lastNode = nullptr;
+
+  _cursor = nullptr;
+  _func = nullptr;
+
+  _localConstPool.reset();
+  _globalConstPool.reset();
+
+  _localConstPoolLabel.reset();
+  _globalConstPoolLabel.reset();
+
+  _zoneAllocator.reset(releaseMemory);
+  _varAllocator.reset(releaseMemory);
+  _stringAllocator.reset(releaseMemory);
+  _constAllocator.reset(releaseMemory);
+
+  _varList.reset(releaseMemory);
+}
+
+// ============================================================================
+// [asmjit::Compiler - Node-Factory]
+// ============================================================================
+
+HLData* Compiler::newDataNode(const void* data, uint32_t size) noexcept {
+  if (size > HLData::kInlineBufferSize) {
+    void* clonedData = _stringAllocator.alloc(size);
+    if (clonedData == nullptr)
+      return nullptr;
+
+    if (data != nullptr)
+      ::memcpy(clonedData, data, size);
+    data = clonedData;
+  }
+
+  return newNode<HLData>(const_cast<void*>(data), size);
+}
+
+HLAlign* Compiler::newAlignNode(uint32_t alignMode, uint32_t offset) noexcept {
+  return newNode<HLAlign>(alignMode, offset);
+}
+
+HLLabel* Compiler::newLabelNode() noexcept {
+  Assembler* assembler = getAssembler();
+  if (assembler == nullptr) return nullptr;
+
+  uint32_t id = assembler->_newLabelId();
+  LabelData* ld = assembler->getLabelData(id);
+
+  HLLabel* node = newNode<HLLabel>(id);
+  if (node == nullptr) return nullptr;
+
+  // These have to be zero now.
+  ASMJIT_ASSERT(ld->exId == 0);
+  ASMJIT_ASSERT(ld->exData == nullptr);
+
+  ld->exId = _exId;
+  ld->exData = node;
+
+  return node;
+}
+
+HLComment* Compiler::newCommentNode(const char* str) noexcept {
+  if (str != nullptr && str[0]) {
+    str = _stringAllocator.sdup(str);
+    if (str == nullptr)
+      return nullptr;
+  }
+
+  return newNode<HLComment>(str);
+}
+
+HLHint* Compiler::newHintNode(Var& var, uint32_t hint, uint32_t value) noexcept {
+  if (var.getId() == kInvalidValue)
+    return nullptr;
+
+  VarData* vd = getVd(var);
+  return newNode<HLHint>(vd, hint, value);
+}
+
+// ============================================================================
+// [asmjit::Compiler - Code-Stream]
+// ============================================================================
+
+HLNode* Compiler::addFunc(HLFunc* func) noexcept {
+  ASMJIT_ASSERT(_func == nullptr);
+  _func = func;
+
+  addNode(func);                 // Add function node.
+  addNode(func->getEntryNode()); // Add function entry.
+  HLNode* cursor = getCursor();
+
+  addNode(func->getExitNode());  // Add function exit / epilog marker.
+  addNode(func->getEnd());       // Add function end.
+  setCursor(cursor);
+
+  return func;
+}
+
+HLNode* Compiler::addNode(HLNode* node) noexcept {
+  ASMJIT_ASSERT(node != nullptr);
+  ASMJIT_ASSERT(node->_prev == nullptr);
+  ASMJIT_ASSERT(node->_next == nullptr);
+
+  if (_cursor == nullptr) {
+    if (_firstNode == nullptr) {
+      _firstNode = node;
+      _lastNode = node;
+    }
+    else {
+      node->_next = _firstNode;
+      _firstNode->_prev = node;
+      _firstNode = node;
+    }
+  }
+  else {
+    HLNode* prev = _cursor;
+    HLNode* next = _cursor->_next;
+
+    node->_prev = prev;
+    node->_next = next;
+
+    prev->_next = node;
+    if (next)
+      next->_prev = node;
+    else
+      _lastNode = node;
+  }
+
+  _cursor = node;
+  return node;
+}
+
+HLNode* Compiler::addNodeBefore(HLNode* node, HLNode* ref) noexcept {
+  ASMJIT_ASSERT(node != nullptr);
+  ASMJIT_ASSERT(node->_prev == nullptr);
+  ASMJIT_ASSERT(node->_next == nullptr);
+  ASMJIT_ASSERT(ref != nullptr);
+
+  HLNode* prev = ref->_prev;
+  HLNode* next = ref;
+
+  node->_prev = prev;
+  node->_next = next;
+
+  next->_prev = node;
+  if (prev)
+    prev->_next = node;
+  else
+    _firstNode = node;
+
+  return node;
+}
+
+HLNode* Compiler::addNodeAfter(HLNode* node, HLNode* ref) noexcept {
+  ASMJIT_ASSERT(node != nullptr);
+  ASMJIT_ASSERT(node->_prev == nullptr);
+  ASMJIT_ASSERT(node->_next == nullptr);
+  ASMJIT_ASSERT(ref != nullptr);
+
+  HLNode* prev = ref;
+  HLNode* next = ref->_next;
+
+  node->_prev = prev;
+  node->_next = next;
+
+  prev->_next = node;
+  if (next)
+    next->_prev = node;
+  else
+    _lastNode = node;
+
+  return node;
+}
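
`addNode()` always links behind the cursor, so moving the cursor is how code is emitted out of order; `addFunc()` above relies on exactly this to keep emission between a function's entry and exit markers. A hypothetical sketch (`c` is a `Compiler`, `entry` and `prolog` are nodes obtained earlier):

```cpp
HLNode* old = c.setCursor(entry); // rewind to an earlier insertion point
c.addNode(prolog);                // linked right after `entry`
c.setCursor(old);                 // resume appending at the previous spot
```
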
+
+static ASMJIT_INLINE void Compiler_nodeRemoved(Compiler* self, HLNode* node_) noexcept {
+  if (node_->isJmpOrJcc()) {
+    HLJump* node = static_cast<HLJump*>(node_);
+    HLLabel* label = node->getTarget();
+
+    if (label != nullptr) {
+      // Disconnect.
+      HLJump** pPrev = &label->_from;
+      for (;;) {
+        ASMJIT_ASSERT(*pPrev != nullptr);
+        HLJump* current = *pPrev;
+
+        if (current == nullptr)
+          break;
+
+        if (current == node) {
+          *pPrev = node->_jumpNext;
+          break;
+        }
+
+        pPrev = &current->_jumpNext;
+      }
+
+      label->subNumRefs();
+    }
+  }
+}
+
+HLNode* Compiler::removeNode(HLNode* node) noexcept {
+  HLNode* prev = node->_prev;
+  HLNode* next = node->_next;
+
+  if (_firstNode == node)
+    _firstNode = next;
+  else
+    prev->_next = next;
+
+  if (_lastNode == node)
+    _lastNode = prev;
+  else
+    next->_prev = prev;
+
+  node->_prev = nullptr;
+  node->_next = nullptr;
+
+  if (_cursor == node)
+    _cursor = prev;
+  Compiler_nodeRemoved(this, node);
+
+  return node;
+}
+
+void Compiler::removeNodes(HLNode* first, HLNode* last) noexcept {
+  if (first == last) {
+    removeNode(first);
+    return;
+  }
+
+  HLNode* prev = first->_prev;
+  HLNode* next = last->_next;
+
+  if (_firstNode == first)
+    _firstNode = next;
+  else
+    prev->_next = next;
+
+  if (_lastNode == last)
+    _lastNode = prev;
+  else
+    next->_prev = prev;
+
+  HLNode* node = first;
+  for (;;) {
+    HLNode* next = node->getNext();
+    ASMJIT_ASSERT(next != nullptr);
+
+    node->_prev = nullptr;
+    node->_next = nullptr;
+
+    if (_cursor == node)
+      _cursor = prev;
+    Compiler_nodeRemoved(this, node);
+
+    if (node == last)
+      break;
+    node = next;
+  }
+}
+
+HLNode* Compiler::setCursor(HLNode* node) noexcept {
+  HLNode* old = _cursor;
+  _cursor = node;
+  return old;
+}
+
+// ============================================================================
+// [asmjit::Compiler - Align]
+// ============================================================================
+
+Error Compiler::align(uint32_t alignMode, uint32_t offset) noexcept {
+  HLAlign* node = newAlignNode(alignMode, offset);
+  if (node == nullptr)
+    return setLastError(kErrorNoHeapMemory);
+
+  addNode(node);
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::Compiler - Label]
+// ============================================================================
+
+HLLabel* Compiler::getHLLabel(uint32_t id) const noexcept {
+  Assembler* assembler = getAssembler();
+  if (assembler == nullptr) return nullptr;
+
+  LabelData* ld = assembler->getLabelData(id);
+  if (ld->exId == _exId)
+    return static_cast<HLLabel*>(ld->exData);
+  else
+    return nullptr;
+}
+
+bool Compiler::isLabelValid(uint32_t id) const noexcept {
+  Assembler* assembler = getAssembler();
+  if (assembler == nullptr) return false;
+
+  return static_cast<size_t>(id) < assembler->getLabelsCount();
+}
+
+uint32_t Compiler::_newLabelId() noexcept {
+  HLLabel* node = newLabelNode();
+  if (node == nullptr) {
+    setLastError(kErrorNoHeapMemory);
+    return kInvalidValue;
+  }
+
+  return node->getLabelId();
+}
+
+Error Compiler::bind(const Label& label) noexcept {
+  HLLabel* node = getHLLabel(label);
+  if (node == nullptr)
+    return setLastError(kErrorInvalidState);
+  addNode(node);
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::Compiler - Embed]
+// ============================================================================
+
+Error Compiler::embed(const void* data, uint32_t size) noexcept {
+  HLData* node = newDataNode(data, size);
+  if (node == nullptr)
+    return setLastError(kErrorNoHeapMemory);
+
+  addNode(node);
+  return kErrorOk;
+}
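
`embedConstPool()` below is essentially a composition of calls shown earlier: align the stream, bind the label, then append the pool bytes as a data node. In terms of the public API (with `pool` assumed to be an already-populated `ConstPool` and `poolLabel` a valid label):

```cpp
c.align(kAlignData, static_cast<uint32_t>(pool.getAlignment()));
c.bind(poolLabel);
// ... then a data node of pool.getSize() bytes, filled via pool.fill() ...
```
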
+
+Error Compiler::embedConstPool(const Label& label, const ConstPool& pool) noexcept {
+  if (label.getId() == kInvalidValue)
+    return kErrorInvalidState;
+
+  align(kAlignData, static_cast<uint32_t>(pool.getAlignment()));
+  bind(label);
+
+  HLData* embedNode = newDataNode(nullptr, static_cast<uint32_t>(pool.getSize()));
+  if (embedNode == nullptr)
+    return kErrorNoHeapMemory;
+
+  pool.fill(embedNode->getData());
+  addNode(embedNode);
+
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::Compiler - Comment]
+// ============================================================================
+
+Error Compiler::comment(const char* fmt, ...) noexcept {
+  char buf[256];
+  char* p = buf;
+
+  if (fmt) {
+    va_list ap;
+    va_start(ap, fmt);
+    int len = vsnprintf(p, 254, fmt, ap);
+    va_end(ap);
+
+    // NOTE: `vsnprintf` returns the would-be length, which can exceed the
+    // limit passed in; clamp it so `p` never points past the buffer.
+    if (len < 0) len = 0;
+    if (len > 253) len = 253;
+    p += len;
+  }
+
+  p[0] = '\0';
+
+  HLComment* node = newCommentNode(buf);
+  if (node == nullptr)
+    return setLastError(kErrorNoHeapMemory);
+
+  addNode(node);
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::Compiler - Hint]
+// ============================================================================
+
+Error Compiler::_hint(Var& var, uint32_t hint, uint32_t value) noexcept {
+  if (var.getId() == kInvalidValue)
+    return kErrorOk;
+
+  HLHint* node = newHintNode(var, hint, value);
+  if (node == nullptr)
+    return setLastError(kErrorNoHeapMemory);
+
+  addNode(node);
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::Compiler - Vars]
+// ============================================================================
+
+VarData* Compiler::_newVd(const VarInfo& vi, const char* name) noexcept {
+  VarData* vd = reinterpret_cast<VarData*>(_varAllocator.alloc(sizeof(VarData)));
+  if (ASMJIT_UNLIKELY(vd == nullptr))
+    goto _NoMemory;
+
+  vd->_name = noName;
+  vd->_id = OperandUtil::makeVarId(static_cast<uint32_t>(_varList.getLength()));
+  vd->_localId = kInvalidValue;
+
+#if !defined(ASMJIT_DISABLE_LOGGER)
+  if (name != nullptr && name[0] != '\0') {
+    vd->_name = _stringAllocator.sdup(name);
+  }
+#endif // !ASMJIT_DISABLE_LOGGER
+
+  vd->_type = static_cast<uint8_t>(vi.getTypeId());
+  vd->_class = static_cast<uint8_t>(vi.getRegClass());
+  vd->_flags = 0;
+  vd->_priority = 10;
+
+  vd->_state = kVarStateNone;
+  vd->_regIndex = kInvalidReg;
+  vd->_isStack = false;
+  vd->_isMemArg = false;
+  vd->_isCalculated = false;
+  vd->_saveOnUnuse = false;
+  vd->_modified = false;
+  vd->_reserved0 = 0;
+  vd->_alignment = static_cast<uint8_t>(Utils::iMin<uint32_t>(vi.getSize(), 64));
+
+  vd->_size = vi.getSize();
+  vd->_homeMask = 0;
+
+  vd->_memOffset = 0;
+  vd->_memCell = nullptr;
+
+  vd->rReadCount = 0;
+  vd->rWriteCount = 0;
+  vd->mReadCount = 0;
+  vd->mWriteCount = 0;
+
+  vd->_va = nullptr;
+
+  if (ASMJIT_UNLIKELY(_varList.append(vd) != kErrorOk))
+    goto _NoMemory;
+  return vd;
+
+_NoMemory:
+  setLastError(kErrorNoHeapMemory);
+  return nullptr;
+}
+
+Error Compiler::alloc(Var& var) noexcept {
+  if (var.getId() == kInvalidValue)
+    return kErrorOk;
+  return _hint(var, kVarHintAlloc, kInvalidValue);
+}
+
+Error Compiler::alloc(Var& var, uint32_t regIndex) noexcept {
+  if (var.getId() == kInvalidValue)
+    return kErrorOk;
+  return _hint(var, kVarHintAlloc, regIndex);
+}
+
+Error Compiler::alloc(Var& var, const Reg& reg) noexcept {
+  if (var.getId() == kInvalidValue)
+    return kErrorOk;
+  return _hint(var, kVarHintAlloc, reg.getRegIndex());
+}
+
+Error Compiler::save(Var& var) noexcept {
+  if (var.getId() == kInvalidValue)
+    return kErrorOk;
+  return _hint(var, kVarHintSave, kInvalidValue);
+}
+
+Error Compiler::spill(Var& var) noexcept {
+  if (var.getId() == kInvalidValue)
+    return kErrorOk;
+  return _hint(var, kVarHintSpill, kInvalidValue);
+}
+
+Error Compiler::unuse(Var& var) noexcept {
+  if (var.getId() == kInvalidValue)
+    return kErrorOk;
+  return _hint(var, kVarHintUnuse, kInvalidValue);
+}
+
+uint32_t Compiler::getPriority(Var& var) const noexcept {
+  if (var.getId() == kInvalidValue)
+    return kInvalidValue;
+
+  VarData* vd = getVdById(var.getId());
+  return vd->getPriority();
+}
+
+void Compiler::setPriority(Var& var, uint32_t priority) noexcept {
+  if (var.getId() == kInvalidValue)
+    return;
+
+  if (priority > 255)
+    priority = 255;
+
+  VarData* vd = getVdById(var.getId());
+  vd->_priority = static_cast<uint8_t>(priority);
+}
+
+bool Compiler::getSaveOnUnuse(Var& var) const noexcept {
+  if (var.getId() == kInvalidValue)
+    return false;
+
+  VarData* vd = getVdById(var.getId());
+  return static_cast<bool>(vd->_saveOnUnuse);
+}
+
+void Compiler::setSaveOnUnuse(Var& var, bool value) noexcept {
+  if (var.getId() == kInvalidValue)
+    return;
+
+  VarData* vd = getVdById(var.getId());
+  vd->_saveOnUnuse = value;
+}
+
+void Compiler::rename(Var& var, const char* fmt, ...) noexcept {
+  if (var.getId() == kInvalidValue)
+    return;
+
+  VarData* vd = getVdById(var.getId());
+  vd->_name = noName;
+
+  if (fmt != nullptr && fmt[0] != '\0') {
+    char buf[64];
+
+    va_list ap;
+    va_start(ap, fmt);
+
+    vsnprintf(buf, ASMJIT_ARRAY_SIZE(buf), fmt, ap);
+    buf[ASMJIT_ARRAY_SIZE(buf) - 1] = '\0';
+
+    vd->_name = _stringAllocator.sdup(buf);
+    va_end(ap);
+  }
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // !ASMJIT_DISABLE_COMPILER
diff --git a/DynamicHooks/thirdparty/AsmJit/base/compiler.h b/DynamicHooks/thirdparty/AsmJit/base/compiler.h
new file mode 100644
index 0000000..e9eafd1
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/compiler.h
@@ -0,0 +1,576 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_COMPILER_H
+#define _ASMJIT_BASE_COMPILER_H
+
+#include "../build.h"
+#if !defined(ASMJIT_DISABLE_COMPILER)
+
+// [Dependencies]
+#include "../base/assembler.h"
+#include "../base/compilerfunc.h"
+#include "../base/constpool.h"
+#include "../base/containers.h"
+#include "../base/hlstream.h"
+#include "../base/operand.h"
+#include "../base/podvector.h"
+#include "../base/utils.h"
+#include "../base/zone.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [Forward Declarations]
+// ============================================================================
+
+struct VarAttr;
+struct VarData;
+struct VarMap;
+struct VarState;
+
+//! \addtogroup asmjit_base
+//! \{
+
+// ============================================================================
+// [asmjit::CompilerFeatures]
+// ============================================================================
+
+ASMJIT_ENUM(CompilerFeatures) {
+  //! Schedule instructions so they can be executed faster (`Compiler` only).
+  //!
+  //! Default `false` - has to be explicitly enabled as the scheduler needs
+  //! some time to run.
+  //!
+  //! X86/X64 Specific
+  //! ----------------
+  //!
+  //! If scheduling is enabled AsmJit will try to reorder instructions to
+  //! minimize the dependency chain. The scheduler always runs after the
+  //! registers are allocated, so it doesn't change the number of register
+  //! allocs/spills.
+  //!
+  //! This feature is highly experimental and untested.
+  kCompilerFeatureEnableScheduler = 0
+};
+
+// ============================================================================
+// [asmjit::ConstScope]
+// ============================================================================
+
+//! Scope of the constant.
+ASMJIT_ENUM(ConstScope) {
+  //! Local constant, always embedded right after the current function.
+  kConstScopeLocal = 0,
+  //! Global constant, embedded at the end of the currently compiled code.
+  kConstScopeGlobal = 1
+};
+
+// ============================================================================
+// [asmjit::VarInfo]
+// ============================================================================
+
+struct VarInfo {
+  // ============================================================================
+  // [Flags]
+  // ============================================================================
+
+  //! \internal
+  //!
+  //! Variable flags.
+  ASMJIT_ENUM(Flags) {
+    //! Variable contains one or more single-precision floating-point values.
+    kFlagSP = 0x10,
+    //! Variable contains one or more double-precision floating-point values.
+    kFlagDP = 0x20,
+    //! Variable is a vector, contains packed data.
+    kFlagSIMD = 0x80
+  };
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get type id.
+  ASMJIT_INLINE uint32_t getTypeId() const noexcept { return _typeId; }
+  //! Get type name.
+  ASMJIT_INLINE const char* getTypeName() const noexcept { return _typeName; }
+
+  //! Get register size in bytes.
+  ASMJIT_INLINE uint32_t getSize() const noexcept { return _size; }
+  //! Get variable class, see \ref RegClass.
+  ASMJIT_INLINE uint32_t getRegClass() const noexcept { return _regClass; }
+  //! Get register type, see `X86RegType`.
+  ASMJIT_INLINE uint32_t getRegType() const noexcept { return _regType; }
+  //! Get type flags, see \ref Flags.
+  ASMJIT_INLINE uint32_t getFlags() const noexcept { return _flags; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Variable type id.
+  uint8_t _typeId;
+  //! Variable and register size (in bytes).
+  uint8_t _size;
+  //! Register class, see `RegClass`.
+  uint8_t _regClass;
+  //! Register type the variable is mapped to.
+  uint8_t _regType;
+
+  //! Variable info flags, see \ref Flags.
+  uint32_t _flags;
+
+  //! Variable type name.
+  char _typeName[8];
+};
+
+// ============================================================================
+// [asmjit::Compiler]
+// ============================================================================
+
+//! Compiler interface.
+//!
+//! \sa Assembler.
+class ASMJIT_VIRTAPI Compiler : public ExternalTool {
+ public:
+  ASMJIT_NO_COPY(Compiler)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new `Compiler` instance.
+  ASMJIT_API Compiler() noexcept;
+  //! Destroy the `Compiler` instance.
+  ASMJIT_API virtual ~Compiler() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  //! \override
+  ASMJIT_API virtual void reset(bool releaseMemory) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Compiler Features]
+  // --------------------------------------------------------------------------
+
+  //! Get code-generator features.
+  ASMJIT_INLINE uint32_t getFeatures() const noexcept {
+    return _features;
+  }
+  //! Set code-generator features.
+  ASMJIT_INLINE void setFeatures(uint32_t features) noexcept {
+    _features = features;
+  }
+
+  //! Get code-generator `feature`.
+  ASMJIT_INLINE bool hasFeature(uint32_t feature) const noexcept {
+    ASMJIT_ASSERT(feature < 32);
+    return (_features & (1 << feature)) != 0;
+  }
+
+  //! Set code-generator `feature` to `value`.
+  ASMJIT_INLINE void setFeature(uint32_t feature, bool value) noexcept {
+    ASMJIT_ASSERT(feature < 32);
+    // NOTE: Compute the mask separately; the previous single-expression form
+    // produced an empty mask for `value == false` and could never clear a bit.
+    uint32_t mask = 1u << feature;
+    _features = value ? (_features | mask) : (_features & ~mask);
+  }
+
+  //! Get maximum look ahead.
+  ASMJIT_INLINE uint32_t getMaxLookAhead() const noexcept {
+    return _maxLookAhead;
+  }
+  //! Set maximum look ahead to `val`.
+  ASMJIT_INLINE void setMaxLookAhead(uint32_t val) noexcept {
+    _maxLookAhead = val;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Token ID]
+  // --------------------------------------------------------------------------
+
+  //! \internal
+  //!
+  //! Reset the token-id generator.
+  ASMJIT_INLINE void _resetTokenGenerator() noexcept {
+    _tokenGenerator = 0;
+  }
+
+  //! \internal
+  //!
+  //! Generate a new unique token id.
+  ASMJIT_INLINE uint32_t _generateUniqueToken() noexcept {
+    return ++_tokenGenerator;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Instruction Options]
+  // --------------------------------------------------------------------------
+
+  //! Get options of the next instruction.
+  ASMJIT_INLINE uint32_t getInstOptions() const noexcept {
+    return _instOptions;
+  }
+  //! Set options of the next instruction.
+  ASMJIT_INLINE void setInstOptions(uint32_t instOptions) noexcept {
+    _instOptions = instOptions;
+  }
+
+  //! Get options of the next instruction and reset them.
+  ASMJIT_INLINE uint32_t getInstOptionsAndReset() noexcept {
+    uint32_t instOptions = _instOptions;
+    _instOptions = 0;
+    return instOptions;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Node-Factory]
+  // --------------------------------------------------------------------------
+
+  //! \internal
+  template<typename T>
+  ASMJIT_INLINE T* newNode() noexcept {
+    void* p = _zoneAllocator.alloc(sizeof(T));
+    return new(p) T(this);
+  }
+
+  //! \internal
+  template<typename T, typename P0>
+  ASMJIT_INLINE T* newNode(P0 p0) noexcept {
+    void* p = _zoneAllocator.alloc(sizeof(T));
+    return new(p) T(this, p0);
+  }
+
+  //! \internal
+  template<typename T, typename P0, typename P1>
+  ASMJIT_INLINE T* newNode(P0 p0, P1 p1) noexcept {
+    void* p = _zoneAllocator.alloc(sizeof(T));
+    return new(p) T(this, p0, p1);
+  }
+
+  //! \internal
+  template<typename T, typename P0, typename P1, typename P2>
+  ASMJIT_INLINE T* newNode(P0 p0, P1 p1, P2 p2) noexcept {
+    void* p = _zoneAllocator.alloc(sizeof(T));
+    return new(p) T(this, p0, p1, p2);
+  }
+
+  //! \internal
+  //!
+  //! Create a new `HLData` node.
+  ASMJIT_API HLData* newDataNode(const void* data, uint32_t size) noexcept;
+
+  //! \internal
+  //!
+  //! Create a new `HLAlign` node.
+  ASMJIT_API HLAlign* newAlignNode(uint32_t alignMode, uint32_t offset) noexcept;
+
+  //! \internal
+  //!
+  //! Create a new `HLLabel` node.
+  ASMJIT_API HLLabel* newLabelNode() noexcept;
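
Each `newNode<T>()` above is zone-backed placement new: raw bytes come from `_zoneAllocator`, the node is constructed in place, and nothing is freed individually (the zone is dropped wholesale by `reset()`). A standalone equivalent, assuming `zone` is a `Zone` like `_zoneAllocator`, `c` is the owning compiler, and `HLComment`'s constructor matches the `T(this, p0)` call:

```cpp
#include <new>  // placement new

void* p = zone.alloc(sizeof(HLComment));                 // may return nullptr
HLComment* n = p ? new(p) HLComment(&c, str) : nullptr;  // construct in place
```
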
+
+  //! \internal
+  //!
+  //! Create a new `HLComment` node.
+  ASMJIT_API HLComment* newCommentNode(const char* str) noexcept;
+
+  //! \internal
+  //!
+  //! Create a new `HLHint` node.
+  ASMJIT_API HLHint* newHintNode(Var& var, uint32_t hint, uint32_t value) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Code-Stream]
+  // --------------------------------------------------------------------------
+
+  //! Add the function `func` to the stream.
+  ASMJIT_API HLNode* addFunc(HLFunc* func) noexcept;
+
+  //! Add node `node` after the current one and make it the new current node.
+  ASMJIT_API HLNode* addNode(HLNode* node) noexcept;
+  //! Insert `node` before `ref`.
+  ASMJIT_API HLNode* addNodeBefore(HLNode* node, HLNode* ref) noexcept;
+  //! Insert `node` after `ref`.
+  ASMJIT_API HLNode* addNodeAfter(HLNode* node, HLNode* ref) noexcept;
+  //! Remove `node`.
+  ASMJIT_API HLNode* removeNode(HLNode* node) noexcept;
+  //! Remove multiple nodes.
+  ASMJIT_API void removeNodes(HLNode* first, HLNode* last) noexcept;
+
+  //! Get the first node.
+  ASMJIT_INLINE HLNode* getFirstNode() const noexcept { return _firstNode; }
+  //! Get the last node.
+  ASMJIT_INLINE HLNode* getLastNode() const noexcept { return _lastNode; }
+
+  //! Get the current node.
+  //!
+  //! \note If this method returns `nullptr` it means that nothing has been
+  //! emitted yet.
+  ASMJIT_INLINE HLNode* getCursor() const noexcept { return _cursor; }
+  //! \internal
+  //!
+  //! Set the current node without returning the previous node.
+  ASMJIT_INLINE void _setCursor(HLNode* node) noexcept { _cursor = node; }
+  //! Set the current node to `node` and return the previous one.
+  ASMJIT_API HLNode* setCursor(HLNode* node) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Func]
+  // --------------------------------------------------------------------------
+
+  //! Get the current function.
+  ASMJIT_INLINE HLFunc* getFunc() const noexcept { return _func; }
+
+  // --------------------------------------------------------------------------
+  // [Align]
+  // --------------------------------------------------------------------------
+
+  //! Align target buffer to the `offset` specified.
+  //!
+  //! The sequence that is used to fill the gap between the aligned location
+  //! and the current one depends on `alignMode`, see \ref AlignMode.
+  ASMJIT_API Error align(uint32_t alignMode, uint32_t offset) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Label]
+  // --------------------------------------------------------------------------
+
+  //! Get `HLLabel` by `id`.
+  //!
+  //! NOTE: The label has to be valid, see `isLabelValid()`.
+  ASMJIT_API HLLabel* getHLLabel(uint32_t id) const noexcept;
+
+  //! Get `HLLabel` by `label`.
+  //!
+  //! NOTE: The label has to be valid, see `isLabelValid()`.
+  ASMJIT_INLINE HLLabel* getHLLabel(const Label& label) noexcept {
+    return getHLLabel(label.getId());
+  }
+
+  //! Get whether the label `id` is valid.
+  ASMJIT_API bool isLabelValid(uint32_t id) const noexcept;
+  //! Get whether the `label` is valid.
+  ASMJIT_INLINE bool isLabelValid(const Label& label) const noexcept {
+    return isLabelValid(label.getId());
+  }
+
+  //! \internal
+  //!
+  //! Create a new label and return its ID.
+  ASMJIT_API uint32_t _newLabelId() noexcept;
+
+  //! Create and return a new `Label`.
+  ASMJIT_INLINE Label newLabel() noexcept { return Label(_newLabelId()); }
+
+  //! Bind the `label` to the current offset.
+  //!
+  //! NOTE: A label can be bound only once!
+  ASMJIT_API Error bind(const Label& label) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Embed]
+  // --------------------------------------------------------------------------
+
+  //! Embed data.
+  ASMJIT_API Error embed(const void* data, uint32_t size) noexcept;
+
+  //! Embed a constant pool data, adding the following in order:
+  //!   1. Data alignment.
+  //!   2. Label.
+  //!   3. Constant pool data.
+  ASMJIT_API Error embedConstPool(const Label& label, const ConstPool& pool) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Comment]
+  // --------------------------------------------------------------------------
+
+  //! Emit a single comment line.
+  ASMJIT_API Error comment(const char* fmt, ...) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Hint]
+  // --------------------------------------------------------------------------
+
+  //! Emit a new hint (purely informational node).
+  ASMJIT_API Error _hint(Var& var, uint32_t hint, uint32_t value) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Vars]
+  // --------------------------------------------------------------------------
+
+  //! Get whether the variable `var` has been created.
+  ASMJIT_INLINE bool isVarValid(const Var& var) const noexcept {
+    return static_cast<size_t>(var.getId() & Operand::kIdIndexMask) < _varList.getLength();
+  }
+
+  //! \internal
+  //!
+  //! Get `VarData` by `var`.
+  ASMJIT_INLINE VarData* getVd(const Var& var) const noexcept {
+    return getVdById(var.getId());
+  }
+
+  //! \internal
+  //!
+  //! Get `VarData` by `id`.
+  ASMJIT_INLINE VarData* getVdById(uint32_t id) const noexcept {
+    ASMJIT_ASSERT(id != kInvalidValue);
+    ASMJIT_ASSERT(static_cast<size_t>(id & Operand::kIdIndexMask) < _varList.getLength());
+
+    return _varList[id & Operand::kIdIndexMask];
+  }
+
+  //! \internal
+  //!
+  //! Get an array of 'VarData*'.
+  ASMJIT_INLINE VarData** _getVdArray() const noexcept {
+    return const_cast<VarData**>(_varList.getData());
+  }
+
+  //! \internal
+  //!
+  //! Create a new `VarData`.
+  ASMJIT_API VarData* _newVd(const VarInfo& vi, const char* name) noexcept;
+
+  //! Alloc variable `var`.
+  ASMJIT_API Error alloc(Var& var) noexcept;
+  //! Alloc variable `var` using `regIndex` as a register index.
+  ASMJIT_API Error alloc(Var& var, uint32_t regIndex) noexcept;
+  //! Alloc variable `var` using `reg` as a register operand.
+  ASMJIT_API Error alloc(Var& var, const Reg& reg) noexcept;
+  //! Spill variable `var`.
+  ASMJIT_API Error spill(Var& var) noexcept;
+  //! Save variable `var` if its status is `modified` at this point.
+  ASMJIT_API Error save(Var& var) noexcept;
+  //! Unuse variable `var`.
+  ASMJIT_API Error unuse(Var& var) noexcept;
+
+  //! Get priority of variable `var`.
+  ASMJIT_API uint32_t getPriority(Var& var) const noexcept;
+  //! Set priority of variable `var` to `priority`.
+  ASMJIT_API void setPriority(Var& var, uint32_t priority) noexcept;
+
+  //! Get the save-on-unuse `var` property.
+  ASMJIT_API bool getSaveOnUnuse(Var& var) const noexcept;
+  //! Set the save-on-unuse `var` property to `value`.
+  ASMJIT_API void setSaveOnUnuse(Var& var, bool value) noexcept;
+
+  //! Rename variable `var` to `name`.
+  //!
+  //! NOTE: Only the new name will appear in the logger.
+  ASMJIT_API void rename(Var& var, const char* fmt, ...) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Stack]
+  // --------------------------------------------------------------------------
+
+  //! \internal
+  //!
+  //! Create a new memory chunk allocated on the current function's stack.
+  virtual Error _newStack(BaseMem* mem, uint32_t size, uint32_t alignment, const char* name) noexcept = 0;
+
+  // --------------------------------------------------------------------------
+  // [Const]
+  // --------------------------------------------------------------------------
+
+  //! \internal
+  //!
+  //! Put data to a constant-pool and get a memory reference to it.
+  virtual Error _newConst(BaseMem* mem, uint32_t scope, const void* data, size_t size) noexcept = 0;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Code-generation features, used by \ref hasFeature() and \ref setFeature().
+  uint32_t _features;
+  //! Maximum count of nodes to look ahead when allocating/spilling
+  //! registers.
+  uint32_t _maxLookAhead;
+
+  //! Options affecting the next instruction.
+  uint32_t _instOptions;
+  //! Processing token generator.
+  //!
+  //! Used to get a unique token that is then used to process `HLNode`s. See
+  //! `Compiler::_generateUniqueToken()` for more details.
+  uint32_t _tokenGenerator;
+
+  //! Flow id added to each node created (used only by `Context`).
+  uint32_t _nodeFlowId;
+  //! Flags added to each node created (used only by `Context`).
+  uint32_t _nodeFlags;
+
+  //! Variable mapping (translates incoming VarType into target).
+  const uint8_t* _targetVarMapping;
+
+  //! First node.
+  HLNode* _firstNode;
+  //! Last node.
+  HLNode* _lastNode;
+
+  //! Current node.
+  HLNode* _cursor;
+  //! Current function.
+  HLFunc* _func;
+
+  //! General purpose zone allocator.
+  Zone _zoneAllocator;
+  //! Variable zone.
+  Zone _varAllocator;
+  //! String/data zone.
+  Zone _stringAllocator;
+  //! Local constant pool zone.
+  Zone _constAllocator;
+
+  //! VarData list.
+  PodVector<VarData*> _varList;
+
+  //! Local constant pool, flushed at the end of each function.
+  ConstPool _localConstPool;
+  //! Global constant pool, flushed at the end of the compilation.
+  ConstPool _globalConstPool;
+
+  //! Label to start of the local constant pool.
+  Label _localConstPoolLabel;
+  //! Label to start of the global constant pool.
+  Label _globalConstPoolLabel;
+};
+
+//! \}
+
+// ============================================================================
+// [Defined-Later]
+// ============================================================================
+
+ASMJIT_INLINE HLNode::HLNode(Compiler* compiler, uint32_t type) noexcept {
+  _prev = nullptr;
+  _next = nullptr;
+  _type = static_cast<uint8_t>(type);
+  _opCount = 0;
+  _flags = static_cast<uint16_t>(compiler->_nodeFlags);
+  _flowId = compiler->_nodeFlowId;
+  _tokenId = 0;
+  _comment = nullptr;
+  _map = nullptr;
+  _liveness = nullptr;
+  _state = nullptr;
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // !ASMJIT_DISABLE_COMPILER
+#endif // _ASMJIT_BASE_COMPILER_H
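
To close out this header, a combined sketch of the public knobs it declares. Hypothetical usage only; `c` stands for a concrete compiler (the x86 one added elsewhere in this patch) and `v` for one of its variables:

```cpp
c.setFeature(kCompilerFeatureEnableScheduler, true); // opt-in, experimental
c.setMaxLookAhead(128);    // widen the allocator's look-ahead window

c.alloc(v);                // hint: materialize `v` in a register here
c.rename(v, "ctr_%d", 0);  // printf-style; affects only logger output
c.spill(v);                // hint: release the register again
c.unuse(v);                // `v` is dead past this point
```
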
diff --git a/DynamicHooks/thirdparty/AsmJit/base/compilercontext.cpp b/DynamicHooks/thirdparty/AsmJit/base/compilercontext.cpp
new file mode 100644
index 0000000..664314c
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/compilercontext.cpp
@@ -0,0 +1,653 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Guard]
+#include "../build.h"
+#if !defined(ASMJIT_DISABLE_COMPILER)
+
+// [Dependencies]
+#include "../base/compilercontext_p.h"
+#include "../base/utils.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::Context - Construction / Destruction]
+// ============================================================================
+
+Context::Context(Compiler* compiler) :
+  _compiler(compiler),
+  _zoneAllocator(8192 - Zone::kZoneOverhead),
+  _traceNode(nullptr),
+  _varMapToVaListOffset(0) {
+
+  Context::reset();
+}
+Context::~Context() {}
+
+// ============================================================================
+// [asmjit::Context - Reset]
+// ============================================================================
+
+void Context::reset(bool releaseMemory) {
+  _zoneAllocator.reset(releaseMemory);
+
+  _func = nullptr;
+  _start = nullptr;
+  _end = nullptr;
+  _extraBlock = nullptr;
+  _stop = nullptr;
+
+  _unreachableList.reset();
+  _returningList.reset();
+  _jccList.reset();
+  _contextVd.reset(releaseMemory);
+
+  _memVarCells = nullptr;
+  _memStackCells = nullptr;
+
+  _mem1ByteVarsUsed = 0;
+  _mem2ByteVarsUsed = 0;
+  _mem4ByteVarsUsed = 0;
+  _mem8ByteVarsUsed = 0;
+  _mem16ByteVarsUsed = 0;
+  _mem32ByteVarsUsed = 0;
+  _mem64ByteVarsUsed = 0;
+  _memStackCellsUsed = 0;
+
+  _memMaxAlign = 0;
+  _memVarTotal = 0;
+  _memStackTotal = 0;
+  _memAllTotal = 0;
+  _annotationLength = 12;
+
+  _state = nullptr;
+}
+
+// ============================================================================
+// [asmjit::Context - Mem]
+// ============================================================================
+
+static ASMJIT_INLINE uint32_t BaseContext_getDefaultAlignment(uint32_t size) {
+  if (size > 32)
+    return 64;
+  else if (size > 16)
+    return 32;
+  else if (size > 8)
+    return 16;
+  else if (size > 4)
+    return 8;
+  else if (size > 2)
+    return 4;
+  else if (size > 1)
+    return 2;
+  else
+    return 1;
+}
+
+VarCell* Context::_newVarCell(VarData* vd) {
+  ASMJIT_ASSERT(vd->_memCell == nullptr);
+
+  VarCell* cell;
+  uint32_t size = vd->getSize();
+
+  if (vd->isStack()) {
+    cell = _newStackCell(size, vd->getAlignment());
+
+    if (cell == nullptr)
+      return nullptr;
+  }
+  else {
+    cell = static_cast<VarCell*>(_zoneAllocator.alloc(sizeof(VarCell)));
+    if (cell == nullptr)
+      goto _NoMemory;
+
+    cell->_next = _memVarCells;
+    _memVarCells = cell;
+
+    cell->_offset = 0;
+    cell->_size = size;
+    cell->_alignment = size;
+
+    _memMaxAlign = Utils::iMax(_memMaxAlign, size);
+    _memVarTotal += size;
+
+    switch (size) {
+      case  1: _mem1ByteVarsUsed++ ; break;
+      case  2: _mem2ByteVarsUsed++ ; break;
+      case  4: _mem4ByteVarsUsed++ ; break;
+      case  8: _mem8ByteVarsUsed++ ; break;
+      case 16: _mem16ByteVarsUsed++; break;
+      case 32: _mem32ByteVarsUsed++; break;
+      case 64: _mem64ByteVarsUsed++; break;
+
+      default:
+        ASMJIT_NOT_REACHED();
+    }
+  }
+
+  vd->_memCell = cell;
+  return cell;
+
+_NoMemory:
+  _compiler->setLastError(kErrorNoHeapMemory);
+  return nullptr;
+}
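
`BaseContext_getDefaultAlignment()` above derives a cell's natural alignment from its size; `_newStackCell()` below falls back to it whenever the caller passes an alignment of zero. The mapping it encodes, spelled out:

```cpp
// size (bytes):  1   2   3-4   5-8   9-16   17-32   33+
// alignment:     1   2    4     8     16     32     64
```
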
+
+VarCell* Context::_newStackCell(uint32_t size, uint32_t alignment) {
+  VarCell* cell = static_cast<VarCell*>(_zoneAllocator.alloc(sizeof(VarCell)));
+  if (cell == nullptr)
+    goto _NoMemory;
+
+  if (alignment == 0)
+    alignment = BaseContext_getDefaultAlignment(size);
+
+  if (alignment > 64)
+    alignment = 64;
+
+  ASMJIT_ASSERT(Utils::isPowerOf2(alignment));
+  size = Utils::alignTo(size, alignment);
+
+  // Insert it sorted according to the alignment and size.
+  {
+    VarCell** pPrev = &_memStackCells;
+    VarCell* cur = *pPrev;
+
+    while (cur != nullptr) {
+      if ((cur->getAlignment() > alignment) ||
+          (cur->getAlignment() == alignment && cur->getSize() > size)) {
+        pPrev = &cur->_next;
+        cur = *pPrev;
+        continue;
+      }
+
+      break;
+    }
+
+    cell->_next = cur;
+    cell->_offset = 0;
+    cell->_size = size;
+    cell->_alignment = alignment;
+
+    *pPrev = cell;
+    _memStackCellsUsed++;
+
+    _memMaxAlign = Utils::iMax(_memMaxAlign, alignment);
+    _memStackTotal += size;
+  }
+
+  return cell;
+
+_NoMemory:
+  _compiler->setLastError(kErrorNoHeapMemory);
+  return nullptr;
+}
+
+Error Context::resolveCellOffsets() {
+  VarCell* varCell = _memVarCells;
+  VarCell* stackCell = _memStackCells;
+
+  uint32_t stackAlignment = 0;
+  if (stackCell != nullptr)
+    stackAlignment = stackCell->getAlignment();
+
+  uint32_t pos64 = 0;
+  uint32_t pos32 = pos64 + _mem64ByteVarsUsed * 64;
+  uint32_t pos16 = pos32 + _mem32ByteVarsUsed * 32;
+  uint32_t pos8  = pos16 + _mem16ByteVarsUsed * 16;
+  uint32_t pos4  = pos8  + _mem8ByteVarsUsed  * 8 ;
+  uint32_t pos2  = pos4  + _mem4ByteVarsUsed  * 4 ;
+  uint32_t pos1  = pos2  + _mem2ByteVarsUsed  * 2 ;
+
+  uint32_t stackPos = pos1 + _mem1ByteVarsUsed;
+
+  uint32_t gapAlignment = stackAlignment;
+  uint32_t gapSize = 0;
+
+  // TODO: Not used!
+  if (gapAlignment)
+    Utils::alignDiff(stackPos, gapAlignment);
+  stackPos += gapSize;
+
+  uint32_t gapPos = stackPos;
+  uint32_t allTotal = stackPos;
+
+  // Vars - Allocated according to alignment/width.
+  while (varCell != nullptr) {
+    uint32_t size = varCell->getSize();
+    uint32_t offset = 0;
+
+    switch (size) {
+      case  1: offset = pos1 ; pos1  += 1 ; break;
+      case  2: offset = pos2 ; pos2  += 2 ; break;
+      case  4: offset = pos4 ; pos4  += 4 ; break;
+      case  8: offset = pos8 ; pos8  += 8 ; break;
+      case 16: offset = pos16; pos16 += 16; break;
+      case 32: offset = pos32; pos32 += 32; break;
+      case 64: offset = pos64; pos64 += 64; break;
+
+      default:
+        ASMJIT_NOT_REACHED();
+    }
+
+    varCell->setOffset(static_cast<int32_t>(offset));
+    varCell = varCell->_next;
+  }
+
+  // Stack - Allocated according to alignment/width.
+  while (stackCell != nullptr) {
+    uint32_t size = stackCell->getSize();
+    uint32_t alignment = stackCell->getAlignment();
+    uint32_t offset;
+
+    // Try to fill the gap between variables/stack first.
+    if (size <= gapSize && alignment <= gapAlignment) {
+      offset = gapPos;
+
+      gapSize -= size;
+      gapPos -= size;
+
+      if (alignment < gapAlignment)
+        gapAlignment = alignment;
+    }
+    else {
+      offset = stackPos;
+
+      stackPos += size;
+      allTotal += size;
+    }
+
+    stackCell->setOffset(offset);
+    stackCell = stackCell->_next;
+  }
+
+  _memAllTotal = allTotal;
+  return kErrorOk;
+}
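
A worked example makes the bucket arithmetic in `resolveCellOffsets()` above concrete. The counts are hypothetical: two 8-byte variables, one 4-byte and one 1-byte, no larger cells:

```cpp
// pos64 = 0, pos32 = 0, pos16 = 0, pos8 = 0     (no 16/32/64-byte vars)
// pos4  = pos8 + 2*8  = 16
// pos2  = pos4 + 1*4  = 20
// pos1  = pos2 + 0*2  = 20
// stackPos = pos1 + 1 = 21
// The vars land at offsets 0 and 8 (8-byte), 16 (4-byte) and 20 (1-byte);
// stack cells follow from offset 21 (the gap logic is a no-op, see TODO).
```
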
+
+// ============================================================================
+// [asmjit::Context - RemoveUnreachableCode]
+// ============================================================================
+
+Error Context::removeUnreachableCode() {
+  Compiler* compiler = getCompiler();
+
+  PodList<HLNode*>::Link* link = _unreachableList.getFirst();
+  HLNode* stop = getStop();
+
+  while (link != nullptr) {
+    HLNode* node = link->getValue();
+    if (node != nullptr && node->getPrev() != nullptr && node != stop) {
+      // Locate all unreachable nodes.
+      HLNode* first = node;
+      do {
+        if (node->isFetched())
+          break;
+        node = node->getNext();
+      } while (node != stop);
+
+      // Remove unreachable nodes that are neither informative nor directives.
+      if (node != first) {
+        HLNode* end = node;
+        node = first;
+
+        // NOTE: The strategy is as follows:
+        // 1. The algorithm removes everything until it finds a first label.
+        // 2. After the first label is found it removes only removable nodes.
+        bool removeEverything = true;
+        do {
+          HLNode* next = node->getNext();
+          bool remove = node->isRemovable();
+
+          if (!remove) {
+            if (node->isLabel())
+              removeEverything = false;
+            remove = removeEverything;
+          }
+
+          if (remove) {
+            ASMJIT_TSEC({
+              this->_traceNode(this, node, "[REMOVED UNREACHABLE] ");
+            });
+            compiler->removeNode(node);
+          }
+
+          node = next;
+        } while (node != end);
+      }
+    }
+
+    link = link->getNext();
+  }
+
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::Context - Liveness Analysis]
+// ============================================================================
+
+//! \internal
+struct LivenessTarget {
+  //! Previous target.
+  LivenessTarget* prev;
+
+  //! Target node.
+  HLLabel* node;
+  //! Jumped from.
+  HLJump* from;
+};
+
+Error Context::livenessAnalysis() {
+  uint32_t bLen = static_cast<uint32_t>(
+    ((_contextVd.getLength() + BitArray::kEntityBits - 1) / BitArray::kEntityBits));
+
+  // No variables.
+  if (bLen == 0)
+    return kErrorOk;
+
+  HLFunc* func = getFunc();
+  HLJump* from = nullptr;
+
+  LivenessTarget* ltCur = nullptr;
+  LivenessTarget* ltUnused = nullptr;
+
+  PodList<HLNode*>::Link* retPtr = _returningList.getFirst();
+  ASMJIT_ASSERT(retPtr != nullptr);
+
+  HLNode* node = retPtr->getValue();
+
+  size_t varMapToVaListOffset = _varMapToVaListOffset;
+  BitArray* bCur = newBits(bLen);
+
+  if (bCur == nullptr)
+    goto _NoMemory;
+
+  // Allocate bits for code visited first time.
+_OnVisit:
+  for (;;) {
+    if (node->hasLiveness()) {
+      if (bCur->_addBitsDelSource(node->getLiveness(), bCur, bLen))
+        goto _OnPatch;
+      else
+        goto _OnDone;
+    }
+
+    BitArray* bTmp = copyBits(bCur, bLen);
+    if (bTmp == nullptr)
+      goto _NoMemory;
+
+    node->setLiveness(bTmp);
+    VarMap* map = node->getMap();
+
+    if (map != nullptr) {
+      uint32_t vaCount = map->getVaCount();
+      VarAttr* vaList = reinterpret_cast<VarAttr*>(((uint8_t*)map) + varMapToVaListOffset);
+
+      for (uint32_t i = 0; i < vaCount; i++) {
+        VarAttr* va = &vaList[i];
+        VarData* vd = va->getVd();
+
+        uint32_t flags = va->getFlags();
+        uint32_t localId = vd->getLocalId();
+
+        if ((flags & kVarAttrWAll) && !(flags & kVarAttrRAll)) {
+          // Write-Only.
+          bTmp->setBit(localId);
+          bCur->delBit(localId);
+        }
+        else {
+          // Read-Only or Read/Write.
+          bTmp->setBit(localId);
+          bCur->setBit(localId);
+        }
+      }
+    }
+
+    if (node->getType() == HLNode::kTypeLabel)
+      goto _OnTarget;
+
+    if (node == func)
+      goto _OnDone;
+
+    ASMJIT_ASSERT(node->getPrev());
+    node = node->getPrev();
+  }
+
+  // Patch already generated liveness bits.
+_OnPatch:
+  for (;;) {
+    ASMJIT_ASSERT(node->hasLiveness());
+    BitArray* bNode = node->getLiveness();
+
+    if (!bNode->_addBitsDelSource(bCur, bLen))
+      goto _OnDone;
+
+    if (node->getType() == HLNode::kTypeLabel)
+      goto _OnTarget;
+
+    if (node == func)
+      goto _OnDone;
+
+    node = node->getPrev();
+  }
+
+_OnTarget:
+  if (static_cast<HLLabel*>(node)->getNumRefs() != 0) {
+    // Push a new LivenessTarget onto the stack if needed.
+    if (ltCur == nullptr || ltCur->node != node) {
+      // Allocate a new LivenessTarget object (from pool or zone).
+      LivenessTarget* ltTmp = ltUnused;
+
+      if (ltTmp != nullptr) {
+        ltUnused = ltUnused->prev;
+      }
+      else {
+        ltTmp = _zoneAllocator.allocT<LivenessTarget>(
+          sizeof(LivenessTarget) - sizeof(BitArray) + bLen * sizeof(uintptr_t));
+
+        if (ltTmp == nullptr)
+          goto _NoMemory;
+      }
+
+      // Initialize and make current - ltTmp->from will be set later on.
+      ltTmp->prev = ltCur;
+      ltTmp->node = static_cast<HLLabel*>(node);
+      ltCur = ltTmp;
+
+      from = static_cast<HLLabel*>(node)->getFrom();
+      ASMJIT_ASSERT(from != nullptr);
+    }
+    else {
+      from = ltCur->from;
+      goto _OnJumpNext;
+    }
+
+    // Visit/Patch.
+    do {
+      ltCur->from = from;
+      bCur->copyBits(node->getLiveness(), bLen);
+
+      if (!from->hasLiveness()) {
+        node = from;
+        goto _OnVisit;
+      }
+
+      // Issue #25: Moved '_OnJumpNext' here since it's important to patch
+      // code again if there are more live variables than before.
+_OnJumpNext:
+      if (bCur->delBits(from->getLiveness(), bLen)) {
+        node = from;
+        goto _OnPatch;
+      }
+
+      from = from->getJumpNext();
+    } while (from != nullptr);
+
+    // Pop the current LivenessTarget from the stack.
+    {
+      LivenessTarget* ltTmp = ltCur;
+
+      ltCur = ltCur->prev;
+      ltTmp->prev = ltUnused;
+      ltUnused = ltTmp;
+    }
+  }
+
+  bCur->copyBits(node->getLiveness(), bLen);
+  node = node->getPrev();
+
+  if (node->isJmp() || !node->isFetched())
+    goto _OnDone;
+
+  if (!node->hasLiveness())
+    goto _OnVisit;
+
+  if (bCur->delBits(node->getLiveness(), bLen))
+    goto _OnPatch;
+
+_OnDone:
+  if (ltCur != nullptr) {
+    node = ltCur->node;
+    from = ltCur->from;
+
+    goto _OnJumpNext;
+  }
+
+  retPtr = retPtr->getNext();
+  if (retPtr != nullptr) {
+    node = retPtr->getValue();
+    goto _OnVisit;
+  }
+
+  return kErrorOk;
+
+_NoMemory:
+  return setLastError(kErrorNoHeapMemory);
+}
+
+// ============================================================================
+// [asmjit::Context - Annotate]
+// ============================================================================
+
+Error Context::formatInlineComment(StringBuilder& dst, HLNode* node) {
+#if !defined(ASMJIT_DISABLE_LOGGER)
+  if (node->getComment())
+    dst.appendString(node->getComment());
+
+  if (node->hasLiveness()) {
+    if (dst.getLength() < _annotationLength)
+      dst.appendChars(' ', _annotationLength - dst.getLength());
+
+    uint32_t vdCount = static_cast<uint32_t>(_contextVd.getLength());
+    size_t offset = dst.getLength() + 1;
+
+    dst.appendChar('[');
+    dst.appendChars(' ', vdCount);
+    dst.appendChar(']');
+
+    BitArray* liveness = node->getLiveness();
+    VarMap* map = node->getMap();
+
+    uint32_t i;
+    for (i = 0; i < vdCount; i++) {
+      if (liveness->getBit(i))
+        dst.getData()[offset + i] = '.';
+    }
+
+    if (map != nullptr) {
+      uint32_t vaCount = map->getVaCount();
+      VarAttr* vaList = reinterpret_cast<VarAttr*>(((uint8_t*)map) + _varMapToVaListOffset);
+
+      for (i = 0; i < vaCount; i++) {
+        VarAttr* va = &vaList[i];
+        VarData* vd = va->getVd();
+
+        uint32_t flags = va->getFlags();
+        char c = 'u';
+
+        if ( (flags & kVarAttrRAll) && !(flags & kVarAttrWAll)) c = 'r';
+        if (!(flags & kVarAttrRAll) &&  (flags & kVarAttrWAll)) c = 'w';
+        if ( (flags & kVarAttrRAll) &&  (flags & kVarAttrWAll)) c = 'x';
+
+        // Uppercase if unused.
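+        // ('r' becomes 'R', 'w' becomes 'W', and so on; ASCII upper- and
+        // lowercase letters differ exactly by 'a' - 'A' == 0x20.)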
+        if ((flags & kVarAttrUnuse))
+          c -= 'a' - 'A';
+
+        ASMJIT_ASSERT(offset + vd->getLocalId() < dst.getLength());
+        dst._data[offset + vd->getLocalId()] = c;
+      }
+    }
+  }
+#endif // !ASMJIT_DISABLE_LOGGER
+
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::Context - Cleanup]
+// ============================================================================
+
+void Context::cleanup() {
+  VarData** array = _contextVd.getData();
+  size_t length = _contextVd.getLength();
+
+  for (size_t i = 0; i < length; i++) {
+    VarData* vd = array[i];
+    vd->resetLocalId();
+    vd->resetRegIndex();
+  }
+
+  _contextVd.reset(false);
+  _extraBlock = nullptr;
+}
+
+// ============================================================================
+// [asmjit::Context - CompileFunc]
+// ============================================================================
+
+Error Context::compile(HLFunc* func) {
+  HLNode* end = func->getEnd();
+  HLNode* stop = end->getNext();
+
+  _func = func;
+  _stop = stop;
+  _extraBlock = end;
+
+  ASMJIT_PROPAGATE_ERROR(fetch());
+  ASMJIT_PROPAGATE_ERROR(removeUnreachableCode());
+  ASMJIT_PROPAGATE_ERROR(livenessAnalysis());
+
+  Compiler* compiler = getCompiler();
+
+#if !defined(ASMJIT_DISABLE_LOGGER)
+  if (compiler->getAssembler()->hasLogger())
+    ASMJIT_PROPAGATE_ERROR(annotate());
+#endif // !ASMJIT_DISABLE_LOGGER
+
+  ASMJIT_PROPAGATE_ERROR(translate());
+
+  // We alter the compiler cursor, because it doesn't make sense to reference
+  // it after compilation - some nodes may disappear and it's forbidden to add
+  // new code after the compilation is done.
+  compiler->_setCursor(nullptr);
+
+  return kErrorOk;
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // !ASMJIT_DISABLE_COMPILER
diff --git a/DynamicHooks/thirdparty/AsmJit/base/compilercontext_p.h b/DynamicHooks/thirdparty/AsmJit/base/compilercontext_p.h
new file mode 100644
index 0000000..ac06e97
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/compilercontext_p.h
@@ -0,0 +1,901 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_COMPILERCONTEXT_P_H
+#define _ASMJIT_BASE_COMPILERCONTEXT_P_H
+
+#include "../build.h"
+#if !defined(ASMJIT_DISABLE_COMPILER)
+
+// [Dependencies]
+#include "../base/compiler.h"
+#include "../base/podvector.h"
+#include "../base/zone.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+//! \addtogroup asmjit_base
+//! \{
+
+// ============================================================================
+// [asmjit::VarAttrFlags]
+// ============================================================================
+
+//! \internal
+//!
+//! Variable attribute flags.
+ASMJIT_ENUM(VarAttrFlags) {
+  //! Read from register.
+  kVarAttrRReg = 0x00000001,
+  //! Write to register.
+  kVarAttrWReg = 0x00000002,
+  //! Read/Write from/to register.
+  kVarAttrXReg = 0x00000003,
+
+  //! Read from memory.
+  kVarAttrRMem = 0x00000004,
+  //! Write to memory.
+  kVarAttrWMem = 0x00000008,
+  //! Read/Write from/to memory.
+  kVarAttrXMem = 0x0000000C,
+
+  //! Register allocator can decide if input will be in register or memory.
+  kVarAttrRDecide = 0x00000010,
+  //! Register allocator can decide if output will be in register or memory.
+  kVarAttrWDecide = 0x00000020,
+  //! Register allocator can decide if in/out will be in register or memory.
+  kVarAttrXDecide = 0x00000030,
+
+  //! Variable is converted to other type/class on the input.
+  kVarAttrRConv = 0x00000040,
+  //! Variable is converted from other type/class on the output.
+  kVarAttrWConv = 0x00000080,
+  //! Combination of `kVarAttrRConv` and `kVarAttrWConv`.
+  kVarAttrXConv = 0x000000C0,
+
+  //! Variable is a function call operand.
+  kVarAttrRCall = 0x00000100,
+  //! Variable is a function argument passed in register.
+  kVarAttrRFunc = 0x00000200,
+  //! Variable is a function return value passed in register.
+  kVarAttrWFunc = 0x00000400,
+
+  //! Variable should be spilled.
+  kVarAttrSpill = 0x00000800,
+  //! Variable should be unused at the end of the instruction/node.
+  kVarAttrUnuse = 0x00001000,
+
+  //! All in-flags.
+  kVarAttrRAll = kVarAttrRReg | kVarAttrRMem | kVarAttrRDecide | kVarAttrRCall | kVarAttrRFunc,
+  //! All out-flags.
+  kVarAttrWAll = kVarAttrWReg | kVarAttrWMem | kVarAttrWDecide | kVarAttrWFunc,
+
+  //! Variable is already allocated on the input.
+  kVarAttrAllocRDone = 0x00400000,
+  //! Variable is already allocated on the output.
+  kVarAttrAllocWDone = 0x00800000,
+
+  kVarAttrX86GpbLo = 0x10000000,
+  kVarAttrX86GpbHi = 0x20000000,
+  kVarAttrX86Fld4  = 0x40000000,
+  kVarAttrX86Fld8  = 0x80000000
+};
+
+// ============================================================================
+// [asmjit::VarHint]
+// ============================================================================
+
+//! \internal
+//!
+//! Variable hint (used by `Compiler`).
+//!
+//! \sa Compiler.
+ASMJIT_ENUM(VarHint) {
+  //! Alloc variable.
+  kVarHintAlloc = 0,
+  //! Spill variable.
+  kVarHintSpill = 1,
+  //! Save variable if modified.
+  kVarHintSave = 2,
+  //! Save variable if modified and mark it as unused.
+  kVarHintSaveAndUnuse = 3,
+  //! Mark variable as unused.
+  kVarHintUnuse = 4
+};
+
+// ============================================================================
+// [asmjit::kVarState]
+// ============================================================================
+
+// TODO: Rename `kVarState` or `VarState`.
+
+//! \internal
+//!
+//! State of variable.
+//!
+//! NOTE: Variable states are used only during register allocation.
+ASMJIT_ENUM(kVarState) {
+  //! Variable is currently not used.
+  kVarStateNone = 0,
+  //! Variable is currently allocated in register.
+  kVarStateReg = 1,
+  //! Variable is currently allocated in memory (or has been spilled).
+  kVarStateMem = 2
+};
+
+// ============================================================================
+// [asmjit::VarCell]
+// ============================================================================
+
+struct VarCell {
+  ASMJIT_NO_COPY(VarCell)
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get cell offset.
+  ASMJIT_INLINE int32_t getOffset() const { return _offset; }
+  //! Set cell offset.
+  ASMJIT_INLINE void setOffset(int32_t offset) { _offset = offset; }
+
+  //! Get cell size.
+  ASMJIT_INLINE uint32_t getSize() const { return _size; }
+  //! Set cell size.
+  ASMJIT_INLINE void setSize(uint32_t size) { _size = size; }
+
+  //! Get cell alignment.
+  ASMJIT_INLINE uint32_t getAlignment() const { return _alignment; }
+  //! Set cell alignment.
+  ASMJIT_INLINE void setAlignment(uint32_t alignment) { _alignment = alignment; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Next active cell.
+  VarCell* _next;
+
+  //! Offset, relative to base-offset.
+  int32_t _offset;
+  //! Size.
+  uint32_t _size;
+  //! Alignment.
+  uint32_t _alignment;
+};
+
+// ============================================================================
+// [asmjit::VarData]
+// ============================================================================
+
+//! HL variable data (base).
+struct VarData {
+  // --------------------------------------------------------------------------
+  // [Accessors - Base]
+  // --------------------------------------------------------------------------
+
+  //! Get variable name.
+  ASMJIT_INLINE const char* getName() const { return _name; }
+  //! Get variable id.
+  ASMJIT_INLINE uint32_t getId() const { return _id; }
+  //! Get variable type.
+  ASMJIT_INLINE uint32_t getType() const { return _type; }
+  //! Get variable class.
+  ASMJIT_INLINE uint32_t getClass() const { return _class; }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - LocalId]
+  // --------------------------------------------------------------------------
+
+  //! Get whether the variable has a local id.
+  ASMJIT_INLINE bool hasLocalId() const { return _localId != kInvalidValue; }
+  //! Get a variable's local id.
+  ASMJIT_INLINE uint32_t getLocalId() const { return _localId; }
+  //! Set a variable's local id.
+  ASMJIT_INLINE void setLocalId(uint32_t localId) { _localId = localId; }
+  //! Reset a variable's local id.
+  ASMJIT_INLINE void resetLocalId() { _localId = kInvalidValue; }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - Priority]
+  // --------------------------------------------------------------------------
+
+  //! Get variable priority, used by compiler to decide which variable to spill.
+  ASMJIT_INLINE uint32_t getPriority() const { return _priority; }
+  //! Set variable priority.
+  ASMJIT_INLINE void setPriority(uint32_t priority) {
+    ASMJIT_ASSERT(priority <= 0xFF);
+    _priority = static_cast<uint8_t>(priority);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - State]
+  // --------------------------------------------------------------------------
+
+  //! Get variable state, only used by `Context`.
+  ASMJIT_INLINE uint32_t getState() const { return _state; }
+  //! Set variable state, only used by `Context`.
+  ASMJIT_INLINE void setState(uint32_t state) {
+    ASMJIT_ASSERT(state <= 0xFF);
+    _state = static_cast<uint8_t>(state);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - RegIndex]
+  // --------------------------------------------------------------------------
+
+  //! Get register index.
+  ASMJIT_INLINE uint32_t getRegIndex() const { return _regIndex; }
+  //! Set register index.
+  ASMJIT_INLINE void setRegIndex(uint32_t regIndex) {
+    ASMJIT_ASSERT(regIndex <= kInvalidReg);
+    _regIndex = static_cast<uint8_t>(regIndex);
+  }
+  //! Reset register index.
+  ASMJIT_INLINE void resetRegIndex() {
+    _regIndex = static_cast<uint8_t>(kInvalidReg);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - HomeIndex/Mask]
+  // --------------------------------------------------------------------------
+
+  //! Get home registers mask.
+  ASMJIT_INLINE uint32_t getHomeMask() const { return _homeMask; }
+  //! Add a home register index to the home registers mask.
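+  //! (Illustrative, assuming `Utils::mask(i)` is `1u << i`: after
+  //! `addHomeIndex(0)` and `addHomeIndex(2)` the home mask is 0x5.)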
+  ASMJIT_INLINE void addHomeIndex(uint32_t regIndex) { _homeMask |= Utils::mask(regIndex); }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - Flags]
+  // --------------------------------------------------------------------------
+
+  //! Get variable flags.
+  ASMJIT_INLINE uint32_t getFlags() const { return _flags; }
+
+  //! Get whether the VarData is only memory allocated on the stack.
+  ASMJIT_INLINE bool isStack() const { return static_cast<bool>(_isStack); }
+  //! Get whether the variable is a function argument passed through memory.
+  ASMJIT_INLINE bool isMemArg() const { return static_cast<bool>(_isMemArg); }
+
+  //! Get whether variable content can be calculated by a simple instruction.
+  ASMJIT_INLINE bool isCalculated() const { return static_cast<bool>(_isCalculated); }
+  //! Get whether to save variable when it's unused (spill).
+  ASMJIT_INLINE bool saveOnUnuse() const { return static_cast<bool>(_saveOnUnuse); }
+
+  //! Get whether the variable was changed.
+  ASMJIT_INLINE bool isModified() const { return static_cast<bool>(_modified); }
+  //! Set whether the variable was changed.
+  ASMJIT_INLINE void setModified(bool modified) { _modified = modified; }
+
+  //! Get variable alignment.
+  ASMJIT_INLINE uint32_t getAlignment() const { return _alignment; }
+  //! Get variable size.
+  ASMJIT_INLINE uint32_t getSize() const { return _size; }
+
+  //! Get home memory offset.
+  ASMJIT_INLINE int32_t getMemOffset() const { return _memOffset; }
+  //! Set home memory offset.
+  ASMJIT_INLINE void setMemOffset(int32_t offset) { _memOffset = offset; }
+
+  //! Get home memory cell.
+  ASMJIT_INLINE VarCell* getMemCell() const { return _memCell; }
+  //! Set home memory cell.
+  ASMJIT_INLINE void setMemCell(VarCell* cell) { _memCell = cell; }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - Temporary Usage]
+  // --------------------------------------------------------------------------
+
+  //! Get temporary VarAttr.
+  ASMJIT_INLINE VarAttr* getVa() const { return _va; }
+  //! Set temporary VarAttr.
+  ASMJIT_INLINE void setVa(VarAttr* va) { _va = va; }
+  //! Reset temporary VarAttr.
+  ASMJIT_INLINE void resetVa() { _va = nullptr; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Variable name.
+  const char* _name;
+
+  //! Variable id.
+  uint32_t _id;
+  //! Variable's local id (initially `kInvalidValue`).
+  uint32_t _localId;
+
+  //! Variable type.
+  uint8_t _type;
+  //! Variable class.
+  uint8_t _class;
+  //! Variable flags.
+  uint8_t _flags;
+  //! Variable priority.
+  uint8_t _priority;
+
+  //! Variable state (connected with actual `VarState`).
+  uint8_t _state;
+  //! Actual register index (only used by `Context`), during translate.
+  uint8_t _regIndex;
+
+  //! Whether the variable is only used as memory allocated on the stack.
+  uint8_t _isStack : 1;
+  //! Whether the variable is a function argument passed through memory.
+  uint8_t _isMemArg : 1;
+  //! Whether variable content can be calculated by a simple instruction.
+  //!
+  //! This is used mainly by MMX and SSE2 code. This flag indicates that
+  //! register allocator should never reserve memory for this variable, because
+  //! the content can be generated by a single instruction (for example PXOR).
+  uint8_t _isCalculated : 1;
+  //! Save on unuse (at end of the variable scope).
+  uint8_t _saveOnUnuse : 1;
+  //! Whether variable was changed (connected with actual `VarState`).
+  uint8_t _modified : 1;
+  //! \internal
+  uint8_t _reserved0 : 3;
+  //! Variable natural alignment.
+  uint8_t _alignment;
+
+  //! Variable size.
+  uint32_t _size;
+
+  //! Mask of all registers variable has been allocated to.
+  uint32_t _homeMask;
+
+  //! Home memory offset.
+  int32_t _memOffset;
+  //! Home memory cell, used by `Context` (initially nullptr).
+  VarCell* _memCell;
+
+  //! Register read access statistics.
+  uint32_t rReadCount;
+  //! Register write access statistics.
+  uint32_t rWriteCount;
+
+  //! Memory read statistics.
+  uint32_t mReadCount;
+  //! Memory write statistics.
+  uint32_t mWriteCount;
+
+  // --------------------------------------------------------------------------
+  // [Members - Temporary Usage]
+  // --------------------------------------------------------------------------
+
+  // These variables are only used during register allocation. They are
+  // initialized by init() phase and reset by cleanup() phase.
+
+  union {
+    //! Temporary link to a VarAttr*, used by the `Context` in various phases,
+    //! but always set back to nullptr when finished.
+    //!
+    //! This temporary data is designed to be used by algorithms that need to
+    //! store some data into variables themselves during compilation. But it's
+    //! expected that after variable is compiled & translated the data is set
+    //! back to zero/null. Initial value is nullptr.
+    VarAttr* _va;
+
+    //! \internal
+    //!
+    //! Same as `_va` just provided as `uintptr_t`.
+    uintptr_t _vaUInt;
+  };
+};
+
+// ============================================================================
+// [asmjit::VarAttr]
+// ============================================================================
+
+struct VarAttr {
+  // --------------------------------------------------------------------------
+  // [Setup]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void setup(VarData* vd, uint32_t flags = 0, uint32_t inRegs = 0, uint32_t allocableRegs = 0) {
+    _vd = vd;
+    _flags = flags;
+    _varCount = 0;
+    _inRegIndex = kInvalidReg;
+    _outRegIndex = kInvalidReg;
+    _reserved = 0;
+    _inRegs = inRegs;
+    _allocableRegs = allocableRegs;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get VarData.
+  ASMJIT_INLINE VarData* getVd() const { return _vd; }
+  //! Set VarData.
+  ASMJIT_INLINE void setVd(VarData* vd) { _vd = vd; }
+
+  //! Get flags.
+  ASMJIT_INLINE uint32_t getFlags() const { return _flags; }
+  //! Set flags.
+  ASMJIT_INLINE void setFlags(uint32_t flags) { _flags = flags; }
+
+  //! Get whether `flag` is on.
+  ASMJIT_INLINE bool hasFlag(uint32_t flag) { return (_flags & flag) != 0; }
+  //! Add `flags`.
+  ASMJIT_INLINE void orFlags(uint32_t flags) { _flags |= flags; }
+  //! Mask `flags`.
+  ASMJIT_INLINE void andFlags(uint32_t flags) { _flags &= flags; }
+  //! Clear `flags`.
+  ASMJIT_INLINE void andNotFlags(uint32_t flags) { _flags &= ~flags; }
+
+  //! Get how many times the variable is used by the instruction/node.
+  ASMJIT_INLINE uint32_t getVarCount() const { return _varCount; }
+  //! Set how many times the variable is used by the instruction/node.
+  ASMJIT_INLINE void setVarCount(uint32_t count) { _varCount = static_cast<uint8_t>(count); }
+  //! Add how many times the variable is used by the instruction/node.
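+  //! (E.g. an instruction such as `add x, x` references the same variable
+  //! twice, so its VarAttr would carry a var count of 2.)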
+  ASMJIT_INLINE void addVarCount(uint32_t count = 1) { _varCount += static_cast<uint8_t>(count); }
+
+  //! Get whether the variable has to be allocated in a specific input register.
+  ASMJIT_INLINE uint32_t hasInRegIndex() const { return _inRegIndex != kInvalidReg; }
+  //! Get the input register index or `kInvalidReg`.
+  ASMJIT_INLINE uint32_t getInRegIndex() const { return _inRegIndex; }
+  //! Set the input register index.
+  ASMJIT_INLINE void setInRegIndex(uint32_t index) { _inRegIndex = static_cast<uint8_t>(index); }
+  //! Reset the input register index.
+  ASMJIT_INLINE void resetInRegIndex() { _inRegIndex = kInvalidReg; }
+
+  //! Get whether the variable has to be allocated in a specific output register.
+  ASMJIT_INLINE uint32_t hasOutRegIndex() const { return _outRegIndex != kInvalidReg; }
+  //! Get the output register index or `kInvalidReg`.
+  ASMJIT_INLINE uint32_t getOutRegIndex() const { return _outRegIndex; }
+  //! Set the output register index.
+  ASMJIT_INLINE void setOutRegIndex(uint32_t index) { _outRegIndex = static_cast<uint8_t>(index); }
+  //! Reset the output register index.
+  ASMJIT_INLINE void resetOutRegIndex() { _outRegIndex = kInvalidReg; }
+
+  //! Get whether the mandatory input registers are in use.
+  ASMJIT_INLINE bool hasInRegs() const { return _inRegs != 0; }
+  //! Get mandatory input registers (mask).
+  ASMJIT_INLINE uint32_t getInRegs() const { return _inRegs; }
+  //! Set mandatory input registers (mask).
+  ASMJIT_INLINE void setInRegs(uint32_t mask) { _inRegs = mask; }
+  //! Add mandatory input registers (mask).
+  ASMJIT_INLINE void addInRegs(uint32_t mask) { _inRegs |= mask; }
+  //! And mandatory input registers (mask).
+  ASMJIT_INLINE void andInRegs(uint32_t mask) { _inRegs &= mask; }
+  //! Clear mandatory input registers (mask).
+  ASMJIT_INLINE void delInRegs(uint32_t mask) { _inRegs &= ~mask; }
+
+  //! Get allocable input registers (mask).
+  ASMJIT_INLINE uint32_t getAllocableRegs() const { return _allocableRegs; }
+  //! Set allocable input registers (mask).
+  ASMJIT_INLINE void setAllocableRegs(uint32_t mask) { _allocableRegs = mask; }
+  //! Add allocable input registers (mask).
+  ASMJIT_INLINE void addAllocableRegs(uint32_t mask) { _allocableRegs |= mask; }
+  //! And allocable input registers (mask).
+  ASMJIT_INLINE void andAllocableRegs(uint32_t mask) { _allocableRegs &= mask; }
+  //! Clear allocable input registers (mask).
+  ASMJIT_INLINE void delAllocableRegs(uint32_t mask) { _allocableRegs &= ~mask; }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE VarAttr& operator=(const VarAttr& other) {
+    ::memcpy(this, &other, sizeof(VarAttr));
+    return *this;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  VarData* _vd;
+  //! Flags.
+  uint32_t _flags;
+
+  union {
+    struct {
+      //! How many times the variable is used by the instruction/node.
+      uint8_t _varCount;
+      //! Input register index or `kInvalidReg` if it's not given.
+      //!
+      //! Even if the input register index is not given (i.e. it may be any
+      //! register), register allocator should assign an index that will be
+      //! used to persist a variable into this specific index. It's helpful
+      //! in situations where one variable has to be allocated in multiple
+      //! registers to determine the register which will be persistent.
+      uint8_t _inRegIndex;
+      //! Output register index or `kInvalidReg` if it's not given.
+      //!
+      //! Typically `kInvalidReg` if variable is only used on input.
+      uint8_t _outRegIndex;
+      //! \internal
+      uint8_t _reserved;
+    };
+
+    //! \internal
+    //!
+    //! Packed data #0.
+    uint32_t _packed;
+  };
+
+  //! Mandatory input registers.
+  //!
+  //! Mandatory input registers are required by the instruction even if
+  //! there are duplicates. This schema allows us to allocate one variable
+  //! in one or more registers when needed. Required mostly by instructions
+  //! that have implicit register operands (imul, cpuid, ...) and function
+  //! call.
+  uint32_t _inRegs;
+
+  //! Allocable input registers.
+  //!
+  //! Optional input registers form a mask of all allocable registers for a
+  //! given variable, of which we have to pick one. This mask is usually not
+  //! used when _inRegs is set. If both masks are used then the register
+  //! allocator tries first to find an intersection between these and allocates
+  //! an extra slot if not found.
+  uint32_t _allocableRegs;
+};
+
+// ============================================================================
+// [asmjit::VarMap]
+// ============================================================================
+
+//! Variables' map related to a single node (instruction / other node).
+struct VarMap {
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get count of variables (all).
+  ASMJIT_INLINE uint32_t getVaCount() const {
+    return _vaCount;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Variables count.
+  uint32_t _vaCount;
+};
+
+// ============================================================================
+// [asmjit::VarState]
+// ============================================================================
+
+//! Variables' state.
+struct VarState {};
+
+// ============================================================================
+// [asmjit::Context]
+// ============================================================================
+
+//! \internal
+//!
+//! Code generation context is the logic behind `Compiler`. The context is
+//! used to compile the code stored in `Compiler`.
+struct Context {
+  ASMJIT_NO_COPY(Context)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  Context(Compiler* compiler);
+  virtual ~Context();
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  //! Reset the whole context.
+  virtual void reset(bool releaseMemory = false);
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get compiler.
+  ASMJIT_INLINE Compiler* getCompiler() const { return _compiler; }
+
+  //! Get function.
+  ASMJIT_INLINE HLFunc* getFunc() const { return _func; }
+  //! Get stop node.
+  ASMJIT_INLINE HLNode* getStop() const { return _stop; }
+
+  //! Get start of the current scope.
+  ASMJIT_INLINE HLNode* getStart() const { return _start; }
+  //! Get end of the current scope.
+  ASMJIT_INLINE HLNode* getEnd() const { return _end; }
+
+  //! Get extra block.
+  ASMJIT_INLINE HLNode* getExtraBlock() const { return _extraBlock; }
+  //! Set extra block.
+  ASMJIT_INLINE void setExtraBlock(HLNode* node) { _extraBlock = node; }
+
+  // --------------------------------------------------------------------------
+  // [Error]
+  // --------------------------------------------------------------------------
+
+  //! Get the last error code.
+  ASMJIT_INLINE Error getLastError() const {
+    return getCompiler()->getLastError();
+  }
+
+  //! Set the last error code and propagate it through the error handler.
+  ASMJIT_INLINE Error setLastError(Error error, const char* message = nullptr) {
+    return getCompiler()->setLastError(error, message);
+  }
+
+  // --------------------------------------------------------------------------
+  // [State]
+  // --------------------------------------------------------------------------
+
+  //! Get current state.
+  ASMJIT_INLINE VarState* getState() const { return _state; }
+
+  //! Load current state from `target` state.
+  virtual void loadState(VarState* src) = 0;
+
+  //! Save current state, returning new `VarState` instance.
+  virtual VarState* saveState() = 0;
+
+  //! Change the current state to `target` state.
+  virtual void switchState(VarState* src) = 0;
+
+  //! Change the current state to the intersection of two states `a` and `b`.
+  virtual void intersectStates(VarState* a, VarState* b) = 0;
+
+  // --------------------------------------------------------------------------
+  // [Context]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE Error _registerContextVar(VarData* vd) {
+    if (vd->hasLocalId())
+      return kErrorOk;
+
+    uint32_t cid = static_cast<uint32_t>(_contextVd.getLength());
+    ASMJIT_PROPAGATE_ERROR(_contextVd.append(vd));
+
+    vd->setLocalId(cid);
+    return kErrorOk;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Mem]
+  // --------------------------------------------------------------------------
+
+  VarCell* _newVarCell(VarData* vd);
+  VarCell* _newStackCell(uint32_t size, uint32_t alignment);
+
+  ASMJIT_INLINE VarCell* getVarCell(VarData* vd) {
+    VarCell* cell = vd->getMemCell();
+    return cell ? cell : _newVarCell(vd);
+  }
+
+  virtual Error resolveCellOffsets();
+
+  // --------------------------------------------------------------------------
+  // [Bits]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE BitArray* newBits(uint32_t len) {
+    return static_cast<BitArray*>(
+      _zoneAllocator.allocZeroed(static_cast<size_t>(len) * BitArray::kEntitySize));
+  }
+
+  ASMJIT_INLINE BitArray* copyBits(const BitArray* src, uint32_t len) {
+    return static_cast<BitArray*>(
+      _zoneAllocator.dup(src, static_cast<size_t>(len) * BitArray::kEntitySize));
+  }
+
+  // --------------------------------------------------------------------------
+  // [Fetch]
+  // --------------------------------------------------------------------------
+
+  //! Fetch.
+  //!
+  //! Fetch iterates over all nodes and gathers information about all variables
+  //! used. The process generates information required by register allocator,
+  //! variable liveness analysis and translator.
+  virtual Error fetch() = 0;
+
+  // --------------------------------------------------------------------------
+  // [Unreachable Code]
+  // --------------------------------------------------------------------------
+
+  //! Add unreachable-flow data to the unreachable flow list.
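+  //! (Typically populated during `fetch()` right after an unconditional jump
+  //! or a return; `removeUnreachableCode()` later prunes from these points.)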
+  ASMJIT_INLINE Error addUnreachableNode(HLNode* node) {
+    PodList<HLNode*>::Link* link = _zoneAllocator.allocT<PodList<HLNode*>::Link>();
+    if (link == nullptr)
+      return setLastError(kErrorNoHeapMemory);
+
+    link->setValue(node);
+    _unreachableList.append(link);
+
+    return kErrorOk;
+  }
+
+  //! Remove unreachable code.
+  virtual Error removeUnreachableCode();
+
+  // --------------------------------------------------------------------------
+  // [Code-Flow]
+  // --------------------------------------------------------------------------
+
+  //! Add returning node (i.e. node that returns and where liveness analysis
+  //! should start).
+  ASMJIT_INLINE Error addReturningNode(HLNode* node) {
+    PodList<HLNode*>::Link* link = _zoneAllocator.allocT<PodList<HLNode*>::Link>();
+    if (link == nullptr)
+      return setLastError(kErrorNoHeapMemory);
+
+    link->setValue(node);
+    _returningList.append(link);
+
+    return kErrorOk;
+  }
+
+  //! Add jump-flow data to the jcc flow list.
+  ASMJIT_INLINE Error addJccNode(HLNode* node) {
+    PodList<HLNode*>::Link* link = _zoneAllocator.allocT<PodList<HLNode*>::Link>();
+    if (link == nullptr)
+      return setLastError(kErrorNoHeapMemory);
+
+    link->setValue(node);
+    _jccList.append(link);
+
+    return kErrorOk;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Analyze]
+  // --------------------------------------------------------------------------
+
+  //! Perform variable liveness analysis.
+  //!
+  //! Analysis phase iterates over nodes in reverse order and generates a bit
+  //! array describing variables that are alive at every node in the function.
+  //! When the analysis starts all variables are assumed dead. When a read or
+  //! read/write operation of a variable is detected the variable becomes
+  //! alive; when only a write operation is detected the variable becomes dead.
+  //!
+  //! When a label is found all jumps to that label are followed and analysis
+  //! repeats until all variables are resolved.
+  virtual Error livenessAnalysis();
+
+  // --------------------------------------------------------------------------
+  // [Annotate]
+  // --------------------------------------------------------------------------
+
+  virtual Error annotate() = 0;
+  virtual Error formatInlineComment(StringBuilder& dst, HLNode* node);
+
+  // --------------------------------------------------------------------------
+  // [Translate]
+  // --------------------------------------------------------------------------
+
+  //! Translate code by allocating registers and handling state changes.
+  virtual Error translate() = 0;
+
+  // --------------------------------------------------------------------------
+  // [Cleanup]
+  // --------------------------------------------------------------------------
+
+  virtual void cleanup();
+
+  // --------------------------------------------------------------------------
+  // [Compile]
+  // --------------------------------------------------------------------------
+
+  virtual Error compile(HLFunc* func);
+
+  // --------------------------------------------------------------------------
+  // [Serialize]
+  // --------------------------------------------------------------------------
+
+  virtual Error serialize(Assembler* assembler, HLNode* start, HLNode* stop) = 0;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Compiler.
+  Compiler* _compiler;
+  //! Function.
+  HLFunc* _func;
+
+  //! Zone allocator.
+  Zone _zoneAllocator;
+
+  //! \internal
+  typedef void (ASMJIT_CDECL* TraceNodeFunc)(Context* self, HLNode* node_, const char* prefix);
+  //! \internal
+  //!
+  //! Only non-NULL when ASMJIT_TRACE is enabled.
+  TraceNodeFunc _traceNode;
+
+  //! \internal
+  //!
+  //! Offset (how many bytes to add) to `VarMap` to get the `VarAttr` array. Used
+  //! by liveness analysis shared across all backends. This is needed because
+  //! `VarMap` is a base class for a specialized version that liveness analysis
+  //! doesn't use; it just needs the `VarAttr` array.
+  uint32_t _varMapToVaListOffset;
+
+  //! Start of the current active scope.
+  HLNode* _start;
+  //! End of the current active scope.
+  HLNode* _end;
+
+  //! Node that is used to insert extra code after the function body.
+  HLNode* _extraBlock;
+  //! Stop node.
+  HLNode* _stop;
+
+  //! Unreachable nodes.
+  PodList<HLNode*> _unreachableList;
+  //! Returning nodes.
+  PodList<HLNode*> _returningList;
+  //! Jump nodes.
+  PodList<HLNode*> _jccList;
+
+  //! All variables used by the current function.
+  PodVector<VarData*> _contextVd;
+
+  //! Memory used to spill variables.
+  VarCell* _memVarCells;
+  //! Memory used to alloc memory on the stack.
+  VarCell* _memStackCells;
+
+  //! Count of 1-byte cells.
+  uint32_t _mem1ByteVarsUsed;
+  //! Count of 2-byte cells.
+  uint32_t _mem2ByteVarsUsed;
+  //! Count of 4-byte cells.
+  uint32_t _mem4ByteVarsUsed;
+  //! Count of 8-byte cells.
+  uint32_t _mem8ByteVarsUsed;
+  //! Count of 16-byte cells.
+  uint32_t _mem16ByteVarsUsed;
+  //! Count of 32-byte cells.
+  uint32_t _mem32ByteVarsUsed;
+  //! Count of 64-byte cells.
+  uint32_t _mem64ByteVarsUsed;
+  //! Count of stack memory cells.
+  uint32_t _memStackCellsUsed;
+
+  //! Maximum memory alignment used by the function.
+  uint32_t _memMaxAlign;
+  //! Count of bytes used by variables.
+  uint32_t _memVarTotal;
+  //! Count of bytes used by stack.
+  uint32_t _memStackTotal;
+  //! Count of bytes used by variables and stack after alignment.
+  uint32_t _memAllTotal;
+
+  //! Default length of annotated instruction.
+  uint32_t _annotationLength;
+
+  //! Current state (used by register allocator).
+  VarState* _state;
+};
+
+//! \}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // !ASMJIT_DISABLE_COMPILER
+#endif // _ASMJIT_BASE_COMPILERCONTEXT_P_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base/compilerfunc.h b/DynamicHooks/thirdparty/AsmJit/base/compilerfunc.h
new file mode 100644
index 0000000..86b24c6
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/compilerfunc.h
@@ -0,0 +1,679 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_COMPILERFUNC_H
+#define _ASMJIT_BASE_COMPILERFUNC_H
+
+#include "../build.h"
+#if !defined(ASMJIT_DISABLE_COMPILER)
+
+// [Dependencies]
+#include "../base/operand.h"
+#include "../base/utils.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+//! \addtogroup asmjit_base
+//! \{
+
+// ============================================================================
+// [asmjit::FuncHint]
+// ============================================================================
+
+//! Function hints.
+//!
+//! For platform specific calling conventions, see:
+//! - `X86FuncHint` - X86/X64 function hints.
+ASMJIT_ENUM(FuncHint) {
+  //! Generate a naked function by omitting its prolog and epilog (default true).
+  //!
+  //! Naked functions should always result in less code required for function's
+  //! prolog and epilog.
+  //! In addition, on X86/64 naked functions save one register
+  //! (ebp or rbp), which can be used by the function instead.
+  kFuncHintNaked = 0,
+
+  //! Generate a compact function prolog/epilog if possible (default true).
+  //!
+  //! X86/X64 Specific
+  //! ----------------
+  //!
+  //! Use a shorter, but possibly slower prolog/epilog sequence to save/restore
+  //! registers. At the moment this only enables emitting `leave` in function's
+  //! epilog to make the code shorter, however, the counterpart `enter` is not
+  //! used in function's prolog for performance reasons.
+  kFuncHintCompact = 1,
+
+  //! Emit `emms` instruction in the function's epilog.
+  kFuncHintX86Emms = 17,
+  //! Emit `sfence` instruction in the function's epilog.
+  kFuncHintX86SFence = 18,
+  //! Emit `lfence` instruction in the function's epilog.
+  kFuncHintX86LFence = 19
+};
+
+// ============================================================================
+// [asmjit::FuncFlags]
+// ============================================================================
+
+//! Function flags.
+ASMJIT_ENUM(FuncFlags) {
+  //! Whether the function is using naked (minimal) prolog / epilog.
+  kFuncFlagIsNaked = 0x00000001,
+
+  //! Whether another function is called from this function.
+  kFuncFlagIsCaller = 0x00000002,
+
+  //! Whether the stack is not aligned to the required stack alignment,
+  //! thus it has to be aligned manually.
+  kFuncFlagIsStackMisaligned = 0x00000004,
+
+  //! Whether the stack pointer is adjusted by the stack size needed
+  //! to save registers and function variables.
+  //!
+  //! X86/X64 Specific
+  //! ----------------
+  //!
+  //! Stack pointer (ESP/RSP) is adjusted by 'sub' instruction in prolog and by
+  //! 'add' instruction in epilog (only if function is not naked). If function
+  //! needs to perform manual stack alignment more instructions are used to
+  //! adjust the stack (like "and zsp, -Alignment").
+  kFuncFlagIsStackAdjusted = 0x00000008,
+
+  //! Whether the function is finished using `Compiler::endFunc()`.
+  kFuncFlagIsFinished = 0x80000000,
+
+  //! Whether to emit `leave` instead of two instructions in case that the
+  //! function saves and restores the frame pointer.
+  kFuncFlagX86Leave = 0x00010000,
+
+  //! Whether it's required to move arguments to a new stack location,
+  //! because of manual aligning.
+  kFuncFlagX86MoveArgs = 0x00040000,
+
+  //! Whether to emit `emms` instruction in epilog (auto-detected).
+  kFuncFlagX86Emms = 0x01000000,
+
+  //! Whether to emit `sfence` instruction in epilog (auto-detected).
+  //!
+  //! `kFuncFlagX86SFence` with `kFuncFlagX86LFence` results in emitting `mfence`.
+  kFuncFlagX86SFence = 0x02000000,
+
+  //! Whether to emit `lfence` instruction in epilog (auto-detected).
+  //!
+  //! `kFuncFlagX86SFence` with `kFuncFlagX86LFence` results in emitting `mfence`.
+  kFuncFlagX86LFence = 0x04000000
+};
+
+// ============================================================================
+// [asmjit::FuncDir]
+// ============================================================================
+
+//! Function arguments direction.
+ASMJIT_ENUM(FuncDir) {
+  //! Arguments are passed left to right.
+  //!
+  //! This arguments direction is unusual in C, however it's used in Pascal.
+  kFuncDirLTR = 0,
+
+  //! Arguments are passed right to left.
+  //!
+  //! This is the default argument direction in C.
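+  //! (For a call such as `f(a, b, c)` a C caller therefore pushes `c` first
+  //! and `a` last, so `a` ends up at the lowest stack address.)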
+  kFuncDirRTL = 1
+};
+
+// ============================================================================
+// [asmjit::FuncMisc]
+// ============================================================================
+
+enum {
+  //! Function doesn't have variable number of arguments (`...`) (default).
+  kFuncNoVarArgs = 0xFF,
+  //! Invalid stack offset in function or function parameter.
+  kFuncStackInvalid = -1
+};
+
+// ============================================================================
+// [asmjit::FuncArgIndex]
+// ============================================================================
+
+//! Function argument index (lo/hi).
+ASMJIT_ENUM(FuncArgIndex) {
+  //! Maximum number of function arguments supported by AsmJit.
+  kFuncArgCount = 16,
+  //! Extended maximum number of arguments (used internally).
+  kFuncArgCountLoHi = kFuncArgCount * 2,
+
+  //! Index to the LO part of function argument (default).
+  //!
+  //! This value is typically omitted and added only if there is HI argument
+  //! accessed.
+  kFuncArgLo = 0,
+
+  //! Index to the HI part of function argument.
+  //!
+  //! HI part of function argument depends on target architecture. On x86 it's
+  //! typically used to transfer 64-bit integers (they form a pair of 32-bit
+  //! integers).
+  kFuncArgHi = kFuncArgCount
+};
+
+// ============================================================================
+// [asmjit::FuncRet]
+// ============================================================================
+
+//! Function return value (lo/hi) specification.
+ASMJIT_ENUM(FuncRet) {
+  //! Index to the LO part of function return value.
+  kFuncRetLo = 0,
+  //! Index to the HI part of function return value.
+  kFuncRetHi = 1
+};
+
+// ============================================================================
+// [asmjit::TypeId]
+// ============================================================================
+
+//! Function builder's `void` type.
+struct Void {};
+
+//! Function builder's `int8_t` type.
+struct Int8Type {};
+//! Function builder's `uint8_t` type.
+struct UInt8Type {};
+
+//! Function builder's `int16_t` type.
+struct Int16Type {};
+//! Function builder's `uint16_t` type.
+struct UInt16Type {};
+
+//! Function builder's `int32_t` type.
+struct Int32Type {};
+//! Function builder's `uint32_t` type.
+struct UInt32Type {};
+
+//! Function builder's `int64_t` type.
+struct Int64Type {};
+//! Function builder's `uint64_t` type.
+struct UInt64Type {};
+
+//! Function builder's `intptr_t` type.
+struct IntPtrType {};
+//! Function builder's `uintptr_t` type.
+struct UIntPtrType {};
+
+//! Function builder's `float` type.
+struct FloatType {};
+//! Function builder's `double` type.
+struct DoubleType {};
+
+#if !defined(ASMJIT_DOCGEN)
+template<typename T>
+struct TypeId {
+  // Let it fail here if `T` was not specialized.
+};
+
+template<typename T>
+struct TypeId<T*> {
+  enum { kId = kVarTypeIntPtr };
+};
+
+template<typename T>
+struct TypeIdOfInt {
+  enum { kId = (sizeof(T) == 1) ? (int)(IntTraits<T>::kIsSigned ? kVarTypeInt8  : kVarTypeUInt8 ) :
+               (sizeof(T) == 2) ? (int)(IntTraits<T>::kIsSigned ? kVarTypeInt16 : kVarTypeUInt16) :
+               (sizeof(T) == 4) ? (int)(IntTraits<T>::kIsSigned ? kVarTypeInt32 : kVarTypeUInt32) :
+               (sizeof(T) == 8) ? (int)(IntTraits<T>::kIsSigned ?
+                 kVarTypeInt64 : kVarTypeUInt64) : (int)kInvalidVar
+  };
+};
+
+#define ASMJIT_TYPE_ID(T, ID) \
+  template<> struct TypeId<T> { enum { kId = ID }; }
+
+ASMJIT_TYPE_ID(void              , kInvalidVar);
+ASMJIT_TYPE_ID(signed char       , TypeIdOfInt<signed char>::kId);
+ASMJIT_TYPE_ID(unsigned char     , TypeIdOfInt<unsigned char>::kId);
+ASMJIT_TYPE_ID(short             , TypeIdOfInt<short>::kId);
+ASMJIT_TYPE_ID(unsigned short    , TypeIdOfInt<unsigned short>::kId);
+ASMJIT_TYPE_ID(int               , TypeIdOfInt<int>::kId);
+ASMJIT_TYPE_ID(unsigned int      , TypeIdOfInt<unsigned int>::kId);
+ASMJIT_TYPE_ID(long              , TypeIdOfInt<long>::kId);
+ASMJIT_TYPE_ID(unsigned long     , TypeIdOfInt<unsigned long>::kId);
+ASMJIT_TYPE_ID(float             , kVarTypeFp32);
+ASMJIT_TYPE_ID(double            , kVarTypeFp64);
+
+#if ASMJIT_CC_HAS_NATIVE_CHAR
+ASMJIT_TYPE_ID(char              , TypeIdOfInt<char>::kId);
+#endif
+#if ASMJIT_CC_HAS_NATIVE_WCHAR_T
+ASMJIT_TYPE_ID(wchar_t           , TypeIdOfInt<wchar_t>::kId);
+#endif
+#if ASMJIT_CC_HAS_NATIVE_CHAR16_T
+ASMJIT_TYPE_ID(char16_t          , TypeIdOfInt<char16_t>::kId);
+#endif
+#if ASMJIT_CC_HAS_NATIVE_CHAR32_T
+ASMJIT_TYPE_ID(char32_t          , TypeIdOfInt<char32_t>::kId);
+#endif
+
+#if ASMJIT_CC_MSC && !ASMJIT_CC_MSC_GE(16, 0, 0)
+ASMJIT_TYPE_ID(__int64           , TypeIdOfInt<__int64>::kId);
+ASMJIT_TYPE_ID(unsigned __int64  , TypeIdOfInt<unsigned __int64>::kId);
+#else
+ASMJIT_TYPE_ID(long long         , TypeIdOfInt<long long>::kId);
+ASMJIT_TYPE_ID(unsigned long long, TypeIdOfInt<unsigned long long>::kId);
+#endif
+
+ASMJIT_TYPE_ID(Void              , kInvalidVar);
+ASMJIT_TYPE_ID(Int8Type          , kVarTypeInt8);
+ASMJIT_TYPE_ID(UInt8Type         , kVarTypeUInt8);
+ASMJIT_TYPE_ID(Int16Type         , kVarTypeInt16);
+ASMJIT_TYPE_ID(UInt16Type        , kVarTypeUInt16);
+ASMJIT_TYPE_ID(Int32Type         , kVarTypeInt32);
+ASMJIT_TYPE_ID(UInt32Type        , kVarTypeUInt32);
+ASMJIT_TYPE_ID(Int64Type         , kVarTypeInt64);
+ASMJIT_TYPE_ID(UInt64Type        , kVarTypeUInt64);
+ASMJIT_TYPE_ID(IntPtrType        , kVarTypeIntPtr);
+ASMJIT_TYPE_ID(UIntPtrType       , kVarTypeUIntPtr);
+ASMJIT_TYPE_ID(FloatType         , kVarTypeFp32);
+ASMJIT_TYPE_ID(DoubleType        , kVarTypeFp64);
+#endif // !ASMJIT_DOCGEN
+
+// ============================================================================
+// [asmjit::FuncInOut]
+// ============================================================================
+
+//! Function in/out - argument or return value translated from `FuncPrototype`.
+struct FuncInOut {
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE uint32_t getVarType() const noexcept { return _varType; }
+
+  ASMJIT_INLINE bool hasRegIndex() const noexcept { return _regIndex != kInvalidReg; }
+  ASMJIT_INLINE uint32_t getRegIndex() const noexcept { return _regIndex; }
+
+  ASMJIT_INLINE bool hasStackOffset() const noexcept { return _stackOffset != kFuncStackInvalid; }
+  ASMJIT_INLINE int32_t getStackOffset() const noexcept { return static_cast<int32_t>(_stackOffset); }
+
+  //! Get whether the argument / return value is assigned.
+  ASMJIT_INLINE bool isSet() const noexcept {
+    return (_regIndex != kInvalidReg) | (_stackOffset != kFuncStackInvalid);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  //! Reset the function argument to "unassigned state".
+  ASMJIT_INLINE void reset() noexcept { _packed = 0xFFFFFFFFU; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  union {
+    struct {
+      //! Variable type, see \ref VarType.
+      uint8_t _varType;
+      //! Register index if argument / return value is a register.
+      uint8_t _regIndex;
+      //! Stack offset if argument / return value is on the stack.
+      int16_t _stackOffset;
+    };
+
+    //! All members packed into single 32-bit integer.
+    uint32_t _packed;
+  };
+};
+
+// ============================================================================
+// [asmjit::FuncPrototype]
+// ============================================================================
+
+//! Function prototype.
+//!
+//! Function prototype contains information about function return type, count
+//! of arguments and their types. Function prototype is a low level structure
+//! which doesn't contain platform specific or calling convention specific
+//! information. Function prototype is used to create a `FuncDecl`.
+struct FuncPrototype {
+  // --------------------------------------------------------------------------
+  // [Setup]
+  // --------------------------------------------------------------------------
+
+  //! Setup the prototype.
+  ASMJIT_INLINE void setup(
+    uint32_t callConv,
+    uint32_t ret,
+    const uint32_t* args, uint32_t numArgs) noexcept {
+
+    ASMJIT_ASSERT(callConv <= 0xFF);
+    ASMJIT_ASSERT(numArgs <= 0xFF);
+
+    _callConv = static_cast<uint8_t>(callConv);
+    _varArgs = kFuncNoVarArgs;
+    _numArgs = static_cast<uint8_t>(numArgs);
+    _reserved = 0;
+
+    _ret = ret;
+    _args = args;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get the function's calling convention.
+  ASMJIT_INLINE uint32_t getCallConv() const noexcept { return _callConv; }
+  //! Get the variable arguments `...` index, `kFuncNoVarArgs` if none.
+  ASMJIT_INLINE uint32_t getVarArgs() const noexcept { return _varArgs; }
+  //! Get the number of function arguments.
+  ASMJIT_INLINE uint32_t getNumArgs() const noexcept { return _numArgs; }
+
+  //! Get the return value type.
+  ASMJIT_INLINE uint32_t getRet() const noexcept { return _ret; }
+  //! Get the type of the argument at index `i`.
+  ASMJIT_INLINE uint32_t getArg(uint32_t i) const noexcept {
+    ASMJIT_ASSERT(i < _numArgs);
+    return _args[i];
+  }
+  //! Get the array of function arguments' types.
+  ASMJIT_INLINE const uint32_t* getArgs() const noexcept { return _args; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  uint8_t _callConv;
+  uint8_t _varArgs;
+  uint8_t _numArgs;
+  uint8_t _reserved;
+
+  uint32_t _ret;
+  const uint32_t* _args;
+};
+
+// ============================================================================
+// [asmjit::FuncBuilderX]
+// ============================================================================
+
+// TODO: Rename to `DynamicFuncBuilder`
+//! Custom function builder for up to 32 function arguments.
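+//!
+//! A minimal sketch of dynamic use (illustrative only), describing
+//! `int f(int, float)` with the host calling convention:
+//!
+//!   FuncBuilderX proto;
+//!   proto.setRetT<int>();
+//!   proto.addArgT<int>();
+//!   proto.addArgT<float>();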
+struct FuncBuilderX : public FuncPrototype {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE FuncBuilderX(uint32_t callConv = kCallConvHost) noexcept {
+    setup(callConv, kInvalidVar, _builderArgList, 0);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void setCallConv(uint32_t callConv) noexcept {
+    ASMJIT_ASSERT(callConv <= 0xFF);
+    _callConv = static_cast<uint8_t>(callConv);
+  }
+
+  //! Set the return type to `retType`.
+  ASMJIT_INLINE void setRet(uint32_t retType) noexcept {
+    _ret = retType;
+  }
+  //! Set the return type based on `T`.
+  template<typename T>
+  ASMJIT_INLINE void setRetT() noexcept { setRet(TypeId<T>::kId); }
+
+  //! Set the argument at index `i` to the `type`.
+  ASMJIT_INLINE void setArg(uint32_t i, uint32_t type) noexcept {
+    ASMJIT_ASSERT(i < _numArgs);
+    _builderArgList[i] = type;
+  }
+  //! Set the argument at index `i` to the type based on `T`.
+  template<typename T>
+  ASMJIT_INLINE void setArgT(uint32_t i) noexcept { setArg(i, TypeId<T>::kId); }
+
+  //! Append an argument of `type` to the function prototype.
+  ASMJIT_INLINE void addArg(uint32_t type) noexcept {
+    ASMJIT_ASSERT(_numArgs < kFuncArgCount);
+    _builderArgList[_numArgs++] = type;
+  }
+  //! Append an argument of type based on `T` to the function prototype.
+  template<typename T>
+  ASMJIT_INLINE void addArgT() noexcept { addArg(TypeId<T>::kId); }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  uint32_t _builderArgList[kFuncArgCount];
+};
+
+//! \internal
+#define T(_Type_) TypeId<_Type_>::kId
+
+//! Function prototype (no args).
+template<typename RET>
+struct FuncBuilder0 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder0(uint32_t callConv = kCallConvHost) noexcept {
+    setup(callConv, T(RET), nullptr, 0);
+  }
+};
+
+//! Function prototype (1 argument).
+template<typename RET, typename P0>
+struct FuncBuilder1 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder1(uint32_t callConv = kCallConvHost) noexcept {
+    static const uint32_t args[] = { T(P0) };
+    setup(callConv, T(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! Function prototype (2 arguments).
+template<typename RET, typename P0, typename P1>
+struct FuncBuilder2 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder2(uint32_t callConv = kCallConvHost) noexcept {
+    static const uint32_t args[] = { T(P0), T(P1) };
+    setup(callConv, T(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! Function prototype (3 arguments).
+template<typename RET, typename P0, typename P1, typename P2>
+struct FuncBuilder3 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder3(uint32_t callConv = kCallConvHost) noexcept {
+    static const uint32_t args[] = { T(P0), T(P1), T(P2) };
+    setup(callConv, T(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! Function prototype (4 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3>
+struct FuncBuilder4 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder4(uint32_t callConv = kCallConvHost) noexcept {
+    static const uint32_t args[] = { T(P0), T(P1), T(P2), T(P3) };
+    setup(callConv, T(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! Function prototype (5 arguments).
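+//!
+//! (As with the other fixed-arity builders - e.g., illustratively,
+//! `FuncBuilder2<int, int, int>` describes `int f(int, int)` - the template
+//! arguments are the return type followed by the parameter types.)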
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4>
+struct FuncBuilder5 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder5(uint32_t callConv = kCallConvHost) noexcept {
+    static const uint32_t args[] = { T(P0), T(P1), T(P2), T(P3), T(P4) };
+    setup(callConv, T(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! Function prototype (6 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5>
+struct FuncBuilder6 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder6(uint32_t callConv = kCallConvHost) noexcept {
+    static const uint32_t args[] = { T(P0), T(P1), T(P2), T(P3), T(P4), T(P5) };
+    setup(callConv, T(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! Function prototype (7 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6>
+struct FuncBuilder7 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder7(uint32_t callConv = kCallConvHost) noexcept {
+    static const uint32_t args[] = { T(P0), T(P1), T(P2), T(P3), T(P4), T(P5), T(P6) };
+    setup(callConv, T(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! Function prototype (8 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7>
+struct FuncBuilder8 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder8(uint32_t callConv = kCallConvHost) noexcept {
+    static const uint32_t args[] = { T(P0), T(P1), T(P2), T(P3), T(P4), T(P5), T(P6), T(P7) };
+    setup(callConv, T(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! Function prototype (9 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8>
+struct FuncBuilder9 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder9(uint32_t callConv = kCallConvHost) noexcept {
+    static const uint32_t args[] = { T(P0), T(P1), T(P2), T(P3), T(P4), T(P5), T(P6), T(P7), T(P8) };
+    setup(callConv, T(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! Function prototype (10 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9>
+struct FuncBuilder10 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder10(uint32_t callConv = kCallConvHost) noexcept {
+    static const uint32_t args[] = { T(P0), T(P1), T(P2), T(P3), T(P4), T(P5), T(P6), T(P7), T(P8), T(P9) };
+    setup(callConv, T(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+#undef T
+
+// ============================================================================
+// [asmjit::FuncDecl]
+// ============================================================================
+
+//! Function declaration.
+struct FuncDecl {
+  // --------------------------------------------------------------------------
+  // [Accessors - Calling Convention]
+  // --------------------------------------------------------------------------
+
+  //! Get the function's calling convention, see `CallConv`.
+  ASMJIT_INLINE uint32_t getCallConv() const noexcept { return _callConv; }
+
+  //! Get whether the callee pops the stack.
+  ASMJIT_INLINE uint32_t getCalleePopsStack() const noexcept { return _calleePopsStack; }
+
+  //! Get direction of arguments passed on the stack.
+  //!
+  //! Direction should be always `kFuncDirRTL`.
+  //!
+  //! NOTE: This is related to used calling convention, it's not affected by
+  //! number of function arguments or their types.
+  ASMJIT_INLINE uint32_t getArgsDirection() const noexcept { return _argsDirection; }
+
+  //! Get stack size needed for function arguments passed on the stack.
+  ASMJIT_INLINE uint32_t getArgStackSize() const noexcept { return _argStackSize; }
+  //! Get size of "Red Zone".
+  ASMJIT_INLINE uint32_t getRedZoneSize() const noexcept { return _redZoneSize; }
+  //! Get size of "Spill Zone".
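+  //! (On WIN64 this is the 32-byte "shadow space" the caller reserves for
+  //! the callee; most other ABIs use no spill zone, see `_spillZoneSize`.)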
+ ASMJIT_INLINE uint32_t getSpillZoneSize() const noexcept { return _spillZoneSize; } + + // -------------------------------------------------------------------------- + // [Accessors - Arguments and Return] + // -------------------------------------------------------------------------- + + //! Get whether the function has a return value. + ASMJIT_INLINE bool hasRet() const noexcept { return _retCount != 0; } + //! Get count of function return values. + ASMJIT_INLINE uint32_t getRetCount() const noexcept { return _retCount; } + + //! Get function return value. + ASMJIT_INLINE FuncInOut& getRet(uint32_t index = kFuncRetLo) noexcept { return _rets[index]; } + //! Get function return value. + ASMJIT_INLINE const FuncInOut& getRet(uint32_t index = kFuncRetLo) const noexcept { return _rets[index]; } + + //! Get the number of function arguments. + ASMJIT_INLINE uint32_t getNumArgs() const noexcept { return _numArgs; } + + //! Get function arguments array. + ASMJIT_INLINE FuncInOut* getArgs() noexcept { return _args; } + //! Get function arguments array (const). + ASMJIT_INLINE const FuncInOut* getArgs() const noexcept { return _args; } + + //! Get function argument at index `index`. + ASMJIT_INLINE FuncInOut& getArg(size_t index) noexcept { + ASMJIT_ASSERT(index < kFuncArgCountLoHi); + return _args[index]; + } + + //! Get function argument at index `index`. + ASMJIT_INLINE const FuncInOut& getArg(size_t index) const noexcept { + ASMJIT_ASSERT(index < kFuncArgCountLoHi); + return _args[index]; + } + + ASMJIT_INLINE void resetArg(size_t index) noexcept { + ASMJIT_ASSERT(index < kFuncArgCountLoHi); + _args[index].reset(); + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! Calling convention. + uint8_t _callConv; + //! Whether a callee pops stack. + uint8_t _calleePopsStack : 1; + //! Direction for arguments passed on the stack, see `FuncDir`. + uint8_t _argsDirection : 1; + //! Reserved #0 (alignment). + uint8_t _reserved0 : 6; + + //! Number of function arguments. + uint8_t _numArgs; + //! Number of function return values. + uint8_t _retCount; + + //! Count of bytes consumed by arguments on the stack (aligned). + uint32_t _argStackSize; + + //! Size of "Red Zone". + //! + //! NOTE: Used by AMD64-ABI (128 bytes). + uint16_t _redZoneSize; + + //! Size of "Spill Zone". + //! + //! NOTE: Used by WIN64-ABI (32 bytes). + uint16_t _spillZoneSize; + + //! Function arguments (LO & HI) mapped to physical registers and stack. + FuncInOut _args[kFuncArgCountLoHi]; + + //! Function return value(s). + FuncInOut _rets[2]; +}; + +//! \} + +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" + +// [Guard] +#endif // !ASMJIT_DISABLE_COMPILER +#endif // _ASMJIT_BASE_COMPILERFUNC_H diff --git a/DynamicHooks/thirdparty/AsmJit/base/constpool.cpp b/DynamicHooks/thirdparty/AsmJit/base/constpool.cpp new file mode 100644 index 0000000..38bf492 --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/base/constpool.cpp @@ -0,0 +1,523 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Export] +#define ASMJIT_EXPORTS + +// [Dependencies] +#include "../base/constpool.h" +#include "../base/utils.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +// Binary tree code is based on Julienne Walker's "Andersson Binary Trees" +// article and implementation. 
However, only three operations are implemented - +// get, insert and traverse. + +// ============================================================================ +// [asmjit::ConstPool::Tree - Ops] +// ============================================================================ + +//! \internal +//! +//! Remove left horizontal links. +static ASMJIT_INLINE ConstPool::Node* ConstPoolTree_skewNode(ConstPool::Node* node) noexcept { + ConstPool::Node* link = node->_link[0]; + uint32_t level = node->_level; + + if (level != 0 && link != nullptr && link->_level == level) { + node->_link[0] = link->_link[1]; + link->_link[1] = node; + + node = link; + } + + return node; +} + +//! \internal +//! +//! Remove consecutive horizontal links. +static ASMJIT_INLINE ConstPool::Node* ConstPoolTree_splitNode(ConstPool::Node* node) noexcept { + ConstPool::Node* link = node->_link[1]; + uint32_t level = node->_level; + + if (level != 0 && link != nullptr && link->_link[1] != nullptr && link->_link[1]->_level == level) { + node->_link[1] = link->_link[0]; + link->_link[0] = node; + + node = link; + node->_level++; + } + + return node; +} + +ConstPool::Node* ConstPool::Tree::get(const void* data) noexcept { + ConstPool::Node* node = _root; + size_t dataSize = _dataSize; + + while (node != nullptr) { + int c = ::memcmp(node->getData(), data, dataSize); + if (c == 0) + return node; + node = node->_link[c < 0]; + } + + return nullptr; +} + +void ConstPool::Tree::put(ConstPool::Node* newNode) noexcept { + size_t dataSize = _dataSize; + + _length++; + if (_root == nullptr) { + _root = newNode; + return; + } + + ConstPool::Node* node = _root; + ConstPool::Node* stack[kHeightLimit]; + + unsigned int top = 0; + unsigned int dir; + + // Find a spot and save the stack. + for (;;) { + stack[top++] = node; + dir = ::memcmp(node->getData(), newNode->getData(), dataSize) < 0; + + ConstPool::Node* link = node->_link[dir]; + if (link == nullptr) + break; + + node = link; + } + + // Link and rebalance. + node->_link[dir] = newNode; + + while (top > 0) { + // Which child? + node = stack[--top]; + + if (top != 0) { + dir = stack[top - 1]->_link[1] == node; + } + + node = ConstPoolTree_skewNode(node); + node = ConstPoolTree_splitNode(node); + + // Fix the parent. 
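The same skew/split fix-ups are easier to follow on a toy, self-contained
Andersson tree. This sketch uses the recursive insert formulation (put() above
performs the identical rebalance iteratively with an explicit stack); every
name in it is hypothetical:

    #include <cassert>

    struct N { N* link[2]; int level; int key; };

    // Skew: remove a left horizontal link by rotating right.
    static N* skew(N* n) {
      N* l = n->link[0];
      if (l != nullptr && l->level == n->level) {
        n->link[0] = l->link[1];
        l->link[1] = n;
        return l;
      }
      return n;
    }

    // Split: remove two consecutive right horizontal links by rotating left.
    static N* split(N* n) {
      N* r = n->link[1];
      if (r != nullptr && r->link[1] != nullptr && r->link[1]->level == n->level) {
        n->link[1] = r->link[0];
        r->link[0] = n;
        r->level++;
        return r;
      }
      return n;
    }

    static N* insert(N* root, N* node) {
      if (root == nullptr) {
        node->link[0] = node->link[1] = nullptr;
        node->level = 1;
        return node;
      }
      int dir = node->key > root->key;
      root->link[dir] = insert(root->link[dir], node);
      return split(skew(root));  // the two fix-ups applied bottom-up
    }

    int main() {
      N nodes[7];
      N* root = nullptr;
      for (int i = 0; i < 7; i++) {
        nodes[i].key = i;  // ascending keys: worst case for a naive BST
        root = insert(root, &nodes[i]);
      }
      // skew/split keep the tree logarithmic: seven ascending inserts yield
      // a perfect tree with key 3 at the root on level 3.
      assert(root->key == 3 && root->level == 3);
      return 0;
    }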
+ if (top != 0) + stack[top - 1]->_link[dir] = node; + else + _root = node; + } +} + +// ============================================================================ +// [asmjit::ConstPool - Construction / Destruction] +// ============================================================================ + +ConstPool::ConstPool(Zone* zone) noexcept { + _zone = zone; + + size_t dataSize = 1; + for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_tree); i++) { + _tree[i].setDataSize(dataSize); + _gaps[i] = nullptr; + dataSize <<= 1; + } + + _gapPool = nullptr; + _size = 0; + _alignment = 0; +} + +ConstPool::~ConstPool() noexcept {} + +// ============================================================================ +// [asmjit::ConstPool - Reset] +// ============================================================================ + +void ConstPool::reset() noexcept { + for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_tree); i++) { + _tree[i].reset(); + _gaps[i] = nullptr; + } + + _gapPool = nullptr; + _size = 0; + _alignment = 0; +} + +// ============================================================================ +// [asmjit::ConstPool - Ops] +// ============================================================================ + +static ASMJIT_INLINE ConstPool::Gap* ConstPool_allocGap(ConstPool* self) noexcept { + ConstPool::Gap* gap = self->_gapPool; + if (gap == nullptr) + return self->_zone->allocT(); + + self->_gapPool = gap->_next; + return gap; +} + +static ASMJIT_INLINE void ConstPool_freeGap(ConstPool* self, ConstPool::Gap* gap) noexcept { + gap->_next = self->_gapPool; + self->_gapPool = gap; +} + +static void ConstPool_addGap(ConstPool* self, size_t offset, size_t length) noexcept { + ASMJIT_ASSERT(length > 0); + + while (length > 0) { + size_t gapIndex; + size_t gapLength; + + if (length >= 16 && Utils::isAligned(offset, 16)) { + gapIndex = ConstPool::kIndex16; + gapLength = 16; + } + else if (length >= 8 && Utils::isAligned(offset, 8)) { + gapIndex = ConstPool::kIndex8; + gapLength = 8; + } + else if (length >= 4 && Utils::isAligned(offset, 4)) { + gapIndex = ConstPool::kIndex4; + gapLength = 4; + } + else if (length >= 2 && Utils::isAligned(offset, 2)) { + gapIndex = ConstPool::kIndex2; + gapLength = 2; + } + else { + gapIndex = ConstPool::kIndex1; + gapLength = 1; + } + + // We don't have to check for errors here, if this failed nothing really + // happened (just the gap won't be visible) and it will fail again at + // place where checking will cause kErrorNoHeapMemory. + ConstPool::Gap* gap = ConstPool_allocGap(self); + if (gap == nullptr) + return; + + gap->_next = self->_gaps[gapIndex]; + self->_gaps[gapIndex] = gap; + + gap->_offset = offset; + gap->_length = gapLength; + + offset += gapLength; + length -= gapLength; + } +} + +Error ConstPool::add(const void* data, size_t size, size_t& dstOffset) noexcept { + size_t treeIndex; + + if (size == 32) + treeIndex = kIndex32; + else if (size == 16) + treeIndex = kIndex16; + else if (size == 8) + treeIndex = kIndex8; + else if (size == 4) + treeIndex = kIndex4; + else if (size == 2) + treeIndex = kIndex2; + else if (size == 1) + treeIndex = kIndex1; + else + return kErrorInvalidArgument; + + ConstPool::Node* node = _tree[treeIndex].get(data); + if (node != nullptr) { + dstOffset = node->_offset; + return kErrorOk; + } + + // Before incrementing the current offset try if there is a gap that can + // be used for the requested data. 
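The gap arithmetic behind this comment is worth one worked example. A
standalone sketch; the helpers mirror what Utils::alignTo()/alignDiff() are
assumed to do for power-of-two alignments:

    #include <cassert>
    #include <cstddef>

    static size_t alignTo(size_t x, size_t a)   { return (x + a - 1) & ~(a - 1); }
    static size_t alignDiff(size_t x, size_t a) { return alignTo(x, a) - x; }

    int main() {
      // The pool currently holds 10 bytes and an 8-byte constant arrives.
      size_t size = 10;
      size_t diff = alignDiff(size, 8);  // 6 bytes of padding are needed
      assert(diff == 6);

      // ConstPool_addGap() records bytes 10..15 as reusable gaps, split on
      // natural boundaries: a 2-byte gap at 10 and a 4-byte gap at 12.
      size_t offset = size + diff;       // the constant itself lands at 16
      assert(offset % 8 == 0);
      return 0;
    }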
+ size_t offset = ~static_cast(0); + size_t gapIndex = treeIndex; + + while (gapIndex != kIndexCount - 1) { + ConstPool::Gap* gap = _gaps[treeIndex]; + + // Check if there is a gap. + if (gap != nullptr) { + size_t gapOffset = gap->_offset; + size_t gapLength = gap->_length; + + // Destroy the gap for now. + _gaps[treeIndex] = gap->_next; + ConstPool_freeGap(this, gap); + + offset = gapOffset; + ASMJIT_ASSERT(Utils::isAligned(offset, size)); + + gapLength -= size; + if (gapLength > 0) + ConstPool_addGap(this, gapOffset, gapLength); + } + + gapIndex++; + } + + if (offset == ~static_cast(0)) { + // Get how many bytes have to be skipped so the address is aligned accordingly + // to the 'size'. + size_t diff = Utils::alignDiff(_size, size); + + if (diff != 0) { + ConstPool_addGap(this, _size, diff); + _size += diff; + } + + offset = _size; + _size += size; + } + + // Add the initial node to the right index. + node = ConstPool::Tree::_newNode(_zone, data, size, offset, false); + if (node == nullptr) + return kErrorNoHeapMemory; + + _tree[treeIndex].put(node); + _alignment = Utils::iMax(_alignment, size); + + dstOffset = offset; + + // Now create a bunch of shared constants that are based on the data pattern. + // We stop at size 4, it probably doesn't make sense to split constants down + // to 1 byte. + size_t pCount = 1; + while (size > 4) { + size >>= 1; + pCount <<= 1; + + ASMJIT_ASSERT(treeIndex != 0); + treeIndex--; + + const uint8_t* pData = static_cast(data); + for (size_t i = 0; i < pCount; i++, pData += size) { + node = _tree[treeIndex].get(pData); + + if (node != nullptr) + continue; + + node = ConstPool::Tree::_newNode(_zone, pData, size, offset + (i * size), true); + _tree[treeIndex].put(node); + } + } + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::ConstPool - Reset] +// ============================================================================ + +struct ConstPoolFill { + ASMJIT_INLINE ConstPoolFill(uint8_t* dst, size_t dataSize) noexcept : + _dst(dst), + _dataSize(dataSize) {} + + ASMJIT_INLINE void visit(const ConstPool::Node* node) noexcept { + if (!node->_shared) + ::memcpy(_dst + node->_offset, node->getData(), _dataSize); + } + + uint8_t* _dst; + size_t _dataSize; +}; + +void ConstPool::fill(void* dst) const noexcept { + // Clears possible gaps, asmjit should never emit garbage to the output. 
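Taken together, add() and fill() give the workflow below. A hedged sketch
assuming the Zone allocator from this patch; the function name and buffer
size are arbitrary:

    #include <asmjit/asmjit.h>
    #include <cstdint>
    #include <vector>
    using namespace asmjit;

    void constPoolExample() {
      Zone zone(8096 - Zone::kZoneOverhead);
      ConstPool pool(&zone);

      uint64_t c = 0x1122334455667788;
      size_t off1, off2;
      pool.add(&c, 8, off1);  // first constant: off1 == 0
      pool.add(&c, 8, off2);  // deduplicated:   off2 == off1

      // On a little-endian target the 4-byte halves of `c` were registered
      // as shared slots, so this add() reuses one instead of growing the pool.
      uint32_t lo = 0x55667788;
      size_t off3;
      pool.add(&lo, 4, off3);

      std::vector<uint8_t> buf(pool.getSize());
      pool.fill(buf.data());  // gaps zeroed, constants copied at their offsets
    }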
+ ::memset(dst, 0, _size); + + ConstPoolFill filler(static_cast(dst), 1); + for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_tree); i++) { + _tree[i].iterate(filler); + filler._dataSize <<= 1; + } +} + +// ============================================================================ +// [asmjit::ConstPool - Test] +// ============================================================================ + +#if defined(ASMJIT_TEST) +UNIT(base_constpool) { + Zone zone(32384 - Zone::kZoneOverhead); + ConstPool pool(&zone); + + uint32_t i; + uint32_t kCount = 1000000; + + INFO("Adding %u constants to the pool.", kCount); + { + size_t prevOffset; + size_t curOffset; + uint64_t c = ASMJIT_UINT64_C(0x0101010101010101); + + EXPECT(pool.add(&c, 8, prevOffset) == kErrorOk, + "pool.add() - Returned error."); + EXPECT(prevOffset == 0, + "pool.add() - First constant should have zero offset."); + + for (i = 1; i < kCount; i++) { + c++; + EXPECT(pool.add(&c, 8, curOffset) == kErrorOk, + "pool.add() - Returned error."); + EXPECT(prevOffset + 8 == curOffset, + "pool.add() - Returned incorrect curOffset."); + EXPECT(pool.getSize() == (i + 1) * 8, + "pool.getSize() - Reported incorrect size."); + prevOffset = curOffset; + } + + EXPECT(pool.getAlignment() == 8, + "pool.getAlignment() - Expected 8-byte alignment."); + } + + INFO("Retrieving %u constants from the pool.", kCount); + { + uint64_t c = ASMJIT_UINT64_C(0x0101010101010101); + + for (i = 0; i < kCount; i++) { + size_t offset; + EXPECT(pool.add(&c, 8, offset) == kErrorOk, + "pool.add() - Returned error."); + EXPECT(offset == i * 8, + "pool.add() - Should have reused constant."); + c++; + } + } + + INFO("Checking if the constants were split into 4-byte patterns."); + { + uint32_t c = 0x01010101; + for (i = 0; i < kCount; i++) { + size_t offset; + EXPECT(pool.add(&c, 4, offset) == kErrorOk, + "pool.add() - Returned error."); + EXPECT(offset == i * 8, + "pool.add() - Should reuse existing constant."); + c++; + } + } + + INFO("Adding 2 byte constant to misalign the current offset."); + { + uint16_t c = 0xFFFF; + size_t offset; + + EXPECT(pool.add(&c, 2, offset) == kErrorOk, + "pool.add() - Returned error."); + EXPECT(offset == kCount * 8, + "pool.add() - Didn't return expected position."); + EXPECT(pool.getAlignment() == 8, + "pool.getAlignment() - Expected 8-byte alignment."); + } + + INFO("Adding 8 byte constant to check if pool gets aligned again."); + { + uint64_t c = ASMJIT_UINT64_C(0xFFFFFFFFFFFFFFFF); + size_t offset; + + EXPECT(pool.add(&c, 8, offset) == kErrorOk, + "pool.add() - Returned error."); + EXPECT(offset == kCount * 8 + 8, + "pool.add() - Didn't return aligned offset."); + } + + INFO("Adding 2 byte constant to verify the gap is filled."); + { + uint16_t c = 0xFFFE; + size_t offset; + + EXPECT(pool.add(&c, 2, offset) == kErrorOk, + "pool.add() - Returned error."); + EXPECT(offset == kCount * 8 + 2, + "pool.add() - Didn't fill the gap."); + EXPECT(pool.getAlignment() == 8, + "pool.getAlignment() - Expected 8-byte alignment."); + } + + INFO("Checking reset functionality."); + { + pool.reset(); + + EXPECT(pool.getSize() == 0, + "pool.getSize() - Expected pool size to be zero."); + EXPECT(pool.getAlignment() == 0, + "pool.getSize() - Expected pool alignment to be zero."); + } + + INFO("Checking pool alignment when combined constants are added."); + { + uint8_t bytes[32] = { 0 }; + size_t offset; + + pool.add(bytes, 1, offset); + + EXPECT(pool.getSize() == 1, + "pool.getSize() - Expected pool size to be 1 byte."); + EXPECT(pool.getAlignment() == 1, + 
"pool.getSize() - Expected pool alignment to be 1 byte."); + EXPECT(offset == 0, + "pool.getSize() - Expected offset returned to be zero."); + + pool.add(bytes, 2, offset); + + EXPECT(pool.getSize() == 4, + "pool.getSize() - Expected pool size to be 4 bytes."); + EXPECT(pool.getAlignment() == 2, + "pool.getSize() - Expected pool alignment to be 2 bytes."); + EXPECT(offset == 2, + "pool.getSize() - Expected offset returned to be 2."); + + pool.add(bytes, 4, offset); + + EXPECT(pool.getSize() == 8, + "pool.getSize() - Expected pool size to be 8 bytes."); + EXPECT(pool.getAlignment() == 4, + "pool.getSize() - Expected pool alignment to be 4 bytes."); + EXPECT(offset == 4, + "pool.getSize() - Expected offset returned to be 4."); + + pool.add(bytes, 4, offset); + + EXPECT(pool.getSize() == 8, + "pool.getSize() - Expected pool size to be 8 bytes."); + EXPECT(pool.getAlignment() == 4, + "pool.getSize() - Expected pool alignment to be 4 bytes."); + EXPECT(offset == 4, + "pool.getSize() - Expected offset returned to be 8."); + + pool.add(bytes, 32, offset); + EXPECT(pool.getSize() == 64, + "pool.getSize() - Expected pool size to be 64 bytes."); + EXPECT(pool.getAlignment() == 32, + "pool.getSize() - Expected pool alignment to be 32 bytes."); + EXPECT(offset == 32, + "pool.getSize() - Expected offset returned to be 32."); + } +} +#endif // ASMJIT_TEST + +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" diff --git a/DynamicHooks/thirdparty/AsmJit/base/constpool.h b/DynamicHooks/thirdparty/AsmJit/base/constpool.h new file mode 100644 index 0000000..4b25c68 --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/base/constpool.h @@ -0,0 +1,283 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BASE_CONSTPOOL_H +#define _ASMJIT_BASE_CONSTPOOL_H + +// [Dependencies] +#include "../base/zone.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +//! \addtogroup asmjit_base +//! \{ + +// ============================================================================ +// [asmjit::ConstPool] +// ============================================================================ + +//! Constant pool. +class ConstPool { + public: + ASMJIT_NO_COPY(ConstPool) + + enum { + kIndex1 = 0, + kIndex2 = 1, + kIndex4 = 2, + kIndex8 = 3, + kIndex16 = 4, + kIndex32 = 5, + kIndexCount = 6 + }; + + // -------------------------------------------------------------------------- + // [Gap] + // -------------------------------------------------------------------------- + + //! \internal + //! + //! Zone-allocated const-pool gap. + struct Gap { + //! Link to the next gap + Gap* _next; + //! Offset of the gap. + size_t _offset; + //! Remaining bytes of the gap (basically a gap size). + size_t _length; + }; + + // -------------------------------------------------------------------------- + // [Node] + // -------------------------------------------------------------------------- + + //! \internal + //! + //! Zone-allocated const-pool node. 
+ struct Node { + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE void* getData() const noexcept { + return static_cast(const_cast(this) + 1); + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! Left/Right nodes. + Node* _link[2]; + //! Horizontal level for balance. + uint32_t _level : 31; + //! Whether this constant is shared with another. + uint32_t _shared : 1; + //! Data offset from the beginning of the pool. + uint32_t _offset; + }; + + // -------------------------------------------------------------------------- + // [Tree] + // -------------------------------------------------------------------------- + + //! \internal + //! + //! Zone-allocated const-pool tree. + struct Tree { + enum { + //! Maximum tree height == log2(1 << 64). + kHeightLimit = 64 + }; + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE Tree(size_t dataSize = 0) noexcept + : _root(nullptr), + _length(0), + _dataSize(dataSize) {} + ASMJIT_INLINE ~Tree() {} + + // -------------------------------------------------------------------------- + // [Reset] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE void reset() noexcept { + _root = nullptr; + _length = 0; + } + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE bool isEmpty() const noexcept { return _length == 0; } + ASMJIT_INLINE size_t getLength() const noexcept { return _length; } + + ASMJIT_INLINE void setDataSize(size_t dataSize) noexcept { + ASMJIT_ASSERT(isEmpty()); + _dataSize = dataSize; + } + + // -------------------------------------------------------------------------- + // [Ops] + // -------------------------------------------------------------------------- + + ASMJIT_API Node* get(const void* data) noexcept; + ASMJIT_API void put(Node* node) noexcept; + + // -------------------------------------------------------------------------- + // [Iterate] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void iterate(Visitor& visitor) const noexcept { + Node* node = const_cast(_root); + if (node == nullptr) + return; + + Node* stack[kHeightLimit]; + size_t top = 0; + + for (;;) { + Node* left = node->_link[0]; + if (left != nullptr) { + ASMJIT_ASSERT(top != kHeightLimit); + stack[top++] = node; + + node = left; + continue; + } + +L_Visit: + visitor.visit(node); + node = node->_link[1]; + if (node != nullptr) + continue; + + if (top == 0) + return; + + node = stack[--top]; + goto L_Visit; + } + } + + // -------------------------------------------------------------------------- + // [Helpers] + // -------------------------------------------------------------------------- + + static ASMJIT_INLINE Node* _newNode(Zone* zone, const void* data, size_t size, size_t offset, bool shared) noexcept { + Node* node = zone->allocT(sizeof(Node) + size); + if (node == nullptr) + return nullptr; + + node->_link[0] = nullptr; + node->_link[1] = nullptr; + node->_level = 1; + node->_shared = 
shared;
+      node->_offset = static_cast<uint32_t>(offset);
+
+      ::memcpy(node->getData(), data, size);
+      return node;
+    }
+
+    // --------------------------------------------------------------------------
+    // [Members]
+    // --------------------------------------------------------------------------
+
+    //! Root of the tree.
+    Node* _root;
+    //! Length of the tree (count of nodes).
+    size_t _length;
+    //! Size of the data.
+    size_t _dataSize;
+  };
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API ConstPool(Zone* zone) noexcept;
+  ASMJIT_API ~ConstPool() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API void reset() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Ops]
+  // --------------------------------------------------------------------------
+
+  //! Get whether the constant-pool is empty.
+  ASMJIT_INLINE bool isEmpty() const noexcept { return _size == 0; }
+  //! Get the size of the constant-pool in bytes.
+  ASMJIT_INLINE size_t getSize() const noexcept { return _size; }
+  //! Get minimum alignment.
+  ASMJIT_INLINE size_t getAlignment() const noexcept { return _alignment; }
+
+  //! Add a constant to the constant pool.
+  //!
+  //! The constant must have known size, which is 1, 2, 4, 8, 16 or 32 bytes.
+  //! The constant is added to the pool only if it doesn't exist already,
+  //! otherwise the cached value is returned.
+  //!
+  //! AsmJit is able to subdivide added constants, so for example if you add
+  //! 8-byte constant 0x1122334455667788 it will create the following slots:
+  //!
+  //!   8-byte: 0x1122334455667788
+  //!   4-byte: 0x11223344, 0x55667788
+  //!
+  //! The reason is that when combining MMX/SSE/AVX code some patterns are used
+  //! frequently. However, AsmJit is not able to reallocate a constant that has
+  //! been already added. For example if you try to add 4-byte constant and then
+  //! 8-byte constant having the same 4-byte pattern as the previous one, two
+  //! independent slots will be generated by the pool.
+  ASMJIT_API Error add(const void* data, size_t size, size_t& dstOffset) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Fill]
+  // --------------------------------------------------------------------------
+
+  //! Fill the destination with the constants from the pool.
+  ASMJIT_API void fill(void* dst) const noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Zone allocator.
+  Zone* _zone;
+  //! Tree per size.
+  Tree _tree[kIndexCount];
+  //! Gaps per size.
+  Gap* _gaps[kIndexCount];
+  //! Gaps pool.
+  Gap* _gapPool;
+
+  //! Size of the pool (in bytes).
+  size_t _size;
+  //! Alignment.
+  size_t _alignment;
+};
+
+//! \}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_CONSTPOOL_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base/containers.cpp b/DynamicHooks/thirdparty/AsmJit/base/containers.cpp
new file mode 100644
index 0000000..3242a0f
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/containers.cpp
@@ -0,0 +1,374 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Export] +#define ASMJIT_EXPORTS + +// [Dependencies] +#include "../base/containers.h" +#include "../base/utils.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [asmjit::StringBuilder - Construction / Destruction] +// ============================================================================ + +// Should be placed in read-only memory. +static const char StringBuilder_empty[4] = { 0 }; + +StringBuilder::StringBuilder() noexcept + : _data(const_cast(StringBuilder_empty)), + _length(0), + _capacity(0), + _canFree(false) {} + +StringBuilder::~StringBuilder() noexcept { + if (_canFree) + ASMJIT_FREE(_data); +} + +// ============================================================================ +// [asmjit::StringBuilder - Prepare / Reserve] +// ============================================================================ + +char* StringBuilder::prepare(uint32_t op, size_t len) noexcept { + // -------------------------------------------------------------------------- + // [Set] + // -------------------------------------------------------------------------- + + if (op == kStringOpSet) { + // We don't care here, but we can't return a NULL pointer since it indicates + // failure in memory allocation. + if (len == 0) { + if (_data != StringBuilder_empty) + _data[0] = 0; + + _length = 0; + return _data; + } + + if (_capacity < len) { + if (len >= IntTraits::maxValue() - sizeof(intptr_t) * 2) + return nullptr; + + size_t to = Utils::alignTo(len, sizeof(intptr_t)); + if (to < 256 - sizeof(intptr_t)) + to = 256 - sizeof(intptr_t); + + char* newData = static_cast(ASMJIT_ALLOC(to + sizeof(intptr_t))); + if (newData == nullptr) { + clear(); + return nullptr; + } + + if (_canFree) + ASMJIT_FREE(_data); + + _data = newData; + _capacity = to + sizeof(intptr_t) - 1; + _canFree = true; + } + + _data[len] = 0; + _length = len; + + ASMJIT_ASSERT(_length <= _capacity); + return _data; + } + + // -------------------------------------------------------------------------- + // [Append] + // -------------------------------------------------------------------------- + + else { + // We don't care here, but we can't return a nullptr pointer since it indicates + // failure in memory allocation. + if (len == 0) + return _data + _length; + + // Overflow. 
+ if (IntTraits::maxValue() - sizeof(intptr_t) * 2 - _length < len) + return nullptr; + + size_t after = _length + len; + if (_capacity < after) { + size_t to = _capacity; + + if (to < 256) + to = 256; + + while (to < 1024 * 1024 && to < after) + to *= 2; + + if (to < after) { + to = after; + if (to < (IntTraits::maxValue() - 1024 * 32)) + to = Utils::alignTo(to, 1024 * 32); + } + + to = Utils::alignTo(to, sizeof(intptr_t)); + char* newData = static_cast(ASMJIT_ALLOC(to + sizeof(intptr_t))); + + if (newData == nullptr) + return nullptr; + + ::memcpy(newData, _data, _length); + if (_canFree) + ASMJIT_FREE(_data); + + _data = newData; + _capacity = to + sizeof(intptr_t) - 1; + _canFree = true; + } + + char* ret = _data + _length; + _data[after] = 0; + _length = after; + + ASMJIT_ASSERT(_length <= _capacity); + return ret; + } +} + +bool StringBuilder::reserve(size_t to) noexcept { + if (_capacity >= to) + return true; + + if (to >= IntTraits::maxValue() - sizeof(intptr_t) * 2) + return false; + + to = Utils::alignTo(to, sizeof(intptr_t)); + + char* newData = static_cast(ASMJIT_ALLOC(to + sizeof(intptr_t))); + if (newData == nullptr) + return false; + + ::memcpy(newData, _data, _length + 1); + if (_canFree) + ASMJIT_FREE(_data); + + _data = newData; + _capacity = to + sizeof(intptr_t) - 1; + _canFree = true; + return true; +} + +// ============================================================================ +// [asmjit::StringBuilder - Clear] +// ============================================================================ + +void StringBuilder::clear() noexcept { + if (_data != StringBuilder_empty) + _data[0] = 0; + _length = 0; +} + +// ============================================================================ +// [asmjit::StringBuilder - Methods] +// ============================================================================ + +bool StringBuilder::_opString(uint32_t op, const char* str, size_t len) noexcept { + if (len == kInvalidIndex) + len = str != nullptr ? 
::strlen(str) : static_cast(0); + + char* p = prepare(op, len); + if (p == nullptr) + return false; + + ::memcpy(p, str, len); + return true; +} + +bool StringBuilder::_opChar(uint32_t op, char c) noexcept { + char* p = prepare(op, 1); + if (p == nullptr) + return false; + + *p = c; + return true; +} + +bool StringBuilder::_opChars(uint32_t op, char c, size_t len) noexcept { + char* p = prepare(op, len); + if (p == nullptr) + return false; + + ::memset(p, c, len); + return true; +} + +static const char StringBuilder_numbers[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + +bool StringBuilder::_opNumber(uint32_t op, uint64_t i, uint32_t base, size_t width, uint32_t flags) noexcept { + if (base < 2 || base > 36) + base = 10; + + char buf[128]; + char* p = buf + ASMJIT_ARRAY_SIZE(buf); + + uint64_t orig = i; + char sign = '\0'; + + // -------------------------------------------------------------------------- + // [Sign] + // -------------------------------------------------------------------------- + + if ((flags & kStringFormatSigned) != 0 && static_cast(i) < 0) { + i = static_cast(-static_cast(i)); + sign = '-'; + } + else if ((flags & kStringFormatShowSign) != 0) { + sign = '+'; + } + else if ((flags & kStringFormatShowSpace) != 0) { + sign = ' '; + } + + // -------------------------------------------------------------------------- + // [Number] + // -------------------------------------------------------------------------- + + do { + uint64_t d = i / base; + uint64_t r = i % base; + + *--p = StringBuilder_numbers[r]; + i = d; + } while (i); + + size_t numberLength = (size_t)(buf + ASMJIT_ARRAY_SIZE(buf) - p); + + // -------------------------------------------------------------------------- + // [Alternate Form] + // -------------------------------------------------------------------------- + + if ((flags & kStringFormatAlternate) != 0) { + if (base == 8) { + if (orig != 0) + *--p = '0'; + } + if (base == 16) { + *--p = 'x'; + *--p = '0'; + } + } + + // -------------------------------------------------------------------------- + // [Width] + // -------------------------------------------------------------------------- + + if (sign != 0) + *--p = sign; + + if (width > 256) + width = 256; + + if (width <= numberLength) + width = 0; + else + width -= numberLength; + + // -------------------------------------------------------------------------- + // Write] + // -------------------------------------------------------------------------- + + size_t prefixLength = (size_t)(buf + ASMJIT_ARRAY_SIZE(buf) - p) - numberLength; + char* data = prepare(op, prefixLength + width + numberLength); + + if (data == nullptr) + return false; + + ::memcpy(data, p, prefixLength); + data += prefixLength; + + ::memset(data, '0', width); + data += width; + + ::memcpy(data, p + prefixLength, numberLength); + return true; +} + +bool StringBuilder::_opHex(uint32_t op, const void* data, size_t len) noexcept { + if (len >= IntTraits::maxValue() / 2) + return false; + + char* dst = prepare(op, len * 2); + if (dst == nullptr) + return false; + + const char* src = static_cast(data); + for (size_t i = 0; i < len; i++, dst += 2, src += 1) + { + dst[0] = StringBuilder_numbers[(src[0] >> 4) & 0xF]; + dst[1] = StringBuilder_numbers[(src[0] ) & 0xF]; + } + + return true; +} + +bool StringBuilder::_opVFormat(uint32_t op, const char* fmt, va_list ap) noexcept { + char buf[1024]; + + vsnprintf(buf, ASMJIT_ARRAY_SIZE(buf), fmt, ap); + buf[ASMJIT_ARRAY_SIZE(buf) - 1] = '\0'; + + return _opString(op, buf); +} + +bool 
StringBuilder::setFormat(const char* fmt, ...) noexcept { + bool result; + + va_list ap; + va_start(ap, fmt); + result = _opVFormat(kStringOpSet, fmt, ap); + va_end(ap); + + return result; +} + +bool StringBuilder::appendFormat(const char* fmt, ...) noexcept { + bool result; + + va_list ap; + va_start(ap, fmt); + result = _opVFormat(kStringOpAppend, fmt, ap); + va_end(ap); + + return result; +} + +bool StringBuilder::eq(const char* str, size_t len) const noexcept { + const char* aData = _data; + const char* bData = str; + + size_t aLength = _length; + size_t bLength = len; + + if (bLength == kInvalidIndex) { + size_t i; + for (i = 0; i < aLength; i++) { + if (aData[i] != bData[i] || bData[i] == 0) + return false; + } + + return bData[i] == 0; + } + else { + if (aLength != bLength) + return false; + + return ::memcmp(aData, bData, aLength) == 0; + } +} + +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" diff --git a/DynamicHooks/thirdparty/AsmJit/base/containers.h b/DynamicHooks/thirdparty/AsmJit/base/containers.h new file mode 100644 index 0000000..3a843bf --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/base/containers.h @@ -0,0 +1,550 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BASE_CONTAINERS_H +#define _ASMJIT_BASE_CONTAINERS_H + +// [Dependencies] +#include "../base/globals.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +//! \addtogroup asmjit_base +//! \{ + +// ============================================================================ +// [asmjit::BitArray] +// ============================================================================ + +//! Fixed size bit-array. +//! +//! Used by variable liveness analysis. +struct BitArray { + // -------------------------------------------------------------------------- + // [Enums] + // -------------------------------------------------------------------------- + + enum { + kEntitySize = static_cast(sizeof(uintptr_t)), + kEntityBits = kEntitySize * 8 + }; + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE uintptr_t getBit(uint32_t index) const noexcept { + return (data[index / kEntityBits] >> (index % kEntityBits)) & 1; + } + + ASMJIT_INLINE void setBit(uint32_t index) noexcept { + data[index / kEntityBits] |= static_cast(1) << (index % kEntityBits); + } + + ASMJIT_INLINE void delBit(uint32_t index) noexcept { + data[index / kEntityBits] &= ~(static_cast(1) << (index % kEntityBits)); + } + + // -------------------------------------------------------------------------- + // [Interface] + // -------------------------------------------------------------------------- + + //! Copy bits from `s0`, returns `true` if at least one bit is set in `s0`. 
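All BitArray operations share one addressing scheme: bit i lives in machine
word i / kEntityBits at position i % kEntityBits. A standalone check of just
that scheme; the helper names are hypothetical:

    #include <cassert>
    #include <cstdint>

    static const uint32_t kBits = sizeof(uintptr_t) * 8;

    static uintptr_t getBit(const uintptr_t* d, uint32_t i) {
      return (d[i / kBits] >> (i % kBits)) & 1;
    }
    static void setBit(uintptr_t* d, uint32_t i) {
      d[i / kBits] |= static_cast<uintptr_t>(1) << (i % kBits);
    }

    int main() {
      uintptr_t words[4] = { 0, 0, 0, 0 };
      setBit(words, 1);
      setBit(words, kBits + 3);  // lands in words[1], bit 3
      assert(getBit(words, 1) == 1);
      assert(getBit(words, kBits + 3) == 1);
      assert(getBit(words, 2) == 0);
      return 0;
    }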
+ ASMJIT_INLINE bool copyBits(const BitArray* s0, uint32_t len) noexcept { + uintptr_t r = 0; + for (uint32_t i = 0; i < len; i++) { + uintptr_t t = s0->data[i]; + data[i] = t; + r |= t; + } + return r != 0; + } + + ASMJIT_INLINE bool addBits(const BitArray* s0, uint32_t len) noexcept { + return addBits(this, s0, len); + } + + ASMJIT_INLINE bool addBits(const BitArray* s0, const BitArray* s1, uint32_t len) noexcept { + uintptr_t r = 0; + for (uint32_t i = 0; i < len; i++) { + uintptr_t t = s0->data[i] | s1->data[i]; + data[i] = t; + r |= t; + } + return r != 0; + } + + ASMJIT_INLINE bool andBits(const BitArray* s1, uint32_t len) noexcept { + return andBits(this, s1, len); + } + + ASMJIT_INLINE bool andBits(const BitArray* s0, const BitArray* s1, uint32_t len) noexcept { + uintptr_t r = 0; + for (uint32_t i = 0; i < len; i++) { + uintptr_t t = s0->data[i] & s1->data[i]; + data[i] = t; + r |= t; + } + return r != 0; + } + + ASMJIT_INLINE bool delBits(const BitArray* s1, uint32_t len) noexcept { + return delBits(this, s1, len); + } + + ASMJIT_INLINE bool delBits(const BitArray* s0, const BitArray* s1, uint32_t len) noexcept { + uintptr_t r = 0; + for (uint32_t i = 0; i < len; i++) { + uintptr_t t = s0->data[i] & ~s1->data[i]; + data[i] = t; + r |= t; + } + return r != 0; + } + + ASMJIT_INLINE bool _addBitsDelSource(BitArray* s1, uint32_t len) noexcept { + return _addBitsDelSource(this, s1, len); + } + + ASMJIT_INLINE bool _addBitsDelSource(const BitArray* s0, BitArray* s1, uint32_t len) noexcept { + uintptr_t r = 0; + for (uint32_t i = 0; i < len; i++) { + uintptr_t a = s0->data[i]; + uintptr_t b = s1->data[i]; + + this->data[i] = a | b; + b &= ~a; + + s1->data[i] = b; + r |= b; + } + return r != 0; + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + uintptr_t data[1]; +}; + +// ============================================================================ +// [asmjit::PodList] +// ============================================================================ + +//! \internal +template +class PodList { + public: + ASMJIT_NO_COPY(PodList) + + // -------------------------------------------------------------------------- + // [Link] + // -------------------------------------------------------------------------- + + struct Link { + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! Get next node. + ASMJIT_INLINE Link* getNext() const noexcept { return _next; } + + //! Get value. + ASMJIT_INLINE T getValue() const noexcept { return _value; } + //! Set value to `value`. 
+    ASMJIT_INLINE void setValue(const T& value) noexcept { _value = value; }
+
+    // --------------------------------------------------------------------------
+    // [Members]
+    // --------------------------------------------------------------------------
+
+    Link* _next;
+    T _value;
+  };
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE PodList() noexcept : _first(nullptr), _last(nullptr) {}
+  ASMJIT_INLINE ~PodList() noexcept {}
+
+  // --------------------------------------------------------------------------
+  // [Data]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE bool isEmpty() const noexcept { return _first == nullptr; }
+
+  ASMJIT_INLINE Link* getFirst() const noexcept { return _first; }
+  ASMJIT_INLINE Link* getLast() const noexcept { return _last; }
+
+  // --------------------------------------------------------------------------
+  // [Ops]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void reset() noexcept {
+    _first = nullptr;
+    _last = nullptr;
+  }
+
+  ASMJIT_INLINE void prepend(Link* link) noexcept {
+    link->_next = _first;
+    if (_first == nullptr)
+      _last = link;
+    _first = link;
+  }
+
+  ASMJIT_INLINE void append(Link* link) noexcept {
+    link->_next = nullptr;
+    if (_first == nullptr)
+      _first = link;
+    else
+      _last->_next = link;
+    _last = link;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  Link* _first;
+  Link* _last;
+};
+
+// ============================================================================
+// [asmjit::StringBuilder]
+// ============================================================================
+
+//! String builder.
+//!
+//! String builder was designed to be able to build a string using append-like
+//! operations to append numbers, other strings, or single characters. It can
+//! allocate its own buffer or use a buffer created on the stack.
+//!
+//! String builder contains methods specific to AsmJit functionality, used for
+//! logging or HTML output.
+class StringBuilder {
+ public:
+  ASMJIT_NO_COPY(StringBuilder)
+
+  // --------------------------------------------------------------------------
+  // [Enums]
+  // --------------------------------------------------------------------------
+
+  //! \internal
+  //!
+  //! String operation.
+  ASMJIT_ENUM(StringOp) {
+    //! Replace the current string by a given content.
+    kStringOpSet = 0,
+    //! Append a given content to the current string.
+    kStringOpAppend = 1
+  };
+
+  //! \internal
+  //!
+  //! String format flags.
+  ASMJIT_ENUM(StringFormatFlags) {
+    kStringFormatShowSign = 0x00000001,
+    kStringFormatShowSpace = 0x00000002,
+    kStringFormatAlternate = 0x00000004,
+    kStringFormatSigned = 0x80000000
+  };
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API StringBuilder() noexcept;
+  ASMJIT_API ~StringBuilder() noexcept;
+
+  ASMJIT_INLINE StringBuilder(const _NoInit&) noexcept {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //!
Get string builder capacity. + ASMJIT_INLINE size_t getCapacity() const noexcept { return _capacity; } + //! Get length. + ASMJIT_INLINE size_t getLength() const noexcept { return _length; } + + //! Get null-terminated string data. + ASMJIT_INLINE char* getData() noexcept { return _data; } + //! Get null-terminated string data (const). + ASMJIT_INLINE const char* getData() const noexcept { return _data; } + + // -------------------------------------------------------------------------- + // [Prepare / Reserve] + // -------------------------------------------------------------------------- + + //! Prepare to set/append. + ASMJIT_API char* prepare(uint32_t op, size_t len) noexcept; + + //! Reserve `to` bytes in string builder. + ASMJIT_API bool reserve(size_t to) noexcept; + + // -------------------------------------------------------------------------- + // [Clear] + // -------------------------------------------------------------------------- + + //! Clear the content in String builder. + ASMJIT_API void clear() noexcept; + + // -------------------------------------------------------------------------- + // [Op] + // -------------------------------------------------------------------------- + + ASMJIT_API bool _opString(uint32_t op, const char* str, size_t len = kInvalidIndex) noexcept; + ASMJIT_API bool _opVFormat(uint32_t op, const char* fmt, va_list ap) noexcept; + ASMJIT_API bool _opChar(uint32_t op, char c) noexcept; + ASMJIT_API bool _opChars(uint32_t op, char c, size_t len) noexcept; + ASMJIT_API bool _opNumber(uint32_t op, uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept; + ASMJIT_API bool _opHex(uint32_t op, const void* data, size_t len) noexcept; + + // -------------------------------------------------------------------------- + // [Set] + // -------------------------------------------------------------------------- + + //! Replace the current content by `str` of `len`. + ASMJIT_INLINE bool setString(const char* str, size_t len = kInvalidIndex) noexcept { + return _opString(kStringOpSet, str, len); + } + + //! Replace the current content by formatted string `fmt`. + ASMJIT_INLINE bool setVFormat(const char* fmt, va_list ap) noexcept { + return _opVFormat(kStringOpSet, fmt, ap); + } + + //! Replace the current content by formatted string `fmt`. + ASMJIT_API bool setFormat(const char* fmt, ...) noexcept; + + //! Replace the current content by `c` character. + ASMJIT_INLINE bool setChar(char c) noexcept { + return _opChar(kStringOpSet, c); + } + + //! Replace the current content by `c` of `len`. + ASMJIT_INLINE bool setChars(char c, size_t len) noexcept { + return _opChars(kStringOpSet, c, len); + } + + //! Replace the current content by formatted integer `i`. + ASMJIT_INLINE bool setInt(uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept { + return _opNumber(kStringOpSet, i, base, width, flags | kStringFormatSigned); + } + + //! Replace the current content by formatted integer `i`. + ASMJIT_INLINE bool setUInt(uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept { + return _opNumber(kStringOpSet, i, base, width, flags); + } + + //! Replace the current content by the given `data` converted to a HEX string. + ASMJIT_INLINE bool setHex(const void* data, size_t len) noexcept { + return _opHex(kStringOpSet, data, len); + } + + // -------------------------------------------------------------------------- + // [Append] + // -------------------------------------------------------------------------- + + //! 
Append `str` of `len`. + ASMJIT_INLINE bool appendString(const char* str, size_t len = kInvalidIndex) noexcept { + return _opString(kStringOpAppend, str, len); + } + + //! Append a formatted string `fmt` to the current content. + ASMJIT_INLINE bool appendVFormat(const char* fmt, va_list ap) noexcept { + return _opVFormat(kStringOpAppend, fmt, ap); + } + + //! Append a formatted string `fmt` to the current content. + ASMJIT_API bool appendFormat(const char* fmt, ...) noexcept; + + //! Append `c` character. + ASMJIT_INLINE bool appendChar(char c) noexcept { + return _opChar(kStringOpAppend, c); + } + + //! Append `c` of `len`. + ASMJIT_INLINE bool appendChars(char c, size_t len) noexcept { + return _opChars(kStringOpAppend, c, len); + } + + //! Append `i`. + ASMJIT_INLINE bool appendInt(int64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept { + return _opNumber(kStringOpAppend, static_cast(i), base, width, flags | kStringFormatSigned); + } + + //! Append `i`. + ASMJIT_INLINE bool appendUInt(uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept { + return _opNumber(kStringOpAppend, i, base, width, flags); + } + + //! Append the given `data` converted to a HEX string. + ASMJIT_INLINE bool appendHex(const void* data, size_t len) noexcept { + return _opHex(kStringOpAppend, data, len); + } + + // -------------------------------------------------------------------------- + // [_Append] + // -------------------------------------------------------------------------- + + //! Append `str` of `len`, inlined, without buffer overflow check. + ASMJIT_INLINE void _appendString(const char* str, size_t len = kInvalidIndex) noexcept { + // len should be a constant if we are inlining. + if (len == kInvalidIndex) { + char* p = &_data[_length]; + + while (*str) { + ASMJIT_ASSERT(p < _data + _capacity); + *p++ = *str++; + } + + *p = '\0'; + _length = (size_t)(p - _data); + } + else { + ASMJIT_ASSERT(_capacity - _length >= len); + + char* p = &_data[_length]; + char* pEnd = p + len; + + while (p < pEnd) + *p++ = *str++; + + *p = '\0'; + _length += len; + } + } + + //! Append `c` character, inlined, without buffer overflow check. + ASMJIT_INLINE void _appendChar(char c) noexcept { + ASMJIT_ASSERT(_capacity - _length >= 1); + + _data[_length] = c; + _length++; + _data[_length] = '\0'; + } + + //! Append `c` of `len`, inlined, without buffer overflow check. + ASMJIT_INLINE void _appendChars(char c, size_t len) noexcept { + ASMJIT_ASSERT(_capacity - _length >= len); + + char* p = &_data[_length]; + char* pEnd = p + len; + + while (p < pEnd) + *p++ = c; + + *p = '\0'; + _length += len; + } + + ASMJIT_INLINE void _appendUInt32(uint32_t i) noexcept { + char buf_[32]; + + char* pEnd = buf_ + ASMJIT_ARRAY_SIZE(buf_); + char* pBuf = pEnd; + + do { + uint32_t d = i / 10; + uint32_t r = i % 10; + + *--pBuf = static_cast(r + '0'); + i = d; + } while (i); + + ASMJIT_ASSERT(_capacity - _length >= (size_t)(pEnd - pBuf)); + char* p = &_data[_length]; + + do { + *p++ = *pBuf; + } while (++pBuf != pEnd); + + *p = '\0'; + _length = (size_t)(p - _data); + } + + // -------------------------------------------------------------------------- + // [Eq] + // -------------------------------------------------------------------------- + + //! Check for equality with other `str` of `len`. + ASMJIT_API bool eq(const char* str, size_t len = kInvalidIndex) const noexcept; + //! Check for equality with `other`. 
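Typical use of the set/append API declared here, as a small sketch assuming
this patch's headers; the wrapper function is hypothetical:

    #include <asmjit/asmjit.h>
    #include <cstdint>
    #include <cstdio>
    using namespace asmjit;

    void stringBuilderExample() {
      StringBuilder sb;  // grows on the heap as needed

      sb.setString("imm = ");
      sb.appendUInt(255, 16, 0, StringBuilder::kStringFormatAlternate);  // "0xFF"
      sb.appendChar('\n');

      const uint8_t bytes[] = { 0xDE, 0xAD };
      sb.appendHex(bytes, 2);  // "DEAD"

      std::printf("%s", sb.getData());  // the buffer stays null-terminated
    }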
+ ASMJIT_INLINE bool eq(const StringBuilder& other) const noexcept { return eq(other._data); } + + // -------------------------------------------------------------------------- + // [Operator Overload] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE bool operator==(const StringBuilder& other) const noexcept { return eq(other); } + ASMJIT_INLINE bool operator!=(const StringBuilder& other) const noexcept { return !eq(other); } + + ASMJIT_INLINE bool operator==(const char* str) const noexcept { return eq(str); } + ASMJIT_INLINE bool operator!=(const char* str) const noexcept { return !eq(str); } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! String data. + char* _data; + //! Length. + size_t _length; + //! Capacity. + size_t _capacity; + //! Whether the string can be freed. + size_t _canFree; +}; + +// ============================================================================ +// [asmjit::StringBuilderTmp] +// ============================================================================ + +//! Temporary string builder, has statically allocated `N` bytes. +template +class StringBuilderTmp : public StringBuilder { + public: + ASMJIT_NO_COPY(StringBuilderTmp) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE StringBuilderTmp() noexcept : StringBuilder(NoInit) { + _data = _embeddedData; + _data[0] = 0; + + _length = 0; + _capacity = N; + _canFree = false; + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! Embedded data. + char _embeddedData[static_cast( + N + 1 + sizeof(intptr_t)) & ~static_cast(sizeof(intptr_t) - 1)]; +}; + +//! \} + +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" + +// [Guard] +#endif // _ASMJIT_BASE_CONTAINERS_H diff --git a/DynamicHooks/thirdparty/AsmJit/base/cpuinfo.cpp b/DynamicHooks/thirdparty/AsmJit/base/cpuinfo.cpp new file mode 100644 index 0000000..20f84e4 --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/base/cpuinfo.cpp @@ -0,0 +1,643 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Export] +#define ASMJIT_EXPORTS + +// [Dependencies] +#include "../base/cpuinfo.h" +#include "../base/utils.h" + +#if ASMJIT_OS_POSIX +# include +# include +# include +# include +#endif // ASMJIT_OS_POSIX + +#if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64 +# if ASMJIT_CC_MSC_GE(14, 0, 0) + # include // Required by `__cpuid()` and `_xgetbv()`. +# endif // _MSC_VER >= 1400 +#endif + +#if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64 +# if ASMJIT_OS_LINUX +# include // Required by `getauxval()`. +# endif +#endif + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [asmjit::CpuInfo - Detect ARM & ARM64] +// ============================================================================ + +// ARM information has to be retrieved by the OS (this is how ARM was designed). 
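Of the OS routes used below, the Linux one boils down to getauxval() plus mask
tests. A minimal standalone sketch; the HWCAP constants are written out by
hand in case older libc headers do not define them:

    #include <sys/auxv.h>
    #include <cstdio>

    int main() {
      unsigned long hwcap = getauxval(AT_HWCAP);

      const unsigned long kNEON  = 1ul << 12;  // HWCAP_NEON on 32-bit ARM
      const unsigned long kVFPv3 = 1ul << 13;  // HWCAP_VFPv3

      std::printf("NEON:  %s\n", (hwcap & kNEON)  == kNEON  ? "yes" : "no");
      std::printf("VFPv3: %s\n", (hwcap & kVFPv3) == kVFPv3 ? "yes" : "no");
      return 0;
    }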
+#if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64 + +#if ASMJIT_ARCH_ARM64 +static void armPopulateBaseline64Features(CpuInfo* cpuInfo) noexcept { + // Thumb (including all variations) is only supported on ARM32. + + // ARM64 is based on ARMv8 and newer. + cpuInfo->addFeature(CpuInfo::kArmFeatureV6); + cpuInfo->addFeature(CpuInfo::kArmFeatureV7); + cpuInfo->addFeature(CpuInfo::kArmFeatureV8); + + // ARM64 comes with these features by default. + cpuInfo->addFeature(CpuInfo::kArmFeatureDSP); + cpuInfo->addFeature(CpuInfo::kArmFeatureIDIV); + cpuInfo->addFeature(CpuInfo::kArmFeatureVFP2); + cpuInfo->addFeature(CpuInfo::kArmFeatureVFP3); + cpuInfo->addFeature(CpuInfo::kArmFeatureVFP4); +} +#endif // ASMJIT_ARCH_ARM64 + +#if ASMJIT_OS_WINDOWS +//! \internal +//! +//! Detect ARM CPU features on Windows. +//! +//! The detection is based on `IsProcessorFeaturePresent()` API call. +static void armDetectCpuInfoOnWindows(CpuInfo* cpuInfo) noexcept { +#if ASMJIT_ARCH_ARM32 + cpuInfo->setArch(kArchArm32); + + // Windows for ARM requires at least ARMv7 with DSP extensions. + cpuInfo->addFeature(CpuInfo::kArmFeatureV6); + cpuInfo->addFeature(CpuInfo::kArmFeatureV7); + cpuInfo->addFeature(CpuInfo::kArmFeatureDSP); + + // Windows for ARM requires VFP3. + cpuInfo->addFeature(CpuInfo::kArmFeatureVFP2); + cpuInfo->addFeature(CpuInfo::kArmFeatureVFP3); + + // Windows for ARM requires and uses THUMB2. + cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB); + cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB2); +#else + cpuInfo->setArch(kArchArm64); + armPopulateBaseline64Features(cpuInfo); +#endif + + // Windows for ARM requires NEON. + cpuInfo->addFeature(CpuInfo::kArmFeatureNEON); + + // Detect additional CPU features by calling `IsProcessorFeaturePresent()`. + struct WinPFPMapping { + uint32_t pfpId, featureId; + }; + + static const WinPFPMapping mapping[] = { + { PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE , CpuInfo::kArmFeatureVFP4 }, + { PF_ARM_VFP_32_REGISTERS_AVAILABLE , CpuInfo::kArmFeatureVFP_D32 }, + { PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE, CpuInfo::kArmFeatureIDIV }, + { PF_ARM_64BIT_LOADSTORE_ATOMIC , CpuInfo::kArmFeatureAtomics64 } + }; + + for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(mapping); i++) + if (::IsProcessorFeaturePresent(mapping[i].pfpId)) + cpuInfo->addFeature(mapping[i].featureId); +} +#endif // ASMJIT_OS_WINDOWS + +#if ASMJIT_OS_LINUX +struct LinuxHWCapMapping { + uint32_t hwcapMask, featureId; +}; + +static void armDetectHWCaps(CpuInfo* cpuInfo, + unsigned long type, const LinuxHWCapMapping* mapping, size_t length) noexcept { + + unsigned long mask = getauxval(type); + for (size_t i = 0; i < length; i++) + if ((mask & mapping[i].hwcapMask) == mapping[i].hwcapMask) + cpuInfo->addFeature(mapping[i].featureId); +} + +//! \internal +//! +//! Detect ARM CPU features on Linux. +//! +//! The detection is based on `getauxval()`. +static void armDetectCpuInfoOnLinux(CpuInfo* cpuInfo) noexcept { +#if ASMJIT_ARCH_ARM32 + cpuInfo->setArch(kArchArm32); + + // `AT_HWCAP` provides ARMv7 (and less) related flags. 
+ static const LinuxHWCapMapping hwCapMapping[] = { + { /* HWCAP_VFPv3 */ (1 << 13), CpuInfo::kArmFeatureVFP3 }, + { /* HWCAP_VFPv4 */ (1 << 16), CpuInfo::kArmFeatureVFP4 }, + { /* HWCAP_IDIVA */ (3 << 17), CpuInfo::kArmFeatureIDIV }, + { /* HWCAP_VFPD32 */ (1 << 19), CpuInfo::kArmFeatureVFP_D32 }, + { /* HWCAP_NEON */ (1 << 12), CpuInfo::kArmFeatureNEON }, + { /* HWCAP_EDSP */ (1 << 7), CpuInfo::kArmFeatureDSP } + }; + armDetectHWCaps(cpuInfo, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping)); + + // VFP3 implies VFP2. + if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFP3)) + cpuInfo->addFeature(CpuInfo::kArmFeatureVFP2); + + // VFP2 implies ARMv6. + if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFP2)) + cpuInfo->addFeature(CpuInfo::kArmFeatureV6); + + // VFP3 or NEON implies ARMv7. + if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFP3) || + cpuInfo->hasFeature(CpuInfo::kArmFeatureNEON)) + cpuInfo->addFeature(CpuInfo::kArmFeatureV7); + + // `AT_HWCAP2` provides ARMv8 related flags. + static const LinuxHWCapMapping hwCap2Mapping[] = { + { /* HWCAP2_AES */ (1 << 0), CpuInfo::kArmFeatureAES }, + { /* HWCAP2_CRC32 */ (1 << 4), CpuInfo::kArmFeatureCRC32 }, + { /* HWCAP2_PMULL */ (1 << 1), CpuInfo::kArmFeaturePMULL }, + { /* HWCAP2_SHA1 */ (1 << 2), CpuInfo::kArmFeatureSHA1 }, + { /* HWCAP2_SHA2 */ (1 << 3), CpuInfo::kArmFeatureSHA256 } + }; + armDetectHWCaps(cpuInfo, AT_HWCAP2, hwCap2Mapping, ASMJIT_ARRAY_SIZE(hwCap2Mapping)); + + if (cpuInfo->hasFeature(CpuInfo::kArmFeatureAES ) || + cpuInfo->hasFeature(CpuInfo::kArmFeatureCRC32 ) || + cpuInfo->hasFeature(CpuInfo::kArmFeaturePMULL ) || + cpuInfo->hasFeature(CpuInfo::kArmFeatureSHA1 ) || + cpuInfo->hasFeature(CpuInfo::kArmFeatureSHA256)) { + cpuInfo->addFeature(CpuInfo::kArmFeatureV8); + } +#else + cpuInfo->setArch(kArchArm64); + armPopulateBaseline64Features(cpuInfo); + + // `AT_HWCAP` provides ARMv8 related flags. + static const LinuxHWCapMapping hwCapMapping[] = { + { /* HWCAP_ASIMD */ (1 << 1), CpuInfo::kArmFeatureNEON }, + { /* HWCAP_AES */ (1 << 3), CpuInfo::kArmFeatureAES }, + { /* HWCAP_CRC32 */ (1 << 7), CpuInfo::kArmFeatureCRC32 }, + { /* HWCAP_PMULL */ (1 << 4), CpuInfo::kArmFeaturePMULL }, + { /* HWCAP_SHA1 */ (1 << 5), CpuInfo::kArmFeatureSHA1 }, + { /* HWCAP_SHA2 */ (1 << 6), CpuInfo::kArmFeatureSHA256 } + { /* HWCAP_ATOMICS */ (1 << 8), CpuInfo::kArmFeatureAtomics64 } + }; + armDetectHWCaps(cpuInfo, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping)); + + // `AT_HWCAP2` is not used at the moment. +#endif +} +#endif // ASMJIT_OS_LINUX + +static void armDetectCpuInfo(CpuInfo* cpuInfo) noexcept { +#if ASMJIT_OS_WINDOWS + armDetectCpuInfoOnWindows(cpuInfo); +#elif ASMJIT_OS_LINUX + armDetectCpuInfoOnLinux(cpuInfo); +#else +# error "[asmjit] armDetectCpuInfo() - Unsupported OS." +#endif +} +#endif // ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64 + +// ============================================================================ +// [asmjit::CpuInfo - Detect X86 & X64] +// ============================================================================ + +#if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64 + +//! \internal +//! +//! X86 CPUID result. +struct CpuIdResult { + uint32_t eax, ebx, ecx, edx; +}; + +//! \internal +//! +//! Content of XCR register, result of XGETBV instruction. +struct XGetBVResult { + uint32_t eax, edx; +}; + +#if ASMJIT_CC_MSC && !ASMJIT_CC_MSC_GE(15, 0, 30729) && ASMJIT_ARCH_X64 +//! \internal +//! +//! HACK: VS2008 or less, 64-bit mode - `__cpuidex` doesn't exist! However, +//! 
+
+#if ASMJIT_CC_MSC && !ASMJIT_CC_MSC_GE(15, 0, 30729) && ASMJIT_ARCH_X64
+//! \internal
+//!
+//! HACK: VS2008 or less, 64-bit mode - `__cpuidex` doesn't exist! However,
+//! the 64-bit calling convention specifies the first parameter to be passed in
+//! ECX, so we may be lucky if the compiler doesn't move the register, otherwise
+//! the result would be wrong.
+static ASMJIT_NOINLINE void x86CallCpuIdWorkaround(uint32_t inEcx, uint32_t inEax, CpuIdResult* result) noexcept {
+ __cpuid(reinterpret_cast<int*>(result), inEax);
+}
+#endif
+
+//! \internal
+//!
+//! Wrapper to call `cpuid` instruction.
+static ASMJIT_INLINE void x86CallCpuId(CpuIdResult* result, uint32_t inEax, uint32_t inEcx = 0) noexcept {
+#if ASMJIT_CC_MSC && ASMJIT_CC_MSC_GE(15, 0, 30729)
+ __cpuidex(reinterpret_cast<int*>(result), inEax, inEcx);
+#elif ASMJIT_CC_MSC && ASMJIT_ARCH_X64
+ x86CallCpuIdWorkaround(inEcx, inEax, result);
+#elif ASMJIT_CC_MSC && ASMJIT_ARCH_X86
+ uint32_t paramEax = inEax;
+ uint32_t paramEcx = inEcx;
+ uint32_t* out = reinterpret_cast<uint32_t*>(result);
+
+ __asm {
+ mov eax, paramEax
+ mov ecx, paramEcx
+ mov edi, out
+ cpuid
+ mov dword ptr[edi + 0], eax
+ mov dword ptr[edi + 4], ebx
+ mov dword ptr[edi + 8], ecx
+ mov dword ptr[edi + 12], edx
+ }
+#elif (ASMJIT_CC_GCC || ASMJIT_CC_CLANG) && ASMJIT_ARCH_X86
+ __asm__ __volatile__(
+ "mov %%ebx, %%edi\n"
+ "cpuid\n"
+ "xchg %%edi, %%ebx\n"
+ : "=a"(result->eax),
+ "=D"(result->ebx),
+ "=c"(result->ecx),
+ "=d"(result->edx)
+ : "a"(inEax),
+ "c"(inEcx)
+ );
+#elif (ASMJIT_CC_GCC || ASMJIT_CC_CLANG) && ASMJIT_ARCH_X64
+ __asm__ __volatile__(
+ "mov %%rbx, %%rdi\n"
+ "cpuid\n"
+ "xchg %%rdi, %%rbx\n"
+ : "=a"(result->eax),
+ "=D"(result->ebx),
+ "=c"(result->ecx),
+ "=d"(result->edx)
+ : "a"(inEax),
+ "c"(inEcx)
+ );
+#else
+# error "[asmjit] x86CallCpuId() - Unsupported compiler."
+#endif
+}
+
+//! \internal
+//!
+//! Wrapper to call `xgetbv` instruction.
+static void x86CallXGetBV(XGetBVResult* result, uint32_t inEcx) noexcept {
+#if ASMJIT_CC_MSC_GE(16, 0, 40219) // 2010SP1+
+ uint64_t value = _xgetbv(inEcx);
+ result->eax = static_cast<uint32_t>(value & 0xFFFFFFFFU);
+ result->edx = static_cast<uint32_t>(value >> 32);
+#elif ASMJIT_CC_GCC || ASMJIT_CC_CLANG
+ uint32_t outEax;
+ uint32_t outEdx;
+
+ // Replaced, because the world is not perfect:
+ // __asm__ __volatile__("xgetbv" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
+ __asm__ __volatile__(".byte 0x0F, 0x01, 0xd0" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
+
+ result->eax = outEax;
+ result->edx = outEdx;
+#else
+ result->eax = 0;
+ result->edx = 0;
+#endif
+}
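A minimal sketch of how the `XGetBVResult` from the wrapper above is used later for the AVX check: the OS must have enabled the XMM (bit 1) and YMM (bit 2) state components in XCR0. This sketch is GCC/Clang-only and assumes `CPUID.1:ECX.OSXSAVE` is set; executing `xgetbv` without that would fault:

```cpp
#include <stdint.h>
#include <stdio.h>

static uint64_t readXcr0() {
  uint32_t eax, edx;
  // Byte-encoded `xgetbv` with ECX=0, matching the wrapper above.
  __asm__ __volatile__(".byte 0x0F, 0x01, 0xD0" : "=a"(eax), "=d"(edx) : "c"(0U));
  return (static_cast<uint64_t>(edx) << 32) | eax;
}

int main() {
  uint64_t xcr0 = readXcr0();
  // XCR0[2:1] == 11b means both XMM and YMM state are OS-enabled.
  printf("AVX state enabled by OS: %s\n", (xcr0 & 0x6) == 0x6 ? "yes" : "no");
  return 0;
}
```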
+
+//! \internal
+//!
+//! Map a 12-byte vendor string returned by `cpuid` into a `CpuInfo::Vendor` ID.
+static uint32_t x86GetCpuVendorID(const char* vendorString) noexcept {
+ struct VendorData {
+ uint32_t id;
+ char text[12];
+ };
+
+ static const VendorData vendorList[] = {
+ { CpuInfo::kVendorIntel , { 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' } },
+ { CpuInfo::kVendorAMD , { 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' } },
+ { CpuInfo::kVendorVIA , { 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 } },
+ { CpuInfo::kVendorVIA , { 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' } }
+ };
+
+ uint32_t dw0 = reinterpret_cast<const uint32_t*>(vendorString)[0];
+ uint32_t dw1 = reinterpret_cast<const uint32_t*>(vendorString)[1];
+ uint32_t dw2 = reinterpret_cast<const uint32_t*>(vendorString)[2];
+
+ for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(vendorList); i++) {
+ if (dw0 == reinterpret_cast<const uint32_t*>(vendorList[i].text)[0] &&
+ dw1 == reinterpret_cast<const uint32_t*>(vendorList[i].text)[1] &&
+ dw2 == reinterpret_cast<const uint32_t*>(vendorList[i].text)[2])
+ return vendorList[i].id;
+ }
+
+ return CpuInfo::kVendorNone;
+}
+
+static ASMJIT_INLINE void x86SimplifyBrandString(char* s) noexcept {
+ // Used to always clear the current character to ensure that the result
+ // doesn't contain garbage after the new zero terminator.
+ char* d = s;
+
+ char prev = 0;
+ char curr = s[0];
+ s[0] = '\0';
+
+ for (;;) {
+ if (curr == 0)
+ break;
+
+ if (curr == ' ') {
+ if (prev == '@' || s[1] == ' ' || s[1] == '@')
+ goto L_Skip;
+ }
+
+ d[0] = curr;
+ d++;
+ prev = curr;
+
+L_Skip:
+ curr = *++s;
+ s[0] = '\0';
+ }
+
+ d[0] = '\0';
+}
+
+static void x86DetectCpuInfo(CpuInfo* cpuInfo) noexcept {
+ uint32_t i, maxId;
+
+ CpuIdResult regs;
+ XGetBVResult xcr0 = { 0, 0 };
+
+ // Architecture is known at compile-time.
+ cpuInfo->setArch(ASMJIT_ARCH_X86 ? kArchX86 : kArchX64);
+
+ // --------------------------------------------------------------------------
+ // [CPUID EAX=0x0]
+ // --------------------------------------------------------------------------
+
+ // Get vendor string/id.
+ x86CallCpuId(&regs, 0x0);
+
+ maxId = regs.eax;
+ ::memcpy(cpuInfo->_vendorString + 0, &regs.ebx, 4);
+ ::memcpy(cpuInfo->_vendorString + 4, &regs.edx, 4);
+ ::memcpy(cpuInfo->_vendorString + 8, &regs.ecx, 4);
+ cpuInfo->_vendorId = x86GetCpuVendorID(cpuInfo->_vendorString);
+
+ // --------------------------------------------------------------------------
+ // [CPUID EAX=0x1]
+ // --------------------------------------------------------------------------
+
+ if (maxId >= 0x1) {
+ // Get feature flags in ECX/EDX and family/model in EAX.
+ x86CallCpuId(&regs, 0x1);
+
+ // Fill family and model fields.
+ cpuInfo->_family = (regs.eax >> 8) & 0x0F;
+ cpuInfo->_model = (regs.eax >> 4) & 0x0F;
+ cpuInfo->_stepping = (regs.eax ) & 0x0F;
+
+ // Use extended family and model fields.
+ if (cpuInfo->_family == 0x0F) { + cpuInfo->_family += ((regs.eax >> 20) & 0xFF); + cpuInfo->_model += ((regs.eax >> 16) & 0x0F) << 4; + } + + cpuInfo->_x86Data._processorType = ((regs.eax >> 12) & 0x03); + cpuInfo->_x86Data._brandIndex = ((regs.ebx ) & 0xFF); + cpuInfo->_x86Data._flushCacheLineSize = ((regs.ebx >> 8) & 0xFF) * 8; + cpuInfo->_x86Data._maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF); + + if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE3); + if (regs.ecx & 0x00000002U) cpuInfo->addFeature(CpuInfo::kX86FeaturePCLMULQDQ); + if (regs.ecx & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureMONITOR); + if (regs.ecx & 0x00000200U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSSE3); + if (regs.ecx & 0x00002000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMPXCHG16B); + if (regs.ecx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4_1); + if (regs.ecx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4_2); + if (regs.ecx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMOVBE); + if (regs.ecx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeaturePOPCNT); + if (regs.ecx & 0x02000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAESNI); + if (regs.ecx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVE); + if (regs.ecx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVE_OS); + if (regs.ecx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDRAND); + if (regs.edx & 0x00000010U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDTSC); + if (regs.edx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMPXCHG8B); + if (regs.edx & 0x00008000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMOV); + if (regs.edx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLFLUSH); + if (regs.edx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMMX); + if (regs.edx & 0x01000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFXSR); + if (regs.edx & 0x02000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE) + .addFeature(CpuInfo::kX86FeatureMMX2); + if (regs.edx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE) + .addFeature(CpuInfo::kX86FeatureSSE2); + if (regs.edx & 0x10000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMT); + + // AMD sets multi-threading ON if it has two or more cores. + if (cpuInfo->_hwThreadsCount == 1 && cpuInfo->_vendorId == CpuInfo::kVendorAMD && (regs.edx & 0x10000000U)) + cpuInfo->_hwThreadsCount = 2; + + // Get the content of XCR0 if supported by CPU and enabled by OS. + if ((regs.ecx & 0x0C000000U) == 0x0C000000U) + x86CallXGetBV(&xcr0, 0); + + // Detect AVX+. + if (regs.ecx & 0x10000000U) { + // - XCR0[2:1] == 11b + // XMM & YMM states need to be enabled by OS. + if ((xcr0.eax & 0x00000006U) == 0x00000006U) { + cpuInfo->addFeature(CpuInfo::kX86FeatureAVX); + + if (regs.ecx & 0x00004000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFMA3); + if (regs.ecx & 0x20000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureF16C); + } + } + } + + // -------------------------------------------------------------------------- + // [CPUID EAX=0x7 ECX=0x0] + // -------------------------------------------------------------------------- + + // Detect new features if the processor supports CPUID-07. 
+ bool maybeMPX = false;
+
+ if (maxId >= 0x7) {
+ x86CallCpuId(&regs, 0x7);
+
+ if (regs.ebx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureFSGSBASE);
+ if (regs.ebx & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureBMI);
+ if (regs.ebx & 0x00000010U) cpuInfo->addFeature(CpuInfo::kX86FeatureHLE);
+ if (regs.ebx & 0x00000080U) cpuInfo->addFeature(CpuInfo::kX86FeatureSMEP);
+ if (regs.ebx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeatureBMI2);
+ if (regs.ebx & 0x00000200U) cpuInfo->addFeature(CpuInfo::kX86FeatureERMS);
+ if (regs.ebx & 0x00000800U) cpuInfo->addFeature(CpuInfo::kX86FeatureRTM);
+ if (regs.ebx & 0x00004000U) maybeMPX = true;
+ if (regs.ebx & 0x00040000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDSEED);
+ if (regs.ebx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureADX);
+ if (regs.ebx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSMAP);
+ if (regs.ebx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeaturePCOMMIT);
+ if (regs.ebx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLFLUSH_OPT);
+ if (regs.ebx & 0x01000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLWB);
+ if (regs.ebx & 0x20000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSHA);
+ if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeaturePREFETCHWT1);
+
+ // Detect AVX2.
+ if (cpuInfo->hasFeature(CpuInfo::kX86FeatureAVX))
+ if (regs.ebx & 0x00000020U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX2);
+
+ // Detect AVX-512+.
+ if (regs.ebx & 0x00010000U) {
+ // - XCR0[2:1] == 11b
+ // XMM/YMM states need to be enabled by OS.
+ // - XCR0[7:5] == 111b
+ // Upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 need to be enabled by the OS.
+ if ((xcr0.eax & 0x000000E6U) == 0x000000E6U) {
+ cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512F);
+
+ if (regs.ebx & 0x00020000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512DQ);
+ if (regs.ebx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512IFMA);
+ if (regs.ebx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512PF);
+ if (regs.ebx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512ER);
+ if (regs.ebx & 0x10000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512CD);
+ if (regs.ebx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512BW);
+ if (regs.ebx & 0x80000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512VL);
+ if (regs.ecx & 0x00000002U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512VBMI);
+ }
+ }
+ }
+
+ // --------------------------------------------------------------------------
+ // [CPUID EAX=0xD, ECX=0x0]
+ // --------------------------------------------------------------------------
+
+ if (maxId >= 0xD && maybeMPX) {
+ x86CallCpuId(&regs, 0xD);
+
+ // Both the CPUID result and XCR0 have to be enabled to have support for MPX.
+ if (((regs.eax & xcr0.eax) & 0x00000018U) == 0x00000018U) {
+ cpuInfo->addFeature(CpuInfo::kX86FeatureMPX);
+ }
+ }
+
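A worked example (hypothetical input value) of the family/model decoding performed above for CPUID leaf 0x1, including the extended-field adjustment that is applied only when the base family is 0x0F:

```cpp
#include <stdint.h>
#include <stdio.h>

int main() {
  uint32_t eax = 0x00020F71;  // Hypothetical CPUID.1:EAX signature.

  uint32_t family   = (eax >> 8) & 0x0F;  // 0x0F
  uint32_t model    = (eax >> 4) & 0x0F;  // 0x07
  uint32_t stepping =  eax       & 0x0F;  // 0x01

  // Extended fields apply only when the base family is 0x0F (as above).
  if (family == 0x0F) {
    family += (eax >> 20) & 0xFF;          // +0x00 -> stays 0x0F
    model  += ((eax >> 16) & 0x0F) << 4;   // +0x20 -> 0x27
  }

  printf("family=0x%02X model=0x%02X stepping=%u\n", family, model, stepping);
  return 0;
}
```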
+ // --------------------------------------------------------------------------
+ // [CPUID EAX=0x80000000...maxId]
+ // --------------------------------------------------------------------------
+
+ // Several CPUID calls are required to get the whole brand string. It's easy
+ // to copy one DWORD at a time instead of performing a byte copy.
+ uint32_t* brand = reinterpret_cast<uint32_t*>(cpuInfo->_brandString);
+
+ i = maxId = 0x80000000U;
+ do {
+ x86CallCpuId(&regs, i);
+ switch (i) {
+ case 0x80000000U:
+ maxId = Utils::iMin<uint32_t>(regs.eax, 0x80000004U);
+ break;
+
+ case 0x80000001U:
+ if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureLAHF_SAHF);
+ if (regs.ecx & 0x00000020U) cpuInfo->addFeature(CpuInfo::kX86FeatureLZCNT);
+ if (regs.ecx & 0x00000040U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4A);
+ if (regs.ecx & 0x00000080U) cpuInfo->addFeature(CpuInfo::kX86FeatureMSSE);
+ if (regs.ecx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeaturePREFETCH);
+ if (regs.ecx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureTBM);
+ if (regs.edx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureNX);
+ if (regs.edx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFXSR_OPT);
+ if (regs.edx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMMX2);
+ if (regs.edx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDTSCP);
+ if (regs.edx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86Feature3DNOW2)
+ .addFeature(CpuInfo::kX86FeatureMMX2);
+ if (regs.edx & 0x80000000U) cpuInfo->addFeature(CpuInfo::kX86Feature3DNOW);
+
+ if (cpuInfo->hasFeature(CpuInfo::kX86FeatureAVX)) {
+ if (regs.ecx & 0x00000800U) cpuInfo->addFeature(CpuInfo::kX86FeatureXOP);
+ if (regs.ecx & 0x00010000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFMA4);
+ }
+ break;
+
+ case 0x80000002U:
+ case 0x80000003U:
+ case 0x80000004U:
+ *brand++ = regs.eax;
+ *brand++ = regs.ebx;
+ *brand++ = regs.ecx;
+ *brand++ = regs.edx;
+ break;
+
+ default:
+ // Stop the loop, additional features can be detected in the future.
+ i = maxId;
+ break;
+ }
+ } while (i++ < maxId);
+
+ // Simplify CPU brand string by removing unnecessary spaces.
+ x86SimplifyBrandString(cpuInfo->_brandString);
+}
+#endif // ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
+
+// ============================================================================
+// [asmjit::CpuInfo - Detect - HWThreadsCount]
+// ============================================================================
+
+static uint32_t cpuDetectHWThreadsCount() noexcept {
+#if ASMJIT_OS_WINDOWS
+ SYSTEM_INFO info;
+ ::GetSystemInfo(&info);
+ return info.dwNumberOfProcessors;
+#elif ASMJIT_OS_POSIX && defined(_SC_NPROCESSORS_ONLN)
+ long res = ::sysconf(_SC_NPROCESSORS_ONLN);
+ if (res <= 0) return 1;
+ return static_cast<uint32_t>(res);
+#else
+ return 1;
+#endif
+}
+
+// ============================================================================
+// [asmjit::CpuInfo - Detect]
+// ============================================================================
+
+void CpuInfo::detect() noexcept {
+ reset();
+
+ // Detect the number of hardware threads available.
+ _hwThreadsCount = cpuDetectHWThreadsCount(); + +#if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64 + armDetectCpuInfo(this); +#endif // ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64 + +#if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64 + x86DetectCpuInfo(this); +#endif // ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64 +} + +// ============================================================================ +// [asmjit::CpuInfo - GetHost] +// ============================================================================ + +struct HostCpuInfo : public CpuInfo { + ASMJIT_INLINE HostCpuInfo() noexcept : CpuInfo() { detect(); } +}; + +const CpuInfo& CpuInfo::getHost() noexcept { + static HostCpuInfo host; + return host; +} + +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" diff --git a/DynamicHooks/thirdparty/AsmJit/base/cpuinfo.h b/DynamicHooks/thirdparty/AsmJit/base/cpuinfo.h new file mode 100644 index 0000000..c0b38ab --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/base/cpuinfo.h @@ -0,0 +1,316 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BASE_CPUINFO_H +#define _ASMJIT_BASE_CPUINFO_H + +// [Dependencies] +#include "../base/globals.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +//! \addtogroup asmjit_base +//! \{ + +// ============================================================================ +// [asmjit::CpuInfo] +// ============================================================================ + +//! CPU information. +class CpuInfo { + public: + // -------------------------------------------------------------------------- + // [Vendor] + // -------------------------------------------------------------------------- + + //! CPU vendor ID. + ASMJIT_ENUM(Vendor) { + kVendorNone = 0, //!< Generic or unknown. + kVendorIntel = 1, //!< Intel vendor. + kVendorAMD = 2, //!< AMD vendor. + kVendorVIA = 3 //!< VIA vendor. + }; + + // -------------------------------------------------------------------------- + // [ArmFeatures] + // -------------------------------------------------------------------------- + + //! ARM/ARM64 CPU features. + ASMJIT_ENUM(ArmFeatures) { + kArmFeatureV6, //!< ARMv6 instruction set. + kArmFeatureV7, //!< ARMv7 instruction set. + kArmFeatureV8, //!< ARMv8 instruction set. + kArmFeatureTHUMB, //!< CPU provides THUMB v1 instruction set (ARM only). + kArmFeatureTHUMB2, //!< CPU provides THUMB v2 instruction set (ARM only). + kArmFeatureVFP2, //!< CPU provides VFPv2 instruction set. + kArmFeatureVFP3, //!< CPU provides VFPv3 instruction set. + kArmFeatureVFP4, //!< CPU provides VFPv4 instruction set. + kArmFeatureVFP_D32, //!< CPU provides 32 VFP-D (64-bit) registers. + kArmFeatureNEON, //!< CPU provides NEON instruction set. + kArmFeatureDSP, //!< CPU provides DSP extensions. + kArmFeatureIDIV, //!< CPU provides hardware support for SDIV and UDIV. + kArmFeatureAES, //!< CPU provides AES instructions (ARM64 only). + kArmFeatureCRC32, //!< CPU provides CRC32 instructions (ARM64 only). + kArmFeaturePMULL, //!< CPU provides PMULL instructions (ARM64 only). + kArmFeatureSHA1, //!< CPU provides SHA1 instructions (ARM64 only). + kArmFeatureSHA256, //!< CPU provides SHA256 instructions (ARM64 only). + kArmFeatureAtomics64, //!< CPU provides 64-bit load/store atomics (ARM64 only). + + kArmFeaturesCount //!< Count of ARM/ARM64 CPU features. 
+ };
+
+ // --------------------------------------------------------------------------
+ // [X86Features]
+ // --------------------------------------------------------------------------
+
+ //! X86/X64 CPU features.
+ ASMJIT_ENUM(X86Features) {
+ kX86FeatureNX = 0, //!< CPU has Not-Execute-Bit.
+ kX86FeatureMT, //!< CPU has multi-threading.
+ kX86FeatureRDTSC, //!< CPU has RDTSC.
+ kX86FeatureRDTSCP, //!< CPU has RDTSCP.
+ kX86FeatureCMOV, //!< CPU has CMOV.
+ kX86FeatureCMPXCHG8B, //!< CPU has CMPXCHG8B.
+ kX86FeatureCMPXCHG16B, //!< CPU has CMPXCHG16B (x64).
+ kX86FeatureCLFLUSH, //!< CPU has CLFLUSH.
+ kX86FeatureCLFLUSH_OPT, //!< CPU has CLFLUSH (optimized).
+ kX86FeatureCLWB, //!< CPU has CLWB.
+ kX86FeaturePCOMMIT, //!< CPU has PCOMMIT.
+ kX86FeaturePREFETCH, //!< CPU has PREFETCH.
+ kX86FeaturePREFETCHWT1, //!< CPU has PREFETCHWT1.
+ kX86FeatureLAHF_SAHF, //!< CPU has LAHF/SAHF.
+ kX86FeatureFXSR, //!< CPU has FXSAVE/FXRSTOR.
+ kX86FeatureFXSR_OPT, //!< CPU has FXSAVE/FXRSTOR (optimized).
+ kX86FeatureMMX, //!< CPU has MMX.
+ kX86FeatureMMX2, //!< CPU has extended MMX.
+ kX86Feature3DNOW, //!< CPU has 3dNow!
+ kX86Feature3DNOW2, //!< CPU has enhanced 3dNow!
+ kX86FeatureSSE, //!< CPU has SSE.
+ kX86FeatureSSE2, //!< CPU has SSE2.
+ kX86FeatureSSE3, //!< CPU has SSE3.
+ kX86FeatureSSSE3, //!< CPU has SSSE3.
+ kX86FeatureSSE4A, //!< CPU has SSE4.A.
+ kX86FeatureSSE4_1, //!< CPU has SSE4.1.
+ kX86FeatureSSE4_2, //!< CPU has SSE4.2.
+ kX86FeatureMSSE, //!< CPU has Misaligned SSE (MSSE).
+ kX86FeatureMONITOR, //!< CPU has MONITOR and MWAIT.
+ kX86FeatureMOVBE, //!< CPU has MOVBE.
+ kX86FeaturePOPCNT, //!< CPU has POPCNT.
+ kX86FeatureLZCNT, //!< CPU has LZCNT.
+ kX86FeatureAESNI, //!< CPU has AESNI.
+ kX86FeaturePCLMULQDQ, //!< CPU has PCLMULQDQ.
+ kX86FeatureRDRAND, //!< CPU has RDRAND.
+ kX86FeatureRDSEED, //!< CPU has RDSEED.
+ kX86FeatureSMAP, //!< CPU has SMAP (supervisor-mode access prevention).
+ kX86FeatureSMEP, //!< CPU has SMEP (supervisor-mode execution prevention).
+ kX86FeatureSHA, //!< CPU has SHA-1 and SHA-256.
+ kX86FeatureXSAVE, //!< CPU has XSAVE support - XSAVE/XRSTOR, XSETBV/XGETBV, and XCR0.
+ kX86FeatureXSAVE_OS, //!< OS has enabled XSAVE, you can call XGETBV to get value of XCR0.
+ kX86FeatureAVX, //!< CPU has AVX.
+ kX86FeatureAVX2, //!< CPU has AVX2.
+ kX86FeatureF16C, //!< CPU has F16C.
+ kX86FeatureFMA3, //!< CPU has FMA3.
+ kX86FeatureFMA4, //!< CPU has FMA4.
+ kX86FeatureXOP, //!< CPU has XOP.
+ kX86FeatureBMI, //!< CPU has BMI (bit manipulation instructions #1).
+ kX86FeatureBMI2, //!< CPU has BMI2 (bit manipulation instructions #2).
+ kX86FeatureADX, //!< CPU has ADX (multi-precision add-carry instruction extensions).
+ kX86FeatureTBM, //!< CPU has TBM (trailing bit manipulation).
+ kX86FeatureMPX, //!< CPU has MPX (memory protection extensions).
+ kX86FeatureHLE, //!< CPU has HLE.
+ kX86FeatureRTM, //!< CPU has RTM.
+ kX86FeatureERMS, //!< CPU has ERMS (enhanced REP MOVSB/STOSB).
+ kX86FeatureFSGSBASE, //!< CPU has FSGSBASE.
+ kX86FeatureAVX512F, //!< CPU has AVX-512F (foundation).
+ kX86FeatureAVX512CD, //!< CPU has AVX-512CD (conflict detection).
+ kX86FeatureAVX512PF, //!< CPU has AVX-512PF (prefetch instructions).
+ kX86FeatureAVX512ER, //!< CPU has AVX-512ER (exponential and reciprocal instructions).
+ kX86FeatureAVX512DQ, //!< CPU has AVX-512DQ (DWORD/QWORD).
+ kX86FeatureAVX512BW, //!< CPU has AVX-512BW (BYTE/WORD).
+ kX86FeatureAVX512VL, //!< CPU has AVX-512VL (vector length extensions).
+ kX86FeatureAVX512IFMA, //!< CPU has AVX-512IFMA (integer fused multiply add using 52-bit precision).
+ kX86FeatureAVX512VBMI, //!< CPU has AVX-512VBMI (vector byte manipulation instructions).
+
+ kX86FeaturesCount //!< Count of X86/X64 CPU features.
+ };
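A usage sketch of the public API declared here; the include path is an assumption based on this patch's layout, and `getHost()` lazily runs `detect()` once:

```cpp
#include <stdio.h>
#include "DynamicHooks/thirdparty/AsmJit/base/cpuinfo.h"  // Path assumed from this patch.

int main() {
  const asmjit::CpuInfo& cpu = asmjit::CpuInfo::getHost();

  printf("vendor : %s\n", cpu.getVendorString());
  printf("brand  : %s\n", cpu.getBrandString());
  printf("threads: %u\n", cpu.getHwThreadsCount());
  printf("AVX2   : %s\n",
         cpu.hasFeature(asmjit::CpuInfo::kX86FeatureAVX2) ? "yes" : "no");
  return 0;
}
```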
+
+ // --------------------------------------------------------------------------
+ // [Other]
+ // --------------------------------------------------------------------------
+
+ //! \internal
+ enum {
+ kFeaturesPerUInt32 = static_cast<uint32_t>(sizeof(uint32_t)) * 8
+ };
+
+ // --------------------------------------------------------------------------
+ // [ArmInfo]
+ // --------------------------------------------------------------------------
+
+ struct ArmData {
+ };
+
+ // --------------------------------------------------------------------------
+ // [X86Info]
+ // --------------------------------------------------------------------------
+
+ struct X86Data {
+ uint32_t _processorType; //!< Processor type.
+ uint32_t _brandIndex; //!< Brand index.
+ uint32_t _flushCacheLineSize; //!< Flush cache line size (in bytes).
+ uint32_t _maxLogicalProcessors; //!< Maximum number of addressable IDs for logical processors.
+ };
+
+ // --------------------------------------------------------------------------
+ // [Construction / Destruction]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE CpuInfo() noexcept { reset(); }
+
+ // --------------------------------------------------------------------------
+ // [Reset]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE void reset() noexcept { ::memset(this, 0, sizeof(CpuInfo)); }
+
+ // --------------------------------------------------------------------------
+ // [Detect]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_API void detect() noexcept;
+
+ // --------------------------------------------------------------------------
+ // [Accessors]
+ // --------------------------------------------------------------------------
+
+ //! Get CPU architecture, see \ref ArchId.
+ ASMJIT_INLINE uint32_t getArch() const noexcept { return _arch; }
+ //! Set CPU architecture, see \ref ArchId.
+ ASMJIT_INLINE void setArch(uint32_t arch) noexcept { _arch = static_cast<uint8_t>(arch); }
+
+ //! Get CPU vendor string.
+ ASMJIT_INLINE const char* getVendorString() const noexcept { return _vendorString; }
+ //! Get CPU brand string.
+ ASMJIT_INLINE const char* getBrandString() const noexcept { return _brandString; }
+
+ //! Get CPU vendor ID.
+ ASMJIT_INLINE uint32_t getVendorId() const noexcept { return _vendorId; }
+ //! Get CPU family ID.
+ ASMJIT_INLINE uint32_t getFamily() const noexcept { return _family; }
+ //! Get CPU model ID.
+ ASMJIT_INLINE uint32_t getModel() const noexcept { return _model; }
+ //! Get CPU stepping.
+ ASMJIT_INLINE uint32_t getStepping() const noexcept { return _stepping; }
+
+ //! Get number of hardware threads available.
+ ASMJIT_INLINE uint32_t getHwThreadsCount() const noexcept {
+ return _hwThreadsCount;
+ }
+
+ //! Get whether CPU has a `feature`.
+ ASMJIT_INLINE bool hasFeature(uint32_t feature) const noexcept {
+ ASMJIT_ASSERT(feature < sizeof(_features) * 8);
+
+ uint32_t pos = feature / kFeaturesPerUInt32;
+ uint32_t bit = feature % kFeaturesPerUInt32;
+
+ return static_cast<bool>((_features[pos] >> bit) & 0x1);
+ }
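The feature set is stored as a packed bit-array (`_features[8]`, 256 bits); a standalone illustration of the `pos`/`bit` arithmetic used by `hasFeature()` above and `addFeature()` below:

```cpp
#include <stdint.h>
#include <stdio.h>

int main() {
  uint32_t features[8] = { 0 };
  uint32_t feature = 37;  // Hypothetical feature ID (fits in 8 * 32 bits).

  uint32_t pos = feature / 32;  // Which 32-bit word.
  uint32_t bit = feature % 32;  // Which bit inside that word.

  features[pos] |= static_cast<uint32_t>(1) << bit;  // addFeature()
  bool has = ((features[pos] >> bit) & 0x1) != 0;    // hasFeature()

  printf("word=%u bit=%u set=%d\n", pos, bit, has ? 1 : 0);
  return 0;
}
```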
+
+ //! Add a CPU `feature`.
+ ASMJIT_INLINE CpuInfo& addFeature(uint32_t feature) noexcept {
+ ASMJIT_ASSERT(feature < sizeof(_features) * 8);
+
+ uint32_t pos = feature / kFeaturesPerUInt32;
+ uint32_t bit = feature % kFeaturesPerUInt32;
+
+ _features[pos] |= static_cast<uint32_t>(1) << bit;
+ return *this;
+ }
+
+ // --------------------------------------------------------------------------
+ // [Accessors - ARM]
+ // --------------------------------------------------------------------------
+
+ // --------------------------------------------------------------------------
+ // [Accessors - X86]
+ // --------------------------------------------------------------------------
+
+ //! Get processor type.
+ ASMJIT_INLINE uint32_t getX86ProcessorType() const noexcept {
+ return _x86Data._processorType;
+ }
+
+ //! Get brand index.
+ ASMJIT_INLINE uint32_t getX86BrandIndex() const noexcept {
+ return _x86Data._brandIndex;
+ }
+
+ //! Get flush cache line size.
+ ASMJIT_INLINE uint32_t getX86FlushCacheLineSize() const noexcept {
+ return _x86Data._flushCacheLineSize;
+ }
+
+ //! Get maximum logical processors count.
+ ASMJIT_INLINE uint32_t getX86MaxLogicalProcessors() const noexcept {
+ return _x86Data._maxLogicalProcessors;
+ }
+
+ // --------------------------------------------------------------------------
+ // [Statics]
+ // --------------------------------------------------------------------------
+
+ //! Get the host CPU information.
+ static ASMJIT_API const CpuInfo& getHost() noexcept;
+
+ // --------------------------------------------------------------------------
+ // [Members]
+ // --------------------------------------------------------------------------
+
+ //! CPU vendor string.
+ char _vendorString[16];
+ //! CPU brand string.
+ char _brandString[64];
+
+ //! CPU architecture, see \ref ArchId.
+ uint8_t _arch;
+ //! \internal
+ uint8_t _reserved[3];
+ //! CPU vendor id, see \ref Vendor.
+ uint32_t _vendorId;
+ //! CPU family ID.
+ uint32_t _family;
+ //! CPU model ID.
+ uint32_t _model;
+ //! CPU stepping.
+ uint32_t _stepping;
+
+ //! Number of hardware threads.
+ uint32_t _hwThreadsCount;
+
+ //! CPU features (bit-array).
+ uint32_t _features[8];
+
+ // Architecture specific data.
+ union {
+ ArmData _armData;
+ X86Data _x86Data;
+ };
+};
+
+//! \}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_CPUINFO_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base/globals.cpp b/DynamicHooks/thirdparty/AsmJit/base/globals.cpp
new file mode 100644
index 0000000..1674b82
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/globals.cpp
@@ -0,0 +1,94 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies]
+#include "../base/globals.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::DebugUtils]
+// ============================================================================
+
+#if !defined(ASMJIT_DISABLE_TEXT)
+static const char errorMessages[] = {
+ "Ok\0"
+ "No heap memory\0"
+ "No virtual memory\0"
+ "Invalid argument\0"
+ "Invalid state\0"
+ "Invalid architecture\0"
+ "Not initialized\0"
+ "No code generated\0"
+ "Code too large\0"
+ "Label already bound\0"
+ "Unknown instruction\0"
+ "Illegal instruction\0"
+ "Illegal addressing\0"
+ "Illegal displacement\0"
+ "Overlapped arguments\0"
+ "Unknown error\0"
+};
+
+static const char* findPackedString(const char* p, uint32_t id, uint32_t maxId) noexcept {
+ uint32_t i = 0;
+
+ if (id > maxId)
+ id = maxId;
+
+ while (i < id) {
+ while (p[0])
+ p++;
+
+ p++;
+ i++;
+ }
+
+ return p;
+}
+#endif // ASMJIT_DISABLE_TEXT
+
+const char* DebugUtils::errorAsString(Error err) noexcept {
+#if !defined(ASMJIT_DISABLE_TEXT)
+ return findPackedString(errorMessages, err, kErrorCount);
+#else
+ static const char noMessage[] = "";
+ return noMessage;
+#endif
+}
+
+void DebugUtils::debugOutput(const char* str) noexcept {
+#if ASMJIT_OS_WINDOWS
+ ::OutputDebugStringA(str);
+#else
+ ::fputs(str, stderr);
+#endif
+}
+
+void DebugUtils::assertionFailed(const char* file, int line, const char* msg) noexcept {
+ char str[1024];
+
+ snprintf(str, 1024,
+ "[asmjit] Assertion failed at %s (line %d):\n"
+ "[asmjit] %s\n", file, line, msg);
+
+ // Support buggy `snprintf` implementations.
+ str[1023] = '\0';
+
+ debugOutput(str);
+ ::abort();
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
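The error table above packs all messages into one string literal separated by embedded NUL terminators; a standalone sketch of the lookup that `findPackedString()` performs:

```cpp
#include <stdint.h>
#include <stdio.h>

// Messages stored back-to-back; the i-th one is found by skipping i NULs.
static const char messages[] = "Ok\0No heap memory\0Unknown error\0";

static const char* findPacked(const char* p, uint32_t id, uint32_t maxId) {
  if (id > maxId) id = maxId;  // Out-of-range IDs map to the last entry.
  for (uint32_t i = 0; i < id; i++) {
    while (*p) p++;            // Skip to this message's terminator.
    p++;                       // Step past it.
  }
  return p;
}

int main() {
  printf("%s\n", findPacked(messages, 1, 2));  // -> "No heap memory"
  return 0;
}
```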
diff --git a/DynamicHooks/thirdparty/AsmJit/base/globals.h b/DynamicHooks/thirdparty/AsmJit/base/globals.h
new file mode 100644
index 0000000..deaf221
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/globals.h
@@ -0,0 +1,666 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_GLOBALS_H
+#define _ASMJIT_BASE_GLOBALS_H
+
+// [Dependencies]
+#include "../build.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+//! \addtogroup asmjit_base
+//! \{
+
+// ============================================================================
+// [asmjit::TypeDefs]
+// ============================================================================
+
+//! AsmJit error code (unsigned integer).
+typedef uint32_t Error;
+
+//! 64-bit unsigned pointer, compatible with JIT and non-JIT generators.
+//!
+//! This is the preferred pointer type to use with the AsmJit library. It has
+//! the capability to hold any pointer for any architecture, making it an ideal
+//! candidate for a cross-platform code generator.
+typedef uint64_t Ptr;
+
+//! Like \ref Ptr, but signed.
+typedef int64_t SignedPtr;
+
+// ============================================================================
+// [asmjit::GlobalDefs]
+// ============================================================================
+
+//! Invalid index.
+//!
+//! Invalid index is the last possible index that is never used in practice. In
+//! AsmJit it is used exclusively with strings to indicate that the length of
+//! the string is not known and has to be determined.
+static const size_t kInvalidIndex = ~static_cast<size_t>(0);
+
+//! Invalid base address.
+static const Ptr kNoBaseAddress = static_cast<Ptr>(static_cast<SignedPtr>(-1));
+
+//! Global constants.
+ASMJIT_ENUM(GlobalDefs) {
+ //! Invalid value or operand id.
+ kInvalidValue = 0xFFFFFFFF,
+
+ //! Invalid register index.
+ kInvalidReg = 0xFF,
+ //! Invalid variable type.
+ kInvalidVar = 0xFF,
+
+ //! Host memory allocator overhead.
+ //!
+ //! The overhead is decremented from all zone allocators so the operating
+ //! system doesn't have to allocate one extra virtual page to keep track of
+ //! the requested memory block.
+ //!
+ //! The number is actually a guess.
+ kMemAllocOverhead = sizeof(intptr_t) * 4,
+
+ //! Memory grow threshold.
+ //!
+ //! After the grow threshold is reached the capacity won't be doubled
+ //! anymore.
+ kMemAllocGrowMax = 8192 * 1024
+};
+
+// ============================================================================
+// [asmjit::ArchId]
+// ============================================================================
+
+//! CPU architecture identifier.
+ASMJIT_ENUM(ArchId) {
+ //! No/Unknown architecture.
+ kArchNone = 0,
+
+ //! X86 architecture (32-bit).
+ kArchX86 = 1,
+ //! X64 architecture (64-bit), also called AMD64.
+ kArchX64 = 2,
+ //! X32 architecture (64-bit with 32-bit pointers) (NOT USED ATM).
+ kArchX32 = 3,
+
+ //! Arm architecture (32-bit).
+ kArchArm32 = 4,
+ //! Arm64 architecture (64-bit).
+ kArchArm64 = 5,
+
+#if ASMJIT_ARCH_X86
+ kArchHost = kArchX86
+#elif ASMJIT_ARCH_X64
+ kArchHost = kArchX64
+#elif ASMJIT_ARCH_ARM32
+ kArchHost = kArchArm32
+#elif ASMJIT_ARCH_ARM64
+ kArchHost = kArchArm64
+#else
+# error "[asmjit] Unsupported host architecture."
+#endif
+};
+
+// ============================================================================
+// [asmjit::CallConv]
+// ============================================================================
+
+//! Function calling convention.
+//!
+//! A calling convention is a scheme that defines how function arguments are
+//! passed and how the return value is handled. In assembler programming it's
+//! always necessary to comply with function calling conventions, because even
+//! a small inconsistency can cause undefined behavior or an application crash.
+//!
+//! Platform Independent Conventions
+//! --------------------------------
+//!
+//! - `kCallConvHost` - Should match the current C++ compiler native calling
+//! convention.
+//!
+//! X86/X64 Specific Conventions
+//! ----------------------------
+//!
+//! List of calling conventions for 32-bit x86 mode:
+//! - `kCallConvX86CDecl` - Calling convention for C runtime.
+//! - `kCallConvX86StdCall` - Calling convention for WinAPI functions.
+//! - `kCallConvX86MsThisCall` - Calling convention for C++ members under
+//! Windows (produced by MSVC and all MSVC compatible compilers).
+//! - `kCallConvX86MsFastCall` - Fastest calling convention that can be used
+//! by MSVC compiler.
+//! - `kCallConvX86BorlandFastCall` - Borland fastcall convention.
+//! - `kCallConvX86GccFastCall` - GCC fastcall convention (2 register arguments).
+//! - `kCallConvX86GccRegParm1` - GCC regparm(1) convention.
+//! - `kCallConvX86GccRegParm2` - GCC regparm(2) convention.
+//! - `kCallConvX86GccRegParm3` - GCC regparm(3) convention.
+//!
+//! List of calling conventions for 64-bit x86 mode (x64):
+//! - `kCallConvX64Win` - Windows 64-bit calling convention (WIN64 ABI).
+//! - `kCallConvX64Unix` - Unix 64-bit calling convention (AMD64 ABI).
+//!
+//! ARM Specific Conventions
+//! ------------------------
+//!
+//! List of ARM calling conventions:
+//! - `kCallConvArm32SoftFP` - Legacy calling convention, floating point
+//! arguments are passed via GP registers.
+//! - `kCallConvArm32HardFP` - Modern calling convention, uses VFP registers
+//! to pass floating point arguments.
+ASMJIT_ENUM(CallConv) {
+ //! Calling convention is invalid (can't be used).
+ kCallConvNone = 0,
+
+ // --------------------------------------------------------------------------
+ // [X86]
+ // --------------------------------------------------------------------------
+
+ //! X86 `__cdecl` calling convention (used by C runtime and libraries).
+ //!
+ //! Compatible across MSVC and GCC.
+ //!
+ //! Arguments direction:
+ //! - Right to left.
+ //!
+ //! Stack is cleaned by:
+ //! - Caller.
+ //!
+ //! Return value:
+ //! - Integer types - `eax:edx` registers.
+ //! - Floating point - `fp0` register.
+ kCallConvX86CDecl = 1,
+
+ //! X86 `__stdcall` calling convention (used mostly by WinAPI).
+ //!
+ //! Compatible across MSVC and GCC.
+ //!
+ //! Arguments direction:
+ //! - Right to left.
+ //!
+ //! Stack is cleaned by:
+ //! - Callee.
+ //!
+ //! Return value:
+ //! - Integer types - `eax:edx` registers.
+ //! - Floating point - `fp0` register.
+ kCallConvX86StdCall = 2,
+
+ //! X86 `__thiscall` calling convention (MSVC/Intel specific).
+ //!
+ //! This is an MSVC (and Intel) specific calling convention used when
+ //! targeting Windows platform for C++ class methods. The implicit `this`
+ //! pointer (defined as the first argument) is stored in `ecx` register
+ //! instead of being stored on the stack.
+ //!
+ //! This calling convention is implicitly used by MSVC for class functions.
+ //!
+ //! C++ class functions that have a variable number of arguments use `__cdecl`
+ //! calling convention instead.
+ //!
+ //! Arguments direction:
+ //! - Right to left (except for the first argument passed in `ecx`).
+ //!
+ //! Stack is cleaned by:
+ //! - Callee.
+ //!
+ //! Return value:
+ //! - Integer types - `eax:edx` registers.
+ //! - Floating point - `fp0` register.
+ kCallConvX86MsThisCall = 3,
+
+ //! X86 `__fastcall` convention (MSVC/Intel specific).
+ //!
+ //! The first two arguments (evaluated from the left to the right) are passed
+ //! in `ecx` and `edx` registers, all others on the stack from the right to
+ //! the left.
+ //!
+ //! Arguments direction:
+ //! - Right to left (except for the first two integers passed in `ecx` and `edx`).
+ //!
+ //! Stack is cleaned by:
+ //! - Callee.
+ //!
+ //! Return value:
+ //! - Integer types - `eax:edx` registers.
+ //! - Floating point - `fp0` register.
+ //!
+ //! NOTE: This calling convention differs from GCC's one.
+ kCallConvX86MsFastCall = 4,
+
+ //! X86 `__fastcall` convention (Borland specific).
+ //!
+ //! The first two arguments (evaluated from the left to the right) are passed
+ //! in `ecx` and `edx` registers, all others on the stack from the left to
+ //! the right.
+ //!
+ //! Arguments direction:
+ //! - Left to right (except for the first two integers passed in `ecx` and `edx`).
+ //!
+ //! Stack is cleaned by:
+ //! - Callee.
+ //!
+ //! Return value:
+ //! - Integer types - `eax:edx` registers.
+ //! - Floating point - `fp0` register.
+ //!
+ //! NOTE: Arguments on the stack are passed in left-to-right order, which
+ //! is really Borland specific; all other `__fastcall` calling conventions
+ //! use right-to-left order.
+ kCallConvX86BorlandFastCall = 5,
+
+ //! X86 `__fastcall` convention (GCC specific).
+ //!
+ //! The first two arguments (evaluated from the left to the right) are passed
+ //! in `ecx` and `edx` registers, all others on the stack from the right to
+ //! the left.
+ //!
+ //! Arguments direction:
+ //! - Right to left (except for the first two integers passed in `ecx` and `edx`).
+ //!
+ //! Stack is cleaned by:
+ //! - Callee.
+ //!
+ //! Return value:
+ //! - Integer types - `eax:edx` registers.
+ //! - Floating point - `fp0` register.
+ //!
+ //! NOTE: This calling convention should be compatible with `kCallConvX86MsFastCall`.
+ kCallConvX86GccFastCall = 6,
+
+ //! X86 `regparm(1)` convention (GCC specific).
+ //!
+ //! The first argument (evaluated from the left to the right) is passed in
+ //! `eax` register, all others on the stack from the right to the left.
+ //!
+ //! Arguments direction:
+ //! - Right to left (except for the first integer passed in `eax`).
+ //!
+ //! Stack is cleaned by:
+ //! - Caller.
+ //!
+ //! Return value:
+ //! - Integer types - `eax:edx` registers.
+ //! - Floating point - `fp0` register.
+ kCallConvX86GccRegParm1 = 7,
+
+ //! X86 `regparm(2)` convention (GCC specific).
+ //!
+ //! The first two arguments (evaluated from the left to the right) are passed
+ //! in `eax` and `edx` registers, all others on the stack from the right to
+ //! the left.
+ //!
+ //! Arguments direction:
+ //! - Right to left (except for the first two integers passed in `eax` and `edx`).
+ //!
+ //! Stack is cleaned by:
+ //! - Caller.
+ //!
+ //! Return value:
+ //! - Integer types - `eax:edx` registers.
+ //! - Floating point - `fp0` register.
+ kCallConvX86GccRegParm2 = 8,
+
+ //! X86 `regparm(3)` convention (GCC specific).
+ //!
+ //! The first three parameters (evaluated from left-to-right) are passed in
+ //! EAX:EDX:ECX registers, all others on the stack in right-to-left direction.
+ //!
+ //! Arguments direction:
+ //! - Right to left (except for the first three integers passed in `eax`,
+ //! `edx`, and `ecx`).
+ //!
+ //! Stack is cleaned by:
+ //! - Caller.
+ //!
+ //! Return value:
+ //! - Integer types - `eax:edx` registers.
+ //! - Floating point - `fp0` register.
+ kCallConvX86GccRegParm3 = 9,
+
+ // --------------------------------------------------------------------------
+ // [X64]
+ // --------------------------------------------------------------------------
+
+ //! X64 calling convention used by Windows platform (WIN64-ABI).
+ //!
+ //! The first 4 arguments are passed in the following registers:
+ //! - 1. 32/64-bit integer in `rcx` and floating point argument in `xmm0`
+ //! - 2. 32/64-bit integer in `rdx` and floating point argument in `xmm1`
+ //! - 3. 32/64-bit integer in `r8` and floating point argument in `xmm2`
+ //! - 4. 32/64-bit integer in `r9` and floating point argument in `xmm3`
+ //!
+ //! If one or more of the first four arguments doesn't match the list above,
+ //! it is simply skipped. WIN64-ABI is very specific about this.
+ //!
+ //! All other arguments are pushed on the stack from the right to the left.
+ //! Stack has to be aligned by 16 bytes, always. There is also a 32-byte
+ //! shadow space on the stack that can be used to save up to four 64-bit
+ //! registers.
+ //!
+ //! Arguments direction:
+ //! - Right to left (except for all parameters passed in registers).
+ //!
+ //! Stack cleaned by:
+ //! - Caller.
+ //!
+ //! Return value:
+ //! - Integer types - `rax`.
+ //! - Floating point - `xmm0`.
+ //!
+ //! Stack is always aligned to 16 bytes.
+ //!
+ //! More information about this calling convention can be found on MSDN.
+ kCallConvX64Win = 10,
+
+ //! X64 calling convention used by Unix platforms (AMD64-ABI).
+ //!
+ //! The first six 32 or 64-bit integer arguments are passed in `rdi`, `rsi`,
+ //! `rdx`, `rcx`, `r8`, and `r9` registers. The first eight floating point or
+ //! xmm arguments are passed in `xmm0`, `xmm1`, `xmm2`, `xmm3`, `xmm4`,
+ //! `xmm5`, `xmm6`, and `xmm7` registers.
+ //!
+ //! There is also a red zone below the stack pointer that can be used by the
+ //! function. The red zone is typically from [rsp-128] to [rsp-8], however,
+ //! the red zone can also be disabled.
+ //!
+ //! Arguments direction:
+ //! - Right to left (except for all arguments passed in registers).
+ //!
+ //! Stack cleaned by:
+ //! - Caller.
+ //!
+ //! Return value:
+ //! - Integer types - `rax`.
+ //! - Floating point - `xmm0`.
+ //!
+ //! Stack is always aligned to 16 bytes.
+ kCallConvX64Unix = 11,
+
+ // --------------------------------------------------------------------------
+ // [ARM]
+ // --------------------------------------------------------------------------
+
+ kCallConvArm32SoftFP = 16,
+ kCallConvArm32HardFP = 17,
+
+ // --------------------------------------------------------------------------
+ // [Internal]
+ // --------------------------------------------------------------------------
+
+ //! \internal
+ _kCallConvX86Start = 1,
+ //! \internal
+ _kCallConvX86End = 9,
+
+ //! \internal
+ _kCallConvX64Start = 10,
+ //! \internal
+ _kCallConvX64End = 11,
+
+ //! \internal
+ _kCallConvArmStart = 16,
+ //! \internal
+ _kCallConvArmEnd = 17,
+
+ // --------------------------------------------------------------------------
+ // [Host]
+ // --------------------------------------------------------------------------
+
+#if defined(ASMJIT_DOCGEN)
+ //! Default calling convention based on the current compiler's settings.
+ //!
+ //! NOTE: This should be always the same as `kCallConvHostCDecl`, but some
+ //! compilers allow overriding the default calling convention. Overriding
+ //! is not detected at the moment.
+ kCallConvHost = DETECTED_AT_COMPILE_TIME,
+ //! Default C calling convention based on the current compiler's settings.
+ kCallConvHostCDecl = DETECTED_AT_COMPILE_TIME,
+ //! Compatibility for `__stdcall` calling convention.
+ //!
+ //! NOTE: This enumeration is always set to a value which is compatible with
+ //! the current compiler's `__stdcall` calling convention. In 64-bit mode
+ //! there is no such convention and the value is mapped to `kCallConvX64Win`
+ //! or `kCallConvX64Unix`, depending on the host architecture.
+ kCallConvHostStdCall = DETECTED_AT_COMPILE_TIME,
+ //! Compatibility for `__fastcall` calling convention.
+ //!
+ //! NOTE: This enumeration is always set to a value which is compatible with
+ //! the current compiler's `__fastcall` calling convention. In 64-bit mode
+ //! there is no such convention and the value is mapped to `kCallConvX64Win`
+ //! or `kCallConvX64Unix`, depending on the host architecture.
+ kCallConvHostFastCall = DETECTED_AT_COMPILE_TIME
+#elif ASMJIT_ARCH_X86
+ // X86 Host Support.
+ kCallConvHost = kCallConvX86CDecl,
+ kCallConvHostCDecl = kCallConvX86CDecl,
+ kCallConvHostStdCall = kCallConvX86StdCall,
+ kCallConvHostFastCall =
+ ASMJIT_CC_MSC ? kCallConvX86MsFastCall :
+ ASMJIT_CC_GCC ? kCallConvX86GccFastCall :
+ ASMJIT_CC_CLANG ? kCallConvX86GccFastCall :
+ ASMJIT_CC_CODEGEAR ? kCallConvX86BorlandFastCall : kCallConvNone
+#elif ASMJIT_ARCH_X64
+ // X64 Host Support.
+ kCallConvHost = ASMJIT_OS_WINDOWS ? kCallConvX64Win : kCallConvX64Unix,
+ // These don't exist in 64-bit mode.
+ kCallConvHostCDecl = kCallConvHost,
+ kCallConvHostStdCall = kCallConvHost,
+ kCallConvHostFastCall = kCallConvHost
+#elif ASMJIT_ARCH_ARM32
+# if defined(__SOFTFP__)
+ kCallConvHost = kCallConvArm32SoftFP,
+# else
+ kCallConvHost = kCallConvArm32HardFP,
+# endif
+ // These don't exist on ARM.
+ kCallConvHostCDecl = kCallConvHost,
+ kCallConvHostStdCall = kCallConvHost,
+ kCallConvHostFastCall = kCallConvHost
+#else
+# error "[asmjit] Couldn't determine the target's calling convention."
+#endif
+};
+
+// ============================================================================
+// [asmjit::ErrorCode]
+// ============================================================================
+
+//! AsmJit error codes.
+ASMJIT_ENUM(ErrorCode) {
+ //! No error (success).
+ //!
+ //! This is the default state and the state you want.
+ kErrorOk = 0,
+
+ //! Heap memory allocation failed.
+ kErrorNoHeapMemory,
+
+ //! Virtual memory allocation failed.
+ kErrorNoVirtualMemory,
+
+ //! Invalid argument.
+ kErrorInvalidArgument,
+
+ //! Invalid state.
+ kErrorInvalidState,
+
+ //! Invalid architecture.
+ kErrorInvalidArch,
+
+ //! The object is not initialized.
+ kErrorNotInitialized,
+
+ //! No code generated.
+ //!
+ //! Returned by the runtime if the code-generator contains no code.
+ kErrorNoCodeGenerated,
+
+ //! The code generated is too large to fit in the memory reserved.
+ //!
+ //! Returned by `StaticRuntime` in case that the code generated is too large
+ //! to fit in the memory already reserved for it.
+ kErrorCodeTooLarge,
+
+ //! Label is already bound.
+ kErrorLabelAlreadyBound,
+
+ //! Unknown instruction (an instruction ID is out of bounds or instruction
+ //! name is invalid).
+ kErrorUnknownInst,
+
+ //! Illegal instruction.
+ //!
+ //! This status code can also be returned in X64 mode if AH, BH, CH or DH
+ //! registers have been used together with a REX prefix. The instruction
+ //! is not encodable in such case.
+ //!
+ //! Example of raising `kErrorIllegalInst` error.
+ //!
+ //! ~~~
+ //! // Invalid address size.
+ //! a.mov(dword_ptr(eax), al);
+ //!
+ //! // Undecodable instruction - AH used with R10, however R10 can only be
+ //! // encoded by using REX prefix, which conflicts with AH.
+ //! a.mov(byte_ptr(r10), ah);
+ //! ~~~
+ //!
+ //! NOTE: In debug mode an assertion is raised instead of returning an error.
+ kErrorIllegalInst,
+
+ //! Illegal (unencodable) addressing used.
+ kErrorIllegalAddresing,
+
+ //! Illegal (unencodable) displacement used.
+ //!
+ //! X86/X64 Specific
+ //! ----------------
+ //!
+ //! Short form of jump instruction has been used, but the displacement is out
+ //! of bounds.
+ kErrorIllegalDisplacement,
+
+ //! A variable has been assigned more than once to a function argument (Compiler).
+ kErrorOverlappedArgs,
+
+ //! Count of AsmJit error codes.
+ kErrorCount
+};
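A usage sketch tying these codes to the `DebugUtils::errorAsString()` helper declared below (the include path is assumed from this patch's layout):

```cpp
#include <stdio.h>
#include "DynamicHooks/thirdparty/AsmJit/base/globals.h"  // Path assumed from this patch.

int main() {
  asmjit::Error err = asmjit::kErrorNoHeapMemory;
  printf("error %u: %s\n", static_cast<unsigned>(err),
         asmjit::DebugUtils::errorAsString(err));  // -> "No heap memory"
  return 0;
}
```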
+
+//! \}
+
+// ============================================================================
+// [asmjit::Init / NoInit]
+// ============================================================================
+
+#if !defined(ASMJIT_DOCGEN)
+struct _Init {};
+static const _Init Init = {};
+
+struct _NoInit {};
+static const _NoInit NoInit = {};
+#endif // !ASMJIT_DOCGEN
+
+// ============================================================================
+// [asmjit::DebugUtils]
+// ============================================================================
+
+namespace DebugUtils {
+
+//! Get a printable version of `asmjit::Error` value.
+ASMJIT_API const char* errorAsString(Error err) noexcept;
+
+//! \addtogroup asmjit_base
+//! \{
+
+//! Called in debug build to output a debugging message caused by assertion
+//! failure or tracing.
+ASMJIT_API void debugOutput(const char* str) noexcept;
+
+//! Called in debug build on assertion failure.
+//!
+//! \param file Source file name where it happened.
+//! \param line Line in the source file.
+//! \param msg Message to display.
+//!
+//! If you have problems with assertions put a breakpoint at assertionFailed()
+//! function (asmjit/base/globals.cpp) and check the call stack to locate the
+//! failing code.
+ASMJIT_API void ASMJIT_NORETURN assertionFailed(const char* file, int line, const char* msg) noexcept;
+
+//! \}
+
+} // DebugUtils namespace
+} // asmjit namespace
+
+// ============================================================================
+// [ASMJIT_ASSERT]
+// ============================================================================
+
+#if defined(ASMJIT_DEBUG)
+# define ASMJIT_ASSERT(exp) \
+ do { \
+ if (!(exp)) { \
+ ::asmjit::DebugUtils::assertionFailed( \
+ __FILE__ + ::asmjit::DebugUtils::kSourceRelativePathOffset, \
+ __LINE__, \
+ #exp); \
+ } \
+ } while (0)
+# define ASMJIT_NOT_REACHED() \
+ ::asmjit::DebugUtils::assertionFailed( \
+ __FILE__ + ::asmjit::DebugUtils::kSourceRelativePathOffset, \
+ __LINE__, \
+ "MUST NOT BE REACHED")
+#else
+# define ASMJIT_ASSERT(exp) ASMJIT_NOP
+# define ASMJIT_NOT_REACHED() ASMJIT_ASSUME(0)
+#endif // ASMJIT_DEBUG
+
+// ============================================================================
+// [ASMJIT_PROPAGATE_ERROR]
+// ============================================================================
+
+//! \internal
+//!
+//! Used by AsmJit to return the `_Exp_` result if it's an error.
+#define ASMJIT_PROPAGATE_ERROR(_Exp_) \
+ do { \
+ ::asmjit::Error _errval = (_Exp_); \
+ if (_errval != ::asmjit::kErrorOk) \
+ return _errval; \
+ } while (0)
+
+// ============================================================================
+// [asmjit_cast<>]
+// ============================================================================
+
+//! \addtogroup asmjit_base
+//! \{
+
+//! Cast used to cast a pointer to a function. It's like `reinterpret_cast<>`,
+//! but internally uses a C style cast to work with MinGW.
+//!
+//! If you are using a single compiler and `reinterpret_cast<>` works for you,
+//! there is no reason to use `asmjit_cast<>`. If you are writing
+//! cross-platform software with various compiler support, consider using
+//! `asmjit_cast<>` instead of `reinterpret_cast<>`.
+template<typename T, typename Z>
+static ASMJIT_INLINE T asmjit_cast(Z* p) noexcept { return (T)p; }
+
+//! \}
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_GLOBALS_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base/hlstream.cpp b/DynamicHooks/thirdparty/AsmJit/base/hlstream.cpp
new file mode 100644
index 0000000..b3f5ab3
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/hlstream.cpp
@@ -0,0 +1,20 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+ +// [Export] +#define ASMJIT_EXPORTS + +// [Dependencies] +#include "../base/hlstream.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" diff --git a/DynamicHooks/thirdparty/AsmJit/base/hlstream.h b/DynamicHooks/thirdparty/AsmJit/base/hlstream.h new file mode 100644 index 0000000..89d25df --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/base/hlstream.h @@ -0,0 +1,1174 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BASE_HLSTREAM_H +#define _ASMJIT_BASE_HLSTREAM_H + +#include "../build.h" +#if !defined(ASMJIT_DISABLE_COMPILER) + +// [Dependencies] +#include "../base/assembler.h" +#include "../base/operand.h" + +// TODO: Cannot depend on it. +#include "../base/compilerfunc.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [Forward Declarations] +// ============================================================================ + +class Compiler; +struct VarData; +struct VarState; +struct VarMap; + +class HLInst; +class HLJump; +class HLLabel; +class HLSentinel; + +//! \addtogroup asmjit_base +//! \{ + +// ============================================================================ +// [asmjit::HLNode] +// ============================================================================ + +//! Base node (HL). +//! +//! Every node represents an abstract instruction, directive, label, or macro +//! instruction that can be serialized to `Assembler`. +class HLNode { + public: + ASMJIT_NO_COPY(HLNode) + + // -------------------------------------------------------------------------- + // [Type] + // -------------------------------------------------------------------------- + + //! Type of \ref HLNode. + ASMJIT_ENUM(Type) { + //! Invalid node (internal, don't use). + kTypeNone = 0, + + // -------------------------------------------------------------------------- + // [Low-Level - Assembler / Compiler] + // -------------------------------------------------------------------------- + + //! Node is \ref HLInst or \ref HLJump. + kTypeInst, + //! Node is \ref HLData. + kTypeData, + //! Node is \ref HLAlign. + kTypeAlign, + //! Node is \ref HLLabel. + kTypeLabel, + //! Node is \ref HLComment. + kTypeComment, + //! Node is \ref HLSentinel. + kTypeSentinel, + + // -------------------------------------------------------------------------- + // [High-Level - Compiler-Only] + // -------------------------------------------------------------------------- + + //! Node is \ref HLHint. + kTypeHint, + //! Node is \ref HLFunc. + kTypeFunc, + //! Node is \ref HLRet. + kTypeRet, + //! Node is \ref HLCall. + kTypeCall, + //! Node is \ref HLCallArg. + kTypeCallArg + }; + + // -------------------------------------------------------------------------- + // [Flags] + // -------------------------------------------------------------------------- + + ASMJIT_ENUM(Flags) { + //! Whether the node has been translated, thus contains only registers. + kFlagIsTranslated = 0x0001, + + //! Whether the node was scheduled - possibly reordered, but basically this + //! is a mark that is set by scheduler after the node has been visited. + kFlagIsScheduled = 0x0002, + + //! Whether the node can be safely removed by the `Compiler` in case it's + //! unreachable. + kFlagIsRemovable = 0x0004, + + //! 
+ //! Whether the node is informative only and can be safely removed.
+ kFlagIsInformative = 0x0008,
+
+ //! Whether the `HLInst` is a jump.
+ kFlagIsJmp = 0x0010,
+ //! Whether the `HLInst` is a conditional jump.
+ kFlagIsJcc = 0x0020,
+
+ //! Whether the `HLInst` is an unconditional jump or conditional jump that is
+ //! likely to be taken.
+ kFlagIsTaken = 0x0040,
+
+ //! Whether the `HLNode` will return from a function.
+ //!
+ //! This flag is used by both `HLSentinel` and `HLRet`.
+ kFlagIsRet = 0x0080,
+
+ //! Whether the instruction is special.
+ kFlagIsSpecial = 0x0100,
+
+ //! Whether the instruction is an FPU instruction.
+ kFlagIsFp = 0x0200
+ };
+
+ // --------------------------------------------------------------------------
+ // [Construction / Destruction]
+ // --------------------------------------------------------------------------
+
+ //! Create a new `HLNode`.
+ //!
+ //! NOTE: Always use the compiler to create nodes.
+ ASMJIT_INLINE HLNode(Compiler* compiler, uint32_t type) noexcept; // Defined-Later.
+
+ //! Destroy the `HLNode`.
+ //!
+ //! NOTE: Nodes are zone allocated, there should be no code in the destructor.
+ ASMJIT_INLINE ~HLNode() noexcept {}
+
+ // --------------------------------------------------------------------------
+ // [Accessors - List]
+ // --------------------------------------------------------------------------
+
+ //! Get previous node in the compiler stream.
+ ASMJIT_INLINE HLNode* getPrev() const noexcept { return _prev; }
+ //! Get next node in the compiler stream.
+ ASMJIT_INLINE HLNode* getNext() const noexcept { return _next; }
+
+ // --------------------------------------------------------------------------
+ // [Accessors - Comment]
+ // --------------------------------------------------------------------------
+
+ //! Get an inline comment string.
+ ASMJIT_INLINE const char* getComment() const noexcept { return _comment; }
+ //! Set an inline comment string to `comment`.
+ ASMJIT_INLINE void setComment(const char* comment) noexcept { _comment = comment; }
+
+ // --------------------------------------------------------------------------
+ // [Accessors - Type and Flags]
+ // --------------------------------------------------------------------------
+
+ //! Get the node type, see \ref Type.
+ ASMJIT_INLINE uint32_t getType() const noexcept { return _type; }
+ //! Get the node flags.
+ ASMJIT_INLINE uint32_t getFlags() const noexcept { return _flags; }
+
+ //! Get whether the instruction has flag `flag`.
+ ASMJIT_INLINE bool hasFlag(uint32_t flag) const noexcept { return (static_cast<uint32_t>(_flags) & flag) != 0; }
+ //! Set node flags to `flags`.
+ ASMJIT_INLINE void setFlags(uint32_t flags) noexcept { _flags = static_cast<uint16_t>(flags); }
+ //! Add instruction `flags`.
+ ASMJIT_INLINE void orFlags(uint32_t flags) noexcept { _flags |= static_cast<uint16_t>(flags); }
+ //! And instruction `flags`.
+ ASMJIT_INLINE void andFlags(uint32_t flags) noexcept { _flags &= static_cast<uint16_t>(flags); }
+ //! Clear instruction `flags`.
+ ASMJIT_INLINE void andNotFlags(uint32_t flags) noexcept { _flags &= ~static_cast<uint16_t>(flags); }
+
+ //! Get whether the node has been fetched.
+ ASMJIT_INLINE bool isFetched() const noexcept { return _flowId != 0; }
+ //! Get whether the node has been translated.
+ ASMJIT_INLINE bool isTranslated() const noexcept { return hasFlag(kFlagIsTranslated); }
+ //! Get whether the node has been scheduled.
+ ASMJIT_INLINE bool isScheduled() const noexcept { return hasFlag(kFlagIsScheduled); }
+
+ //! Get whether the node is removable if it's in an unreachable code block.
  ASMJIT_INLINE bool isRemovable() const noexcept { return hasFlag(kFlagIsRemovable); }
  //! Get whether the node is informative only (comment, hint).
  ASMJIT_INLINE bool isInformative() const noexcept { return hasFlag(kFlagIsInformative); }

  //! Whether the node is `HLLabel`.
  ASMJIT_INLINE bool isLabel() const noexcept { return _type == kTypeLabel; }
  //! Whether the `HLInst` node is an unconditional jump.
  ASMJIT_INLINE bool isJmp() const noexcept { return hasFlag(kFlagIsJmp); }
  //! Whether the `HLInst` node is a conditional jump.
  ASMJIT_INLINE bool isJcc() const noexcept { return hasFlag(kFlagIsJcc); }
  //! Whether the `HLInst` node is a conditional/unconditional jump.
  ASMJIT_INLINE bool isJmpOrJcc() const noexcept { return hasFlag(kFlagIsJmp | kFlagIsJcc); }
  //! Whether the `HLInst` node is a return.
  ASMJIT_INLINE bool isRet() const noexcept { return hasFlag(kFlagIsRet); }

  //! Get whether the node is `HLInst` and the instruction is special.
  ASMJIT_INLINE bool isSpecial() const noexcept { return hasFlag(kFlagIsSpecial); }
  //! Get whether the node is `HLInst` and the instruction uses x87-FPU.
  ASMJIT_INLINE bool isFp() const noexcept { return hasFlag(kFlagIsFp); }

  // --------------------------------------------------------------------------
  // [Accessors - FlowId]
  // --------------------------------------------------------------------------

  //! Get flow index.
  ASMJIT_INLINE uint32_t getFlowId() const noexcept { return _flowId; }
  //! Set flow index.
  ASMJIT_INLINE void setFlowId(uint32_t flowId) noexcept { _flowId = flowId; }

  // --------------------------------------------------------------------------
  // [Accessors - TokenId]
  // --------------------------------------------------------------------------

  ASMJIT_INLINE bool hasTokenId(uint32_t id) const noexcept { return _tokenId == id; }
  ASMJIT_INLINE uint32_t getTokenId() const noexcept { return _tokenId; }
  ASMJIT_INLINE void setTokenId(uint32_t id) noexcept { _tokenId = id; }

  // --------------------------------------------------------------------------
  // [Accessors - VarMap]
  // --------------------------------------------------------------------------

  //! Get whether node contains variable allocation instructions.
  ASMJIT_INLINE bool hasMap() const noexcept { return _map != nullptr; }
  //! Get variable allocation instructions.
  ASMJIT_INLINE VarMap* getMap() const noexcept { return _map; }
  //! Get variable allocation instructions cast to `T*`.
  template<typename T>
  ASMJIT_INLINE T* getMap() const noexcept { return static_cast<T*>(_map); }
  //! Set variable allocation instructions.
  ASMJIT_INLINE void setMap(VarMap* map) noexcept { _map = map; }

  // --------------------------------------------------------------------------
  // [Accessors - VarState]
  // --------------------------------------------------------------------------

  //! Get whether the node has an associated `VarState`.
  ASMJIT_INLINE bool hasState() const noexcept { return _state != nullptr; }
  //! Get node state.
  ASMJIT_INLINE VarState* getState() const noexcept { return _state; }
  //! Get node state cast to `T*`.
  template<typename T>
  ASMJIT_INLINE T* getState() const noexcept { return static_cast<T*>(_state); }
  //! Set node state.
  ASMJIT_INLINE void setState(VarState* state) noexcept { _state = state; }

  // --------------------------------------------------------------------------
  // [Accessors - Liveness]
  // --------------------------------------------------------------------------
  //! Get whether the node has variable liveness bits.
  ASMJIT_INLINE bool hasLiveness() const noexcept { return _liveness != nullptr; }
  //! Get variable liveness bits.
  ASMJIT_INLINE BitArray* getLiveness() const noexcept { return _liveness; }
  //! Set variable liveness bits.
  ASMJIT_INLINE void setLiveness(BitArray* liveness) noexcept { _liveness = liveness; }

  // --------------------------------------------------------------------------
  // [Members]
  // --------------------------------------------------------------------------

  //! Previous node.
  HLNode* _prev;
  //! Next node.
  HLNode* _next;

  //! Node type, see \ref Type.
  uint8_t _type;
  //! Count of operands (if the node has operands, otherwise zero).
  uint8_t _opCount;
  //! Flags, different meaning for every type of the node.
  uint16_t _flags;

  //! Flow index.
  uint32_t _flowId;

  //! Processing token ID.
  //!
  //! Used by some algorithms to mark nodes as visited. If the token is
  //! generated in an incrementing way the visitor can just mark nodes it
  //! visits and then compare the `HLNode`'s token with its local token.
  //! If they match, the node has been visited already, and the visitor
  //! doesn't need to clean anything up, as the next time the token will be
  //! different.
  uint32_t _tokenId;

  // TODO: 32-bit gap

  //! Inline comment string, initially set to nullptr.
  const char* _comment;

  //! Variable mapping (VarAttr to VarData), initially nullptr, filled during
  //! fetch phase.
  VarMap* _map;

  //! Variable liveness bits (initially nullptr, filled by analysis phase).
  BitArray* _liveness;

  //! Saved state.
  //!
  //! Initially nullptr, not all nodes have saved state, only branch/flow control
  //! nodes.
  VarState* _state;
};

// ============================================================================
// [asmjit::HLInst]
// ============================================================================

//! Instruction (HL).
//!
//! Wraps an instruction with its options and operands.
class HLInst : public HLNode {
 public:
  ASMJIT_NO_COPY(HLInst)

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! Create a new `HLInst` instance.
  ASMJIT_INLINE HLInst(Compiler* compiler, uint32_t instId, uint32_t instOptions, Operand* opList, uint32_t opCount) noexcept
    : HLNode(compiler, kTypeInst) {

    orFlags(kFlagIsRemovable);
    _instId = static_cast<uint16_t>(instId);
    _reserved = 0;
    _instOptions = instOptions;

    _opCount = static_cast<uint8_t>(opCount);
    _opList = opList;

    _updateMemOp();
  }

  //! Destroy the `HLInst` instance.
  ASMJIT_INLINE ~HLInst() noexcept {}

  // --------------------------------------------------------------------------
  // [Accessors]
  // --------------------------------------------------------------------------

  //! Get the instruction id, see `X86InstId`.
  ASMJIT_INLINE uint32_t getInstId() const noexcept { return _instId; }
  //! Set the instruction id to `instId`.
  //!
  //! NOTE: Please do not modify instruction code if you don't know what you
  //! are doing. Incorrect instruction code and/or operands can cause random
  //! errors in production builds and will most probably trigger assertion
  //! failures in debug builds.
  ASMJIT_INLINE void setInstId(uint32_t instId) noexcept { _instId = static_cast<uint16_t>(instId); }
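
  // Illustration only, not part of the original header: the `_tokenId`
  // member documented above lets a pass mark nodes as visited without a
  // cleanup sweep afterwards. Assuming some hypothetical `newToken()`
  // source of fresh token values, a visiting pass could be written as:
  //
  //   uint32_t token = newToken();             // hypothetical
  //   for (HLNode* node = first; node; node = node->getNext()) {
  //     if (node->hasTokenId(token)) continue; // visited already
  //     node->setTokenId(token);               // mark as visited
  //   }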
  //! Whether the instruction is either a jump or a conditional jump likely
  //! to be taken.
  ASMJIT_INLINE bool isTaken() const noexcept { return hasFlag(kFlagIsTaken); }

  //! Get emit options.
  ASMJIT_INLINE uint32_t getOptions() const noexcept { return _instOptions; }
  //! Set emit options.
  ASMJIT_INLINE void setOptions(uint32_t options) noexcept { _instOptions = options; }
  //! Add emit options.
  ASMJIT_INLINE void addOptions(uint32_t options) noexcept { _instOptions |= options; }
  //! Mask emit options.
  ASMJIT_INLINE void andOptions(uint32_t options) noexcept { _instOptions &= options; }
  //! Clear emit options.
  ASMJIT_INLINE void delOptions(uint32_t options) noexcept { _instOptions &= ~options; }

  //! Get operands count.
  ASMJIT_INLINE uint32_t getOpCount() const noexcept { return _opCount; }
  //! Get operands list.
  ASMJIT_INLINE Operand* getOpList() noexcept { return _opList; }
  //! \overload
  ASMJIT_INLINE const Operand* getOpList() const noexcept { return _opList; }

  //! Get whether the instruction contains a memory operand.
  ASMJIT_INLINE bool hasMemOp() const noexcept { return _memOpIndex != 0xFF; }
  //! Get memory operand.
  //!
  //! NOTE: Can only be called if the instruction has such operand,
  //! see `hasMemOp()`.
  ASMJIT_INLINE BaseMem* getMemOp() const noexcept {
    ASMJIT_ASSERT(hasMemOp());
    return static_cast<BaseMem*>(&_opList[_memOpIndex]);
  }
  //! \overload
  template<typename T>
  ASMJIT_INLINE T* getMemOp() const noexcept {
    ASMJIT_ASSERT(hasMemOp());
    return static_cast<T*>(&_opList[_memOpIndex]);
  }

  //! Set memory operand index, `0xFF` means no memory operand.
  ASMJIT_INLINE void setMemOpIndex(uint32_t index) noexcept { _memOpIndex = static_cast<uint8_t>(index); }
  //! Reset memory operand index to `0xFF` (no operand).
  ASMJIT_INLINE void resetMemOpIndex() noexcept { _memOpIndex = 0xFF; }

  // --------------------------------------------------------------------------
  // [Utils]
  // --------------------------------------------------------------------------

  ASMJIT_INLINE void _updateMemOp() noexcept {
    Operand* opList = getOpList();
    uint32_t opCount = getOpCount();

    uint32_t i;
    for (i = 0; i < opCount; i++)
      if (opList[i].isMem())
        goto L_Update;
    i = 0xFF;

L_Update:
    setMemOpIndex(i);
  }

  // --------------------------------------------------------------------------
  // [Members]
  // --------------------------------------------------------------------------

  //! Instruction ID, see `InstId`.
  uint16_t _instId;
  //! \internal
  uint8_t _memOpIndex;
  //! \internal
  uint8_t _reserved;
  //! Instruction options, see `InstOptions`.
  uint32_t _instOptions;

  //! Operands list.
  Operand* _opList;
};

// ============================================================================
// [asmjit::HLJump]
// ============================================================================

//! Conditional or direct jump (HL).
//!
//! Extension of `HLInst` node, which stores more information about the jump.
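//!
//! Illustration only, not part of the original header: all jumps that target
//! the same label are kept in a singly-linked list threaded through
//! `_jumpNext`, with the list head available from the target label's
//! `getFrom()`, so a pass can visit every jump to one label (`label` here is
//! a hypothetical `HLLabel*`):
//!
//!   for (HLJump* jump = label->getFrom(); jump; jump = jump->getJumpNext()) {
//!     // ... inspect `jump` ...
//!   }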
class HLJump : public HLInst {
 public:
  ASMJIT_NO_COPY(HLJump)

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  ASMJIT_INLINE HLJump(Compiler* compiler, uint32_t code, uint32_t options, Operand* opList, uint32_t opCount) noexcept
    : HLInst(compiler, code, options, opList, opCount),
      _target(nullptr),
      _jumpNext(nullptr) {}
  ASMJIT_INLINE ~HLJump() noexcept {}

  // --------------------------------------------------------------------------
  // [Accessors]
  // --------------------------------------------------------------------------

  ASMJIT_INLINE HLLabel* getTarget() const noexcept { return _target; }
  ASMJIT_INLINE HLJump* getJumpNext() const noexcept { return _jumpNext; }

  // --------------------------------------------------------------------------
  // [Members]
  // --------------------------------------------------------------------------

  //! Target node.
  HLLabel* _target;
  //! Next jump to the same target in a singly-linked list.
  HLJump* _jumpNext;
};

// ============================================================================
// [asmjit::HLData]
// ============================================================================

//! Data (HL).
//!
//! Wraps `.data` directive. The node contains data that will be placed at the
//! node's position in the assembler stream. The data is considered to be RAW;
//! no analysis nor byte-order conversion is performed on RAW data.
class HLData : public HLNode {
 public:
  ASMJIT_NO_COPY(HLData)

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  enum { kInlineBufferSize = 12 };

  //! Create a new `HLData` instance.
  ASMJIT_INLINE HLData(Compiler* compiler, void* data, uint32_t size) noexcept
    : HLNode(compiler, kTypeData) {

    _size = size;
    if (size <= kInlineBufferSize) {
      if (data != nullptr)
        ::memcpy(_data.buf, data, size);
    }
    else {
      _data.ptr = static_cast<uint8_t*>(data);
    }
  }

  //! Destroy the `HLData` instance.
  ASMJIT_INLINE ~HLData() noexcept {}

  // --------------------------------------------------------------------------
  // [Accessors]
  // --------------------------------------------------------------------------

  //! Get size of the data.
  uint32_t getSize() const noexcept { return _size; }
  //! Get pointer to the data.
  uint8_t* getData() const noexcept { return _size <= kInlineBufferSize ? const_cast<uint8_t*>(_data.buf) : _data.ptr; }

  // --------------------------------------------------------------------------
  // [Members]
  // --------------------------------------------------------------------------

  union {
    //! Inline data buffer (used if the data fits into `kInlineBufferSize`).
    uint8_t buf[kInlineBufferSize];
    //! Pointer to external data (used otherwise).
    uint8_t* ptr;
  } _data;

  //! Size of the data.
  uint32_t _size;
};

// ============================================================================
// [asmjit::HLAlign]
// ============================================================================

//! Align directive (HL).
//!
//! Wraps `.align` directive.
class HLAlign : public HLNode {
 public:
  ASMJIT_NO_COPY(HLAlign)

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------
  //! Create a new `HLAlign` instance.
  ASMJIT_INLINE HLAlign(Compiler* compiler, uint32_t alignMode, uint32_t offset) noexcept
    : HLNode(compiler, kTypeAlign) {

    _alignMode = alignMode;
    _offset = offset;
  }

  //! Destroy the `HLAlign` instance.
  ASMJIT_INLINE ~HLAlign() noexcept {}

  // --------------------------------------------------------------------------
  // [Accessors]
  // --------------------------------------------------------------------------

  //! Get align mode.
  ASMJIT_INLINE uint32_t getAlignMode() const noexcept { return _alignMode; }
  //! Set align mode.
  ASMJIT_INLINE void setAlignMode(uint32_t alignMode) noexcept { _alignMode = alignMode; }

  //! Get align offset in bytes.
  ASMJIT_INLINE uint32_t getOffset() const noexcept { return _offset; }
  //! Set align offset in bytes to `offset`.
  ASMJIT_INLINE void setOffset(uint32_t offset) noexcept { _offset = offset; }

  // --------------------------------------------------------------------------
  // [Members]
  // --------------------------------------------------------------------------

  //! Align mode, see \ref AlignMode.
  uint32_t _alignMode;
  //! Align offset (in bytes).
  uint32_t _offset;
};

// ============================================================================
// [asmjit::HLLabel]
// ============================================================================

//! Label (HL).
class HLLabel : public HLNode {
 public:
  ASMJIT_NO_COPY(HLLabel)

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! Create a new `HLLabel` instance.
  ASMJIT_INLINE HLLabel(Compiler* compiler, uint32_t labelId) noexcept
    : HLNode(compiler, kTypeLabel) {

    _id = labelId;
    _numRefs = 0;
    _from = nullptr;
  }

  //! Destroy the `HLLabel` instance.
  ASMJIT_INLINE ~HLLabel() noexcept {}

  // --------------------------------------------------------------------------
  // [Accessors]
  // --------------------------------------------------------------------------

  //! Get target label.
  ASMJIT_INLINE Label getLabel() const noexcept { return Label(_id); }
  //! Get target label id.
  ASMJIT_INLINE uint32_t getLabelId() const noexcept { return _id; }

  //! Get first jump instruction that targets this label.
  ASMJIT_INLINE HLJump* getFrom() const noexcept { return _from; }

  //! Get whether the node has assigned state.
  ASMJIT_INLINE bool hasState() const noexcept { return _state != nullptr; }
  //! Get state for this target.
  ASMJIT_INLINE VarState* getState() const noexcept { return _state; }
  //! Set state for this target.
  ASMJIT_INLINE void setState(VarState* state) noexcept { _state = state; }

  //! Get number of jumps to this target.
  ASMJIT_INLINE uint32_t getNumRefs() const noexcept { return _numRefs; }
  //! Set number of jumps to this target.
  ASMJIT_INLINE void setNumRefs(uint32_t i) noexcept { _numRefs = i; }

  //! Add number of jumps to this target.
  ASMJIT_INLINE void addNumRefs(uint32_t i = 1) noexcept { _numRefs += i; }
  //! Subtract number of jumps to this target.
  ASMJIT_INLINE void subNumRefs(uint32_t i = 1) noexcept { _numRefs -= i; }

  // --------------------------------------------------------------------------
  // [Members]
  // --------------------------------------------------------------------------

  //! Label id.
  uint32_t _id;
  //! Count of jumps here.
  uint32_t _numRefs;
  //! First jump instruction that points to this target (label).
  HLJump* _from;
};

// ============================================================================
// [asmjit::HLComment]
// ============================================================================

//! Comment (HL).
class HLComment : public HLNode {
 public:
  ASMJIT_NO_COPY(HLComment)

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! Create a new `HLComment` instance.
  ASMJIT_INLINE HLComment(Compiler* compiler, const char* comment) noexcept
    : HLNode(compiler, kTypeComment) {

    orFlags(kFlagIsRemovable | kFlagIsInformative);
    _comment = comment;
  }

  //! Destroy the `HLComment` instance.
  ASMJIT_INLINE ~HLComment() noexcept {}
};

// ============================================================================
// [asmjit::HLSentinel]
// ============================================================================

//! Sentinel (HL).
class HLSentinel : public HLNode {
 public:
  ASMJIT_NO_COPY(HLSentinel)

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! Create a new `HLSentinel` instance.
  ASMJIT_INLINE HLSentinel(Compiler* compiler) noexcept
    : HLNode(compiler, kTypeSentinel) {
    orFlags(kFlagIsRet);
  }

  //! Destroy the `HLSentinel` instance.
  ASMJIT_INLINE ~HLSentinel() noexcept {}
};

// ============================================================================
// [asmjit::HLHint]
// ============================================================================

//! Hint node.
class HLHint : public HLNode {
 public:
  ASMJIT_NO_COPY(HLHint)

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! Create a new `HLHint` instance.
  ASMJIT_INLINE HLHint(Compiler* compiler, VarData* vd, uint32_t hint, uint32_t value) noexcept
    : HLNode(compiler, kTypeHint) {

    orFlags(kFlagIsRemovable | kFlagIsInformative);
    _vd = vd;
    _hint = hint;
    _value = value;
  }

  //! Destroy the `HLHint` instance.
  ASMJIT_INLINE ~HLHint() noexcept {}

  // --------------------------------------------------------------------------
  // [Accessors]
  // --------------------------------------------------------------------------

  //! Get variable.
  ASMJIT_INLINE VarData* getVd() const noexcept { return _vd; }

  //! Get hint id (see `kVarHint`).
  ASMJIT_INLINE uint32_t getHint() const noexcept { return _hint; }
  //! Set hint id (see `kVarHint`).
  ASMJIT_INLINE void setHint(uint32_t hint) noexcept { _hint = hint; }

  //! Get hint value.
  ASMJIT_INLINE uint32_t getValue() const noexcept { return _value; }
  //! Set hint value.
  ASMJIT_INLINE void setValue(uint32_t value) noexcept { _value = value; }

  // --------------------------------------------------------------------------
  // [Members]
  // --------------------------------------------------------------------------

  //! Variable.
  VarData* _vd;
  //! Hint id.
  uint32_t _hint;
  //! Value.
  uint32_t _value;
};

// ============================================================================
// [asmjit::HLFunc]
// ============================================================================

//! Function (HL).
class HLFunc : public HLNode {
 public:
  ASMJIT_NO_COPY(HLFunc)

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! Create a new `HLFunc` instance.
  //!
  //! Always use `Compiler::addFunc()` to create an `HLFunc` instance.
  ASMJIT_INLINE HLFunc(Compiler* compiler) noexcept
    : HLNode(compiler, kTypeFunc),
      _entryNode(nullptr),
      _exitNode(nullptr),
      _decl(nullptr),
      _end(nullptr),
      _args(nullptr),
      _funcHints(Utils::mask(kFuncHintNaked)),
      _funcFlags(0),
      _expectedStackAlignment(0),
      _requiredStackAlignment(0),
      _redZoneSize(0),
      _spillZoneSize(0),
      _argStackSize(0),
      _memStackSize(0),
      _callStackSize(0) {}

  //! Destroy the `HLFunc` instance.
  ASMJIT_INLINE ~HLFunc() noexcept {}

  // --------------------------------------------------------------------------
  // [Accessors]
  // --------------------------------------------------------------------------

  //! Get function entry `HLLabel`.
  ASMJIT_INLINE HLLabel* getEntryNode() const noexcept { return _entryNode; }
  //! Get function exit `HLLabel`.
  ASMJIT_INLINE HLLabel* getExitNode() const noexcept { return _exitNode; }

  //! Get function entry label.
  ASMJIT_INLINE Label getEntryLabel() const noexcept { return _entryNode->getLabel(); }
  //! Get function exit label.
  ASMJIT_INLINE Label getExitLabel() const noexcept { return _exitNode->getLabel(); }

  //! Get the function end sentinel.
  ASMJIT_INLINE HLSentinel* getEnd() const noexcept { return _end; }
  //! Get function declaration.
  ASMJIT_INLINE FuncDecl* getDecl() const noexcept { return _decl; }

  //! Get arguments count.
  ASMJIT_INLINE uint32_t getNumArgs() const noexcept { return _decl->getNumArgs(); }
  //! Get arguments list.
  ASMJIT_INLINE VarData** getArgs() const noexcept { return _args; }

  //! Get argument at `i`.
  ASMJIT_INLINE VarData* getArg(uint32_t i) const noexcept {
    ASMJIT_ASSERT(i < getNumArgs());
    return _args[i];
  }

  //! Set argument at `i`.
  ASMJIT_INLINE void setArg(uint32_t i, VarData* vd) noexcept {
    ASMJIT_ASSERT(i < getNumArgs());
    _args[i] = vd;
  }

  //! Reset argument at `i`.
  ASMJIT_INLINE void resetArg(uint32_t i) noexcept {
    ASMJIT_ASSERT(i < getNumArgs());
    _args[i] = nullptr;
  }

  //! Get function hints.
  ASMJIT_INLINE uint32_t getFuncHints() const noexcept { return _funcHints; }
  //! Get function flags.
  ASMJIT_INLINE uint32_t getFuncFlags() const noexcept { return _funcFlags; }

  //! Get whether `_funcFlags` has `flag` set.
  ASMJIT_INLINE bool hasFuncFlag(uint32_t flag) const noexcept { return (_funcFlags & flag) != 0; }
  //! Add function `flags`.
  ASMJIT_INLINE void addFuncFlags(uint32_t flags) noexcept { _funcFlags |= flags; }
  //! Clear function `flags`.
  ASMJIT_INLINE void clearFuncFlags(uint32_t flags) noexcept { _funcFlags &= ~flags; }

  //! Get whether the function is naked.
  ASMJIT_INLINE bool isNaked() const noexcept { return hasFuncFlag(kFuncFlagIsNaked); }
  //! Get whether the function is also a caller.
  ASMJIT_INLINE bool isCaller() const noexcept { return hasFuncFlag(kFuncFlagIsCaller); }
  //! Get whether the required stack alignment is lower than the expected one,
  //! thus it has to be aligned manually.
  ASMJIT_INLINE bool isStackMisaligned() const noexcept { return hasFuncFlag(kFuncFlagIsStackMisaligned); }
  //! Get whether the stack pointer is adjusted inside function prolog/epilog.
  ASMJIT_INLINE bool isStackAdjusted() const noexcept { return hasFuncFlag(kFuncFlagIsStackAdjusted); }

  //! Get whether the function is finished.
  ASMJIT_INLINE bool isFinished() const noexcept { return hasFuncFlag(kFuncFlagIsFinished); }

  //! Get expected stack alignment.
  ASMJIT_INLINE uint32_t getExpectedStackAlignment() const noexcept {
    return _expectedStackAlignment;
  }

  //! Set expected stack alignment.
  ASMJIT_INLINE void setExpectedStackAlignment(uint32_t alignment) noexcept {
    _expectedStackAlignment = alignment;
  }

  //! Get required stack alignment.
  ASMJIT_INLINE uint32_t getRequiredStackAlignment() const noexcept {
    return _requiredStackAlignment;
  }

  //! Set required stack alignment.
  ASMJIT_INLINE void setRequiredStackAlignment(uint32_t alignment) noexcept {
    _requiredStackAlignment = alignment;
  }

  //! Update required stack alignment so it's not lower than expected
  //! stack alignment.
  ASMJIT_INLINE void updateRequiredStackAlignment() noexcept {
    if (_requiredStackAlignment <= _expectedStackAlignment) {
      _requiredStackAlignment = _expectedStackAlignment;
      clearFuncFlags(kFuncFlagIsStackMisaligned);
    }
    else {
      addFuncFlags(kFuncFlagIsStackMisaligned);
    }
  }

  //! Get stack "Red Zone" size.
  ASMJIT_INLINE uint32_t getRedZoneSize() const noexcept { return _redZoneSize; }
  //! Set stack "Red Zone" size.
  ASMJIT_INLINE void setRedZoneSize(uint32_t s) noexcept { _redZoneSize = static_cast<uint16_t>(s); }

  //! Get stack "Spill Zone" size.
  ASMJIT_INLINE uint32_t getSpillZoneSize() const noexcept { return _spillZoneSize; }
  //! Set stack "Spill Zone" size.
  ASMJIT_INLINE void setSpillZoneSize(uint32_t s) noexcept { _spillZoneSize = static_cast<uint16_t>(s); }

  //! Get stack size used by function arguments.
  ASMJIT_INLINE uint32_t getArgStackSize() const noexcept { return _argStackSize; }

  //! Get stack size used by variables and memory allocated on the stack.
  ASMJIT_INLINE uint32_t getMemStackSize() const noexcept { return _memStackSize; }

  //! Get stack size used by function calls.
  ASMJIT_INLINE uint32_t getCallStackSize() const noexcept { return _callStackSize; }
  //! Merge stack size used by function call with `s`.
  ASMJIT_INLINE void mergeCallStackSize(uint32_t s) noexcept { if (_callStackSize < s) _callStackSize = s; }

  // --------------------------------------------------------------------------
  // [Hints]
  // --------------------------------------------------------------------------

  //! Set function hint.
  ASMJIT_INLINE void setHint(uint32_t hint, uint32_t value) noexcept {
    ASMJIT_ASSERT(hint <= 31);
    ASMJIT_ASSERT(value <= 1);

    _funcHints &= ~(1 << hint);
    _funcHints |= (value << hint);
  }

  //! Get function hint.
  ASMJIT_INLINE uint32_t getHint(uint32_t hint) const noexcept {
    ASMJIT_ASSERT(hint <= 31);
    return (_funcHints >> hint) & 0x1;
  }

  // --------------------------------------------------------------------------
  // [Members]
  // --------------------------------------------------------------------------

  //! Function entry.
  HLLabel* _entryNode;
  //! Function exit.
  HLLabel* _exitNode;

  //! Function declaration.
  FuncDecl* _decl;
  //! Function end.
  HLSentinel* _end;

  //! Arguments list as `VarData`.
  VarData** _args;
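
  // Illustration only, not part of the original header: `setHint()` above
  // keeps one bit per hint in `_funcHints`. With hypothetical values
  // hint = 2 and value = 1 it does:
  //
  //   _funcHints &= ~(1 << 2);  // clear bit 2
  //   _funcHints |= (1 << 2);   // then store the new value in bit 2
  //
  // after which `getHint(2)` returns `(_funcHints >> 2) & 0x1`, i.e. 1.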
  //! Function hints.
  uint32_t _funcHints;
  //! Function flags.
  uint32_t _funcFlags;

  //! Expected stack alignment (we depend on this value).
  //!
  //! NOTE: It can be global alignment given by the OS or described by the
  //! target platform ABI.
  uint32_t _expectedStackAlignment;
  //! Required stack alignment (required by SIMD instructions).
  uint32_t _requiredStackAlignment;

  //! The "Red Zone" size - count of bytes which might be accessed by a leaf
  //! function without adjusting the stack pointer (`esp` or `rsp`) (AMD64 ABI).
  uint16_t _redZoneSize;

  //! The "Spill Zone" size - count of bytes after the function return address
  //! that can be used by the function to spill variables in (WIN64 ABI).
  uint16_t _spillZoneSize;

  //! Stack size needed for function arguments.
  uint32_t _argStackSize;
  //! Stack size needed for all variables and memory allocated on the stack.
  uint32_t _memStackSize;
  //! Stack size needed to call other functions.
  uint32_t _callStackSize;
};

// ============================================================================
// [asmjit::HLRet]
// ============================================================================

//! Function return (HL).
class HLRet : public HLNode {
 public:
  ASMJIT_NO_COPY(HLRet)

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! Create a new `HLRet` instance.
  ASMJIT_INLINE HLRet(Compiler* compiler, const Operand& o0, const Operand& o1) noexcept
    : HLNode(compiler, kTypeRet) {

    orFlags(kFlagIsRet);
    _ret[0] = o0;
    _ret[1] = o1;
  }

  //! Destroy the `HLRet` instance.
  ASMJIT_INLINE ~HLRet() noexcept {}

  // --------------------------------------------------------------------------
  // [Accessors]
  // --------------------------------------------------------------------------

  //! Get the first return operand.
  ASMJIT_INLINE Operand& getFirst() noexcept { return _ret[0]; }
  //! \overload
  ASMJIT_INLINE const Operand& getFirst() const noexcept { return _ret[0]; }

  //! Get the second return operand.
  ASMJIT_INLINE Operand& getSecond() noexcept { return _ret[1]; }
  //! \overload
  ASMJIT_INLINE const Operand& getSecond() const noexcept { return _ret[1]; }

  // --------------------------------------------------------------------------
  // [Members]
  // --------------------------------------------------------------------------

  //! Ret operand(s).
  Operand _ret[2];
};

// ============================================================================
// [asmjit::HLCall]
// ============================================================================

//! Function call (HL).
class HLCall : public HLNode {
 public:
  ASMJIT_NO_COPY(HLCall)

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! Create a new `HLCall` instance.
  ASMJIT_INLINE HLCall(Compiler* compiler, const Operand& target) noexcept
    : HLNode(compiler, kTypeCall),
      _decl(nullptr),
      _target(target),
      _args(nullptr) {
    orFlags(kFlagIsRemovable);
  }

  //! Destroy the `HLCall` instance.
+ ASMJIT_INLINE ~HLCall() noexcept {} + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! Get function declaration. + ASMJIT_INLINE FuncDecl* getDecl() const noexcept { return _decl; } + + //! Get target operand. + ASMJIT_INLINE Operand& getTarget() noexcept { return _target; } + //! \overload + ASMJIT_INLINE const Operand& getTarget() const noexcept { return _target; } + + //! Get return at `i`. + ASMJIT_INLINE Operand& getRet(uint32_t i = 0) noexcept { + ASMJIT_ASSERT(i < 2); + return _ret[i]; + } + //! \overload + ASMJIT_INLINE const Operand& getRet(uint32_t i = 0) const noexcept { + ASMJIT_ASSERT(i < 2); + return _ret[i]; + } + + //! Get argument at `i`. + ASMJIT_INLINE Operand& getArg(uint32_t i) noexcept { + ASMJIT_ASSERT(i < kFuncArgCountLoHi); + return _args[i]; + } + //! \overload + ASMJIT_INLINE const Operand& getArg(uint32_t i) const noexcept { + ASMJIT_ASSERT(i < kFuncArgCountLoHi); + return _args[i]; + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! Function declaration. + FuncDecl* _decl; + + //! Target (address of function, register, label, ...). + Operand _target; + //! Return. + Operand _ret[2]; + //! Arguments. + Operand* _args; +}; + +// ============================================================================ +// [asmjit::HLCallArg] +// ============================================================================ + +//! Function call's argument (HL). +class HLCallArg : public HLNode { + public: + ASMJIT_NO_COPY(HLCallArg) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! Create a new `HLCallArg` instance. + ASMJIT_INLINE HLCallArg(Compiler* compiler, HLCall* call, VarData* sVd, VarData* cVd) noexcept + : HLNode(compiler, kTypeCallArg), + _call(call), + _sVd(sVd), + _cVd(cVd), + _args(0) { + orFlags(kFlagIsRemovable); + } + + //! Destroy the `HLCallArg` instance. + ASMJIT_INLINE ~HLCallArg() noexcept {} + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! Get the associated function-call. + ASMJIT_INLINE HLCall* getCall() const noexcept { return _call; } + //! Get source variable. + ASMJIT_INLINE VarData* getSVd() const noexcept { return _sVd; } + //! Get conversion variable. + ASMJIT_INLINE VarData* getCVd() const noexcept { return _cVd; } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! Associated `HLCall`. + HLCall* _call; + //! Source variable. + VarData* _sVd; + //! Temporary variable used for conversion (or nullptr). + VarData* _cVd; + + //! Affected arguments bit-array. + uint32_t _args; +}; + +// ============================================================================ +// [asmjit::HLStream] +// ============================================================================ + +// TODO: + +//! 
//! \}

} // asmjit namespace

// [Api-End]
#include "../apiend.h"

// [Guard]
#endif // !ASMJIT_DISABLE_COMPILER
#endif // _ASMJIT_BASE_HLSTREAM_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base/logger.cpp b/DynamicHooks/thirdparty/AsmJit/base/logger.cpp
new file mode 100644
index 0000000..3d542df
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/logger.cpp
@@ -0,0 +1,194 @@
+// [AsmJit]
// Complete x86/x64 JIT and Remote Assembler for C++.
//
// [License]
// Zlib - See LICENSE.md file in the package.

// [Export]
#define ASMJIT_EXPORTS

// [Guard]
#include "../build.h"
#if !defined(ASMJIT_DISABLE_LOGGER)

// [Dependencies]
#include "../base/containers.h"
#include "../base/logger.h"
#include "../base/utils.h"
#include <stdarg.h>

// [Api-Begin]
#include "../apibegin.h"

namespace asmjit {

// ============================================================================
// [asmjit::LogUtil]
// ============================================================================

bool LogUtil::formatLine(StringBuilder& sb, const uint8_t* binData, size_t binLen, size_t dispLen, size_t imLen, const char* comment) noexcept {
  size_t currentLen = sb.getLength();
  size_t commentLen = comment ? Utils::strLen(comment, kMaxCommentLength) : 0;

  ASMJIT_ASSERT(binLen >= dispLen);

  if ((binLen != 0 && binLen != kInvalidIndex) || commentLen) {
    size_t align = kMaxInstLength;
    char sep = ';';

    for (size_t i = (binLen == kInvalidIndex); i < 2; i++) {
      size_t begin = sb.getLength();

      // Append align.
      if (currentLen < align) {
        if (!sb.appendChars(' ', align - currentLen))
          return false;
      }

      // Append separator.
      if (sep) {
        if (!(sb.appendChar(sep) & sb.appendChar(' ')))
          return false;
      }

      // Append binary data or comment.
      if (i == 0) {
        if (!sb.appendHex(binData, binLen - dispLen - imLen))
          return false;
        if (!sb.appendChars('.', dispLen * 2))
          return false;
        if (!sb.appendHex(binData + binLen - imLen, imLen))
          return false;
        if (commentLen == 0)
          break;
      }
      else {
        if (!sb.appendString(comment, commentLen))
          return false;
      }

      currentLen += sb.getLength() - begin;
      align += kMaxBinaryLength;
      sep = '|';
    }
  }

  return sb.appendChar('\n');
}

// ============================================================================
// [asmjit::Logger - Construction / Destruction]
// ============================================================================

Logger::Logger() noexcept {
  _options = 0;
  ::memset(_indentation, 0, ASMJIT_ARRAY_SIZE(_indentation));
}

Logger::~Logger() noexcept {}

// ============================================================================
// [asmjit::Logger - Logging]
// ============================================================================

void Logger::logFormat(uint32_t style, const char* fmt, ...)
noexcept {
  char buf[1024];
  size_t len;

  va_list ap;
  va_start(ap, fmt);
  len = vsnprintf(buf, sizeof(buf), fmt, ap);
  va_end(ap);

  if (len >= sizeof(buf))
    len = sizeof(buf) - 1;

  logString(style, buf, len);
}

void Logger::logBinary(uint32_t style, const void* data, size_t size) noexcept {
  static const char prefix[] = ".data ";
  static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

  const uint8_t* s = static_cast<const uint8_t*>(data);
  size_t i = size;

  char buffer[128];
  ::memcpy(buffer, prefix, ASMJIT_ARRAY_SIZE(prefix) - 1);

  while (i) {
    uint32_t n = static_cast<uint32_t>(Utils::iMin<size_t>(i, 16));
    char* p = buffer + ASMJIT_ARRAY_SIZE(prefix) - 1;

    i -= n;
    do {
      uint32_t c = s[0];

      p[0] = hex[c >> 4];
      p[1] = hex[c & 15];

      p += 2;
      s += 1;
    } while (--n);

    *p++ = '\n';
    logString(style, buffer, (size_t)(p - buffer));
  }
}

// ============================================================================
// [asmjit::Logger - Indentation]
// ============================================================================

void Logger::setIndentation(const char* indentation) noexcept {
  ::memset(_indentation, 0, ASMJIT_ARRAY_SIZE(_indentation));
  if (!indentation)
    return;

  size_t length = Utils::strLen(indentation, ASMJIT_ARRAY_SIZE(_indentation) - 1);
  ::memcpy(_indentation, indentation, length);
}

// ============================================================================
// [asmjit::FileLogger - Construction / Destruction]
// ============================================================================

FileLogger::FileLogger(FILE* stream) noexcept : _stream(nullptr) { setStream(stream); }
FileLogger::~FileLogger() noexcept {}

// ============================================================================
// [asmjit::FileLogger - Logging]
// ============================================================================

void FileLogger::logString(uint32_t style, const char* buf, size_t len) noexcept {
  if (!_stream)
    return;

  if (len == kInvalidIndex)
    len = strlen(buf);

  fwrite(buf, 1, len, _stream);
}

// ============================================================================
// [asmjit::StringLogger - Construction / Destruction]
// ============================================================================

StringLogger::StringLogger() noexcept {}
StringLogger::~StringLogger() noexcept {}

// ============================================================================
// [asmjit::StringLogger - Logging]
// ============================================================================

void StringLogger::logString(uint32_t style, const char* buf, size_t len) noexcept {
  _stringBuilder.appendString(buf, len);
}

} // asmjit namespace

// [Api-End]
#include "../apiend.h"

// [Guard]
#endif // !ASMJIT_DISABLE_LOGGER
diff --git a/DynamicHooks/thirdparty/AsmJit/base/logger.h b/DynamicHooks/thirdparty/AsmJit/base/logger.h
new file mode 100644
index 0000000..635ecae
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/logger.h
@@ -0,0 +1,268 @@
+// [AsmJit]
// Complete x86/x64 JIT and Remote Assembler for C++.
//
// [License]
// Zlib - See LICENSE.md file in the package.

// [Guard]
#ifndef _ASMJIT_BASE_LOGGER_H
#define _ASMJIT_BASE_LOGGER_H

#include "../build.h"

// [Dependencies]
#include "../base/containers.h"
#include <stdio.h>

// [Api-Begin]
#include "../apibegin.h"

namespace asmjit {
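
// Illustration only, not part of the original header: a minimal sketch of
// how the two concrete loggers declared below are typically used (the
// `// ... emit code ...` part stands in for whatever attaches the logger to
// a code generator and produces output through it):
//
//   FileLogger fileLogger(stderr);                    // log to a C stream
//   fileLogger.addOptions(Logger::kOptionBinaryForm); // also dump machine bytes
//
//   StringLogger stringLogger;                        // or capture to a string
//   // ... emit code ...
//   // stringLogger.getString() / stringLogger.getLength() then expose the
//   // produced listing.

//! \addtogroup asmjit_base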
//! \{

#if !defined(ASMJIT_DISABLE_LOGGER)

// ============================================================================
// [asmjit::LogUtil]
// ============================================================================

// Only used by asmjit internals, not available to consumers.
#if defined(ASMJIT_EXPORTS)
struct LogUtil {
  enum {
    // Has to be big to be able to hold all metadata compiler can assign to a
    // single instruction.
    kMaxCommentLength = 512,
    kMaxInstLength = 40,
    kMaxBinaryLength = 26
  };

  static bool formatLine(
    StringBuilder& sb,
    const uint8_t* binData, size_t binLen, size_t dispLen, size_t imLen, const char* comment) noexcept;
};
#endif // ASMJIT_EXPORTS

// ============================================================================
// [asmjit::Logger]
// ============================================================================

//! Abstract logging class.
//!
//! This class can be inherited and reimplemented to fit into your logging
//! subsystem. When reimplementing, use the `Logger::logString()` method to
//! log into a custom stream.
//!
//! This class also contains an `_options` member that can be used to control
//! the form of the logging output.
class ASMJIT_VIRTAPI Logger {
 public:
  ASMJIT_NO_COPY(Logger)

  // --------------------------------------------------------------------------
  // [Options]
  // --------------------------------------------------------------------------

  //! Logger options.
  ASMJIT_ENUM(Options) {
    kOptionBinaryForm = 0x00000001,     //!< Output instructions also in binary form.
    kOptionHexImmediate = 0x00000002,   //!< Output immediates as hexadecimal numbers.
    kOptionHexDisplacement = 0x00000004 //!< Output displacements as hexadecimal numbers.
  };

  // --------------------------------------------------------------------------
  // [Style]
  // --------------------------------------------------------------------------

  //! Logger style.
  ASMJIT_ENUM(Style) {
    kStyleDefault = 0,
    kStyleDirective = 1,
    kStyleLabel = 2,
    kStyleData = 3,
    kStyleComment = 4,

    kStyleCount = 5
  };

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! Create a `Logger` instance.
  ASMJIT_API Logger() noexcept;
  //! Destroy the `Logger` instance.
  ASMJIT_API virtual ~Logger() noexcept;

  // --------------------------------------------------------------------------
  // [Logging]
  // --------------------------------------------------------------------------

  //! Log output.
  virtual void logString(uint32_t style, const char* buf, size_t len = kInvalidIndex) noexcept = 0;

  //! Log formatted message (like sprintf), sending the output to the
  //! `logString()` method.
  ASMJIT_API void logFormat(uint32_t style, const char* fmt, ...) noexcept;
  //! Log binary data.
  ASMJIT_API void logBinary(uint32_t style, const void* data, size_t size) noexcept;

  // --------------------------------------------------------------------------
  // [Options]
  // --------------------------------------------------------------------------

  //! Get all logger options as a single integer.
  ASMJIT_INLINE uint32_t getOptions() const noexcept { return _options; }

  //! Get the given logger option.
+ ASMJIT_INLINE bool hasOption(uint32_t option) const noexcept { + return (_options & option) != 0; + } + ASMJIT_INLINE void addOptions(uint32_t options) noexcept { _options |= options; } + ASMJIT_INLINE void clearOptions(uint32_t options) noexcept { _options &= ~options; } + + // -------------------------------------------------------------------------- + // [Indentation] + // -------------------------------------------------------------------------- + + //! Get indentation. + ASMJIT_INLINE const char* getIndentation() const noexcept { + return _indentation; + } + + //! Set indentation. + ASMJIT_API void setIndentation(const char* indentation) noexcept; + + //! Reset indentation. + ASMJIT_INLINE void resetIndentation() noexcept { + setIndentation(nullptr); + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! Options, see \ref LoggerOption. + uint32_t _options; + + //! Indentation. + char _indentation[12]; +}; + +// ============================================================================ +// [asmjit::FileLogger] +// ============================================================================ + +//! Logger that can log to standard C `FILE*` stream. +class ASMJIT_VIRTAPI FileLogger : public Logger { + public: + ASMJIT_NO_COPY(FileLogger) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! Create a new `FileLogger` that logs to a `FILE` stream. + ASMJIT_API FileLogger(FILE* stream = nullptr) noexcept; + + //! Destroy the `FileLogger`. + ASMJIT_API virtual ~FileLogger() noexcept; + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! Get `FILE*` stream. + //! + //! NOTE: Return value can be `nullptr`. + ASMJIT_INLINE FILE* getStream() const noexcept { + return _stream; + } + + //! Set `FILE*` stream, can be set to `nullptr` to disable logging, although + //! the `ExternalTool` will still call `logString` even if there is no stream. + ASMJIT_INLINE void setStream(FILE* stream) noexcept { + _stream = stream; + } + + // -------------------------------------------------------------------------- + // [Logging] + // -------------------------------------------------------------------------- + + ASMJIT_API virtual void logString(uint32_t style, const char* buf, size_t len = kInvalidIndex) noexcept; + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! C file stream. + FILE* _stream; +}; + +// ============================================================================ +// [asmjit::StringLogger] +// ============================================================================ + +//! String logger. +class ASMJIT_VIRTAPI StringLogger : public Logger { + public: + ASMJIT_NO_COPY(StringLogger) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! Create new `StringLogger`. + ASMJIT_API StringLogger() noexcept; + + //! Destroy the `StringLogger`. 
  ASMJIT_API virtual ~StringLogger() noexcept;

  // --------------------------------------------------------------------------
  // [Accessors]
  // --------------------------------------------------------------------------

  //! Get `char*` pointer which represents the resulting string.
  //!
  //! The pointer is owned by `StringLogger`, it can't be modified or freed.
  ASMJIT_INLINE const char* getString() const noexcept {
    return _stringBuilder.getData();
  }

  //! Get the length of the string returned by `getString()`.
  ASMJIT_INLINE size_t getLength() const noexcept {
    return _stringBuilder.getLength();
  }

  //! Clear the resulting string.
  ASMJIT_INLINE void clearString() noexcept {
    _stringBuilder.clear();
  }

  // --------------------------------------------------------------------------
  // [Logging]
  // --------------------------------------------------------------------------

  ASMJIT_API virtual void logString(uint32_t style, const char* buf, size_t len = kInvalidIndex) noexcept;

  // --------------------------------------------------------------------------
  // [Members]
  // --------------------------------------------------------------------------

  //! Output.
  StringBuilder _stringBuilder;
};
#else
struct Logger;
#endif // !ASMJIT_DISABLE_LOGGER

//! \}

} // asmjit namespace

// [Api-End]
#include "../apiend.h"

// [Guard]
#endif // _ASMJIT_BASE_LOGGER_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base/operand.cpp b/DynamicHooks/thirdparty/AsmJit/base/operand.cpp
new file mode 100644
index 0000000..76e0b0a
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/operand.cpp
@@ -0,0 +1,52 @@
+// [AsmJit]
// Complete x86/x64 JIT and Remote Assembler for C++.
//
// [License]
// Zlib - See LICENSE.md file in the package.

// [Export]
#define ASMJIT_EXPORTS

// [Dependencies]
#include "../base/globals.h"

// [Api-Begin]
#include "../apibegin.h"

namespace asmjit {

// ============================================================================
// [asmjit::Operand]
// ============================================================================

// Prevent static initialization.
class Operand {
 public:
  struct BaseOp {
    uint8_t op;
    uint8_t size;
    uint8_t reserved_2_1;
    uint8_t reserved_3_1;

    uint32_t id;

    uint32_t reserved_8_4;
    uint32_t reserved_12_4;
  };

  // Kept in union to prevent LTO warnings.
  union {
    BaseOp _base;

    // Required to properly align this _fake_ `Operand`, not used.
    uint64_t _data[2];
  };
};

ASMJIT_VARAPI const Operand noOperand;
const Operand noOperand = {{ 0, 0, 0, 0, kInvalidValue, 0, 0 }};

} // asmjit namespace

// [Api-End]
#include "../apiend.h"
diff --git a/DynamicHooks/thirdparty/AsmJit/base/operand.h b/DynamicHooks/thirdparty/AsmJit/base/operand.h
new file mode 100644
index 0000000..c130407
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/operand.h
@@ -0,0 +1,1192 @@
+// [AsmJit]
// Complete x86/x64 JIT and Remote Assembler for C++.
//
// [License]
// Zlib - See LICENSE.md file in the package.

// [Guard]
#ifndef _ASMJIT_BASE_OPERAND_H
#define _ASMJIT_BASE_OPERAND_H

// [Dependencies]
#include "../base/utils.h"

// [Api-Begin]
#include "../apibegin.h"

namespace asmjit {

// ============================================================================
// [Forward Declarations]
// ============================================================================

class Assembler;
class Compiler;

//! \addtogroup asmjit_base
//! \{

// ============================================================================
// [asmjit::RegClass]
// ============================================================================

//! Register class.
ASMJIT_ENUM(RegClass) {
  //! Gp register class, compatible with all architectures.
  kRegClassGp = 0
};

// ============================================================================
// [asmjit::SizeDefs]
// ============================================================================

//! Common sizes of registers and data elements.
ASMJIT_ENUM(SizeDefs) {
  //! 1 byte size (BYTE).
  kSizeByte = 1,
  //! 2 bytes size (WORD).
  kSizeWord = 2,
  //! 4 bytes size (DWORD).
  kSizeDWord = 4,
  //! 8 bytes size (QWORD).
  kSizeQWord = 8,
  //! 10 bytes size (TWORD).
  kSizeTWord = 10,
  //! 16 bytes size (DQWORD).
  kSizeDQWord = 16,
  //! 32 bytes size (YWORD / QQWORD).
  kSizeYWord = 32,
  //! 64 bytes size (ZWORD / DQQWORD).
  kSizeZWord = 64
};

// ============================================================================
// [asmjit::MemType]
// ============================================================================

//! Type of memory operand.
ASMJIT_ENUM(MemType) {
  //! Memory operand is a combination of a base register, an optional index
  //! register, and displacement.
  //!
  //! The `Assembler` interprets `kMemTypeBaseIndex` and `kMemTypeStackIndex`
  //! types the same way, but `Compiler` interprets `kMemTypeBaseIndex` as
  //! `[base + index]` and `kMemTypeStackIndex` as `[stack(base) + index]`.
  kMemTypeBaseIndex = 0,

  //! Memory operand is a combination of variable's memory location, an
  //! optional index register, and displacement.
  //!
  //! The `Assembler` interprets `kMemTypeBaseIndex` and `kMemTypeStackIndex`
  //! types the same way, but `Compiler` interprets `kMemTypeBaseIndex` as
  //! `[base + index]` and `kMemTypeStackIndex` as `[stack(base) + index]`.
  kMemTypeStackIndex = 1,

  //! Memory operand is an absolute memory location.
  //!
  //! Supported mostly by x86, truncated to a 32-bit value when running in
  //! 64-bit mode (x64).
  kMemTypeAbsolute = 2,

  //! Memory operand refers to the memory location specified by a label.
  kMemTypeLabel = 3,

  //! Memory operand is an address specified by RIP.
  kMemTypeRip = 4
};

// ============================================================================
// [asmjit::VarType]
// ============================================================================

ASMJIT_ENUM(VarType) {
  //! Variable is 8-bit signed integer.
  kVarTypeInt8 = 0,
  //! Variable is 8-bit unsigned integer.
  kVarTypeUInt8 = 1,
  //! Variable is 16-bit signed integer.
  kVarTypeInt16 = 2,
  //! Variable is 16-bit unsigned integer.
  kVarTypeUInt16 = 3,
  //! Variable is 32-bit signed integer.
  kVarTypeInt32 = 4,
  //! Variable is 32-bit unsigned integer.
  kVarTypeUInt32 = 5,
  //! Variable is 64-bit signed integer.
  kVarTypeInt64 = 6,
  //! Variable is 64-bit unsigned integer.
  kVarTypeUInt64 = 7,

  //! Variable is target `intptr_t`, compatible with the target's `intptr_t` (not the host's).
  kVarTypeIntPtr = 8,
  //! Variable is target `uintptr_t`, compatible with the target's `uintptr_t` (not the host's).
  kVarTypeUIntPtr = 9,

  //! Variable is 32-bit floating point (single precision).
  kVarTypeFp32 = 10,
  //! Variable is 64-bit floating point (double precision).
  kVarTypeFp64 = 11,

  //! \internal
  _kVarTypeIntStart = kVarTypeInt8,
  //! 
\internal + _kVarTypeIntEnd = kVarTypeUIntPtr, + + //! \internal + _kVarTypeFpStart = kVarTypeFp32, + //! \internal + _kVarTypeFpEnd = kVarTypeFp64 +}; + +// ============================================================================ +// [asmjit::Operand] +// ============================================================================ + +//! Operand can contain register, memory location, immediate, or label. +class Operand { + public: + // -------------------------------------------------------------------------- + // [Type] + // -------------------------------------------------------------------------- + + //! Operand types that can be encoded in \ref Operand. + ASMJIT_ENUM(Type) { + //! Invalid operand, used only internally (not initialized Operand). + kTypeNone = 0, + //! Operand is a register. + kTypeReg = 1, + //! Operand is a variable. + kTypeVar = 2, + //! Operand is a memory. + kTypeMem = 3, + //! Operand is an immediate value. + kTypeImm = 4, + //! Operand is a label. + kTypeLabel = 5 + }; + + // -------------------------------------------------------------------------- + // [Id] + // -------------------------------------------------------------------------- + + //! Operand ID masks used to determine the operand type. + ASMJIT_ENUM(IdTag) { + //! Operand id refers to a variable (\ref Var). + kIdVarTag = 0x80000000U, + //! Operand id refers to a label (\ref Label). + kIdLabelTag = 0x00000000U, + //! Valid bits stored in operand ID (for extracting array index from ID). + kIdIndexMask = 0x7FFFFFFFU + }; + + // -------------------------------------------------------------------------- + // [Structs] + // -------------------------------------------------------------------------- + + //! \internal + //! + //! Base operand data. + struct BaseOp { + //! Type of the operand (see \ref Type). + uint8_t op; + //! Size of the operand (register, address, immediate, or variable). + uint8_t size; + //! \internal + uint8_t reserved_2_1; + //! \internal + uint8_t reserved_3_1; + + //! Operand id, identifier used by `Assembler` and `Compiler`. + //! + //! NOTE: Uninitialized operand has always set id to `kInvalidValue`. + uint32_t id; + + //! \internal + uint32_t reserved_8_4; + //! \internal + uint32_t reserved_12_4; + }; + + //! \internal + //! + //! Register or Variable operand data. + struct VRegOp { + //! Type of the operand (\ref kTypeReg or \ref kTypeVar). + uint8_t op; + //! Size of the operand (register or variable). + uint8_t size; + + union { + //! Register code = (type << 8) | index. + uint16_t code; + + //! Register type and index access. + struct { +#if ASMJIT_ARCH_LE + //! Register index. + uint8_t index; + //! Register type. + uint8_t type; +#else + //! Register type. + uint8_t type; + //! Register index. + uint8_t index; +#endif + }; + }; + + //! Variable id, used by `Compiler` to identify variables. + uint32_t id; + + union { + struct { + //! Variable type. + uint32_t vType; + //! \internal + uint32_t reserved_12_4; + }; + + //! \internal + //! + //! This is not needed or used, it's just to force compiler to always + //! align this struct to 8-bytes (it should fix LTO warning as well). + uint64_t reserved8_8; + }; + }; + + //! \internal + //! + //! Memory or Variable operand data. + struct VMemOp { + //! Type of the operand (\ref kTypeMem). + uint8_t op; + //! Size of the memory in bytes or zero. + uint8_t size; + //! Type of the memory operand, see `MemType`. + uint8_t type; + //! X86/X64 layout: + //! - segment [3 bits], see `X86Seg`. + //! 
+    //!   - shift [2 bits], index register shift (0 to 3).
+    uint8_t flags;
+
+    //! Base register, variable or label id.
+    uint32_t base;
+    //! Index register or variable.
+    uint32_t index;
+    //! 32-bit displacement or absolute address.
+    int32_t displacement;
+  };
+
+  //! \internal
+  //!
+  //! Immediate operand data.
+  struct ImmOp {
+    //! Type of the operand (\ref kTypeImm).
+    uint8_t op;
+    //! Size of the immediate (or 0 to autodetect).
+    uint8_t size;
+    //! \internal
+    uint8_t reserved_2_1;
+    //! \internal
+    uint8_t reserved_3_1;
+
+    //! Operand id, always set to `kInvalidValue` (immediates don't have IDs).
+    uint32_t id;
+
+    union {
+      //! 8x 8-bit signed immediate values.
+      int8_t _i8[8];
+      //! 8x 8-bit unsigned immediate values.
+      uint8_t _u8[8];
+
+      //! 4x 16-bit signed immediate values.
+      int16_t _i16[4];
+      //! 4x 16-bit unsigned immediate values.
+      uint16_t _u16[4];
+
+      //! 2x 32-bit signed immediate values.
+      int32_t _i32[2];
+      //! 2x 32-bit unsigned immediate values.
+      uint32_t _u32[2];
+
+      //! 1x 64-bit signed immediate value.
+      int64_t _i64[1];
+      //! 1x 64-bit unsigned immediate value.
+      uint64_t _u64[1];
+
+      //! 2x SP-FP values.
+      float _f32[2];
+      //! 1x DP-FP value.
+      double _f64[1];
+    } value;
+  };
+
+  //! \internal
+  //!
+  //! Label operand data.
+  struct LabelOp {
+    //! Type of the operand (\ref kTypeLabel).
+    uint8_t op;
+    //! Always zero.
+    uint8_t size;
+    //! \internal
+    uint8_t reserved_2_1;
+    //! \internal
+    uint8_t reserved_3_1;
+
+    //! Operand id (`kInvalidValue` if the label is not initialized by a code
+    //! generator).
+    uint32_t id;
+
+    //! \internal
+    uint32_t reserved_8_4;
+    //! \internal
+    uint32_t reserved_12_4;
+  };
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create an uninitialized operand.
+  ASMJIT_INLINE Operand() noexcept {
+    reset();
+  }
+
+  //! Create a reference to the `other` operand.
+  ASMJIT_INLINE Operand(const Operand& other) noexcept {
+    _init(other);
+  }
+
+  explicit ASMJIT_INLINE Operand(const _NoInit&) noexcept {}
+
+  // --------------------------------------------------------------------------
+  // [Base]
+  // --------------------------------------------------------------------------
+
+  //! Clone the `Operand`.
+  ASMJIT_INLINE Operand clone() const noexcept { return Operand(*this); }
+
+  //! Reset the `Operand`.
+  ASMJIT_INLINE void reset() noexcept {
+    _init_packed_op_sz_b0_b1_id(kTypeNone, 0, 0, 0, kInvalidValue);
+    _init_packed_d2_d3(0, 0);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Init & Copy]
+  // --------------------------------------------------------------------------
+
+  //! \internal
+  //!
+  //! Initialize operand to `other` (used by constructors).
+  ASMJIT_INLINE void _init(const Operand& other) noexcept {
+    ::memcpy(this, &other, sizeof(Operand));
+  }
+
+  ASMJIT_INLINE void _init_packed_op_sz_b0_b1_id(uint32_t op, uint32_t sz, uint32_t r0, uint32_t r1, uint32_t id) noexcept {
+    // This hack is not for performance, but to decrease the size of the binary
+    // generated when constructing AsmJit operands (mostly for third parties).
+    // Some compilers are not able to join four BYTE writes into a single DWORD
+    // write. Because `op`, `sz`, `r0` and `r1` are usually compile-time
+    // constants, the compiler can do a really nice job if they are joined
+    // by using bitwise operations.
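+    //
+    // Illustration (not part of the original source, and assuming
+    // `Utils::pack32_4x8` packs its arguments from the lowest byte up):
+    // `_init_packed_op_sz_b0_b1_id(kTypeReg, 4, 0, 0, id)` folds into the
+    // constant 0x00000401, which is stored together with `id` in one 64-bit
+    // write instead of five separate member stores.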
+    _packed[0].setPacked_2x32(Utils::pack32_4x8(op, sz, r0, r1), id);
+  }
+
+  ASMJIT_INLINE void _init_packed_op_sz_w0_id(uint32_t op, uint32_t sz, uint32_t w0, uint32_t id) noexcept {
+    _packed[0].setPacked_2x32(Utils::pack32_2x8_1x16(op, sz, w0), id);
+  }
+
+  ASMJIT_INLINE void _init_packed_d0_d1(uint32_t u0, uint32_t u1) noexcept {
+    _packed[0].setPacked_2x32(u0, u1);
+  }
+
+  ASMJIT_INLINE void _init_packed_d2_d3(uint32_t u2, uint32_t u3) noexcept {
+    _packed[1].setPacked_2x32(u2, u3);
+  }
+
+  //! \internal
+  //!
+  //! Initialize operand to `other` (used by assign operators).
+  ASMJIT_INLINE void _copy(const Operand& other) noexcept {
+    ::memcpy(this, &other, sizeof(Operand));
+  }
+
+  // --------------------------------------------------------------------------
+  // [Data]
+  // --------------------------------------------------------------------------
+
+  template<typename T>
+  ASMJIT_INLINE T& getData() noexcept {
+    return reinterpret_cast<T&>(_base);
+  }
+
+  template<typename T>
+  ASMJIT_INLINE const T& getData() const noexcept {
+    return reinterpret_cast<const T&>(_base);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Type]
+  // --------------------------------------------------------------------------
+
+  //! Get type of the operand, see \ref Type.
+  ASMJIT_INLINE uint32_t getOp() const noexcept { return _base.op; }
+
+  //! Get whether the operand is none (\ref kTypeNone).
+  ASMJIT_INLINE bool isNone() const noexcept { return (_base.op == kTypeNone); }
+  //! Get whether the operand is a register (\ref kTypeReg).
+  ASMJIT_INLINE bool isReg() const noexcept { return (_base.op == kTypeReg); }
+  //! Get whether the operand is a variable (\ref kTypeVar).
+  ASMJIT_INLINE bool isVar() const noexcept { return (_base.op == kTypeVar); }
+  //! Get whether the operand is a memory location (\ref kTypeMem).
+  ASMJIT_INLINE bool isMem() const noexcept { return (_base.op == kTypeMem); }
+  //! Get whether the operand is an immediate (\ref kTypeImm).
+  ASMJIT_INLINE bool isImm() const noexcept { return (_base.op == kTypeImm); }
+  //! Get whether the operand is a label (\ref kTypeLabel).
+  ASMJIT_INLINE bool isLabel() const noexcept { return (_base.op == kTypeLabel); }
+
+  // --------------------------------------------------------------------------
+  // [Type - Combined]
+  // --------------------------------------------------------------------------
+
+  //! Get register type.
+  ASMJIT_INLINE uint32_t getRegType() const noexcept { return _vreg.type; }
+  //! Get register index.
+  ASMJIT_INLINE uint32_t getRegIndex() const noexcept { return _vreg.index; }
+
+  //! Get whether the operand is a register of `type`.
+  ASMJIT_INLINE bool isRegType(uint32_t type) const noexcept {
+    return (_packed[0].u32[0] & Utils::pack32_2x8_1x16(0xFF, 0, 0xFF00)) == Utils::pack32_2x8_1x16(kTypeReg, 0, (type << 8));
+  }
+
+  //! Get whether the operand is a register of `type` and `index`.
+  ASMJIT_INLINE bool isRegCode(uint32_t type, uint32_t index) const noexcept {
+    return (_packed[0].u32[0] & Utils::pack32_2x8_1x16(0xFF, 0, 0xFFFF)) == Utils::pack32_2x8_1x16(kTypeReg, 0, (type << 8) + index);
+  }
+
+  //! Get whether the operand is a register or memory.
+  ASMJIT_INLINE bool isRegOrMem() const noexcept {
+    ASMJIT_ASSERT(kTypeReg == 1);
+    ASMJIT_ASSERT(kTypeMem == 3);
+    return (static_cast<uint32_t>(_base.op) | 0x2U) == 0x3U;
+  }
+
+  //! Get whether the operand is a variable or memory.
+  ASMJIT_INLINE bool isVarOrMem() const noexcept {
+    ASMJIT_ASSERT(kTypeVar == 2);
+    ASMJIT_ASSERT(kTypeMem == 3);
+    return (static_cast<uint32_t>(_base.op) - 2U) <= 1;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Size]
+  // --------------------------------------------------------------------------
+
+  //! Get size of the operand in bytes.
+  ASMJIT_INLINE uint32_t getSize() const noexcept { return _base.size; }
+
+  // --------------------------------------------------------------------------
+  // [Id]
+  // --------------------------------------------------------------------------
+
+  //! Get operand id.
+  //!
+  //! Operand ids are used internally by `Assembler` and `Compiler`.
+  //!
+  //! There is no way to change or remove an operand id; unneeded operands can
+  //! simply be reassigned with `operator=`.
+  ASMJIT_INLINE uint32_t getId() const noexcept { return _base.id; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  union {
+    //! Base data.
+    BaseOp _base;
+    //! Register or variable data.
+    VRegOp _vreg;
+    //! Memory data.
+    VMemOp _vmem;
+    //! Immediate data.
+    ImmOp _imm;
+    //! Label data.
+    LabelOp _label;
+
+    //! Packed operand as two 64-bit integers.
+    UInt64 _packed[2];
+  };
+};
+
+// ============================================================================
+// [asmjit::OperandUtil]
+// ============================================================================
+
+//! Operand utilities.
+struct OperandUtil {
+  //! Make a variable id.
+  static ASMJIT_INLINE uint32_t makeVarId(uint32_t id) noexcept {
+    return id | Operand::kIdVarTag;
+  }
+
+  //! Make a label id.
+  static ASMJIT_INLINE uint32_t makeLabelId(uint32_t id) noexcept {
+    return id | Operand::kIdLabelTag;
+  }
+
+  //! Strip the variable id bit so it becomes a pure index into the `VarData[]` array.
+  static ASMJIT_INLINE uint32_t stripVarId(uint32_t id) noexcept {
+    return id & Operand::kIdIndexMask;
+  }
+
+  //! Get whether the id refers to a `Var`.
+  //!
+  //! NOTE: The function will never return `true` if the id is `kInvalidValue`.
+  //! The trick is to compare the given id to -1 (kInvalidValue), so both
+  //! conditions are checked with a single comparison.
+  static ASMJIT_INLINE bool isVarId(uint32_t id) noexcept {
+    return static_cast<int32_t>(id) < -1;
+  }
+
+  //! Get whether the id refers to a `Label`.
+  //!
+  //! NOTE: The function will never return `true` if the id is `kInvalidValue`.
+  static ASMJIT_INLINE bool isLabelId(uint32_t id) noexcept {
+    return static_cast<int32_t>(id) >= 0;
+  }
+};
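+
+// A short sketch of the id-tagging scheme (illustrative, not from the
+// original source):
+//
+//   uint32_t id = OperandUtil::makeVarId(7); // 0x80000007
+//   OperandUtil::isVarId(id);                // true
+//   OperandUtil::stripVarId(id);             // 7
+//   OperandUtil::isVarId(kInvalidValue);     // false, -1 is reserved.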
+
+// ============================================================================
+// [asmjit::Reg]
+// ============================================================================
+
+//! Base class for all register operands.
+class Reg : public Operand {
+ public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a dummy base register.
+  ASMJIT_INLINE Reg() noexcept : Operand(NoInit) {
+    _init_packed_op_sz_w0_id(kTypeReg, 0, (kInvalidReg << 8) + kInvalidReg, kInvalidValue);
+    _init_packed_d2_d3(kInvalidVar, 0);
+  }
+
+  //! Create a new base register.
+  ASMJIT_INLINE Reg(uint32_t type, uint32_t index, uint32_t size) noexcept : Operand(NoInit) {
+    _init_packed_op_sz_w0_id(kTypeReg, size, (type << 8) + index, kInvalidValue);
+    _init_packed_d2_d3(kInvalidVar, 0);
+  }
+
+  //! Create a new reference to `other`.
+  ASMJIT_INLINE Reg(const Reg& other) noexcept : Operand(other) {}
+
+  //! Create a new reference to `other` and change the index to `index`.
+  ASMJIT_INLINE Reg(const Reg& other, uint32_t index) noexcept : Operand(other) {
+    _vreg.index = static_cast<uint8_t>(index);
+  }
+
+  explicit ASMJIT_INLINE Reg(const _NoInit&) noexcept : Operand(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [Reg Specific]
+  // --------------------------------------------------------------------------
+
+  //! Clone the `Reg` operand.
+  ASMJIT_INLINE Reg clone() const noexcept {
+    return Reg(*this);
+  }
+
+  //! Get whether the register type is equal to `type`.
+  ASMJIT_INLINE bool isRegType(uint32_t type) const noexcept {
+    return _vreg.type == type;
+  }
+
+  //! Get whether the register code is equal to `code`.
+  ASMJIT_INLINE bool isRegCode(uint32_t code) const noexcept {
+    return _vreg.code == code;
+  }
+
+  //! Get whether the register type and index match `type` and `index`.
+  ASMJIT_INLINE bool isRegCode(uint32_t type, uint32_t index) const noexcept {
+    return _vreg.code == (type << 8) + index;
+  }
+
+  //! Get the register code, equal to `(type << 8) + index`.
+  ASMJIT_INLINE uint32_t getRegCode() const noexcept {
+    return _vreg.code;
+  }
+
+  //! Get register type.
+  ASMJIT_INLINE uint32_t getRegType() const noexcept {
+    return _vreg.type;
+  }
+
+  //! Get register index.
+  ASMJIT_INLINE uint32_t getRegIndex() const noexcept {
+    return _vreg.index;
+  }
+
+#define ASMJIT_REG_OP(_Type_) \
+  ASMJIT_INLINE _Type_ clone() const ASMJIT_NOEXCEPT { \
+    return _Type_(*this); \
+  } \
+  \
+  /*! Set register `size`. */ \
+  ASMJIT_INLINE _Type_& setSize(uint32_t size) ASMJIT_NOEXCEPT { \
+    _vreg.size = static_cast<uint8_t>(size); \
+    return *this; \
+  } \
+  \
+  /*! Set register `code`. */ \
+  ASMJIT_INLINE _Type_& setCode(uint32_t code) ASMJIT_NOEXCEPT { \
+    _vreg.code = static_cast<uint16_t>(code); \
+    return *this; \
+  } \
+  \
+  /*! Set register `type` and `index`. */ \
+  ASMJIT_INLINE _Type_& setCode(uint32_t type, uint32_t index) ASMJIT_NOEXCEPT { \
+    _vreg.type = static_cast<uint8_t>(type); \
+    _vreg.index = static_cast<uint8_t>(index); \
+    return *this; \
+  } \
+  \
+  /*! Set register `type`. */ \
+  ASMJIT_INLINE _Type_& setType(uint32_t type) ASMJIT_NOEXCEPT { \
+    _vreg.type = static_cast<uint8_t>(type); \
+    return *this; \
+  } \
+  \
+  /*! Set register `index`. */ \
+  ASMJIT_INLINE _Type_& setIndex(uint32_t index) ASMJIT_NOEXCEPT { \
+    _vreg.index = static_cast<uint8_t>(index); \
+    return *this; \
+  } \
+  \
+  ASMJIT_INLINE _Type_& operator=(const _Type_& other) ASMJIT_NOEXCEPT { \
+    _copy(other); return *this; \
+  } \
+  \
+  ASMJIT_INLINE bool operator==(const _Type_& other) const ASMJIT_NOEXCEPT { \
+    return _packed[0].u32[0] == other._packed[0].u32[0]; \
+  } \
+  \
+  ASMJIT_INLINE bool operator!=(const _Type_& other) const ASMJIT_NOEXCEPT { \
+    return !operator==(other); \
+  }
+};
+
+// ============================================================================
+// [asmjit::BaseMem]
+// ============================================================================
+
+//! Base class for all memory operands.
+class BaseMem : public Operand {
+ public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE BaseMem() noexcept : Operand(NoInit) {
+    reset();
+  }
+
+  ASMJIT_INLINE BaseMem(const BaseMem& other) noexcept : Operand(other) {}
+  explicit ASMJIT_INLINE BaseMem(const _NoInit&) noexcept : Operand(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [BaseMem Specific]
+  // --------------------------------------------------------------------------
+
+  //! Clone the `BaseMem` operand.
+  ASMJIT_INLINE BaseMem clone() const noexcept {
+    return BaseMem(*this);
+  }
+
+  //! Reset the `BaseMem` operand.
+  ASMJIT_INLINE void reset() noexcept {
+    _init_packed_op_sz_b0_b1_id(kTypeMem, 0, kMemTypeBaseIndex, 0, kInvalidValue);
+    _init_packed_d2_d3(kInvalidValue, 0);
+  }
+
+  //! Get the type of the memory operand, see `MemType`.
+  ASMJIT_INLINE uint32_t getMemType() const noexcept {
+    return _vmem.type;
+  }
+
+  //! Get whether the type of the memory operand is either `kMemTypeBaseIndex`
+  //! or `kMemTypeStackIndex`.
+  ASMJIT_INLINE bool isBaseIndexType() const noexcept {
+    return _vmem.type <= kMemTypeStackIndex;
+  }
+
+  //! Get whether the memory operand has a base register.
+  ASMJIT_INLINE bool hasBase() const noexcept {
+    return _vmem.base != kInvalidValue;
+  }
+
+  //! Get memory operand base id, or `kInvalidValue`.
+  ASMJIT_INLINE uint32_t getBase() const noexcept {
+    return _vmem.base;
+  }
+
+  //! Set memory operand size.
+  ASMJIT_INLINE BaseMem& setSize(uint32_t size) noexcept {
+    _vmem.size = static_cast<uint8_t>(size);
+    return *this;
+  }
+
+  //! Get memory operand relative displacement.
+  ASMJIT_INLINE int32_t getDisplacement() const noexcept {
+    return _vmem.displacement;
+  }
+
+  //! Set memory operand relative displacement.
+  ASMJIT_INLINE BaseMem& setDisplacement(int32_t disp) noexcept {
+    _vmem.displacement = disp;
+    return *this;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE BaseMem& operator=(const BaseMem& other) noexcept {
+    _copy(other);
+    return *this;
+  }
+
+  ASMJIT_INLINE bool operator==(const BaseMem& other) const noexcept {
+    return (_packed[0] == other._packed[0]) & (_packed[1] == other._packed[1]);
+  }
+
+  ASMJIT_INLINE bool operator!=(const BaseMem& other) const noexcept {
+    return !(*this == other);
+  }
+};
+
+// ============================================================================
+// [asmjit::Imm]
+// ============================================================================
+
+//! Immediate operand.
+//!
+//! An immediate operand is usually part of the instruction itself, encoded
+//! before or after the instruction opcode. Immediates can only be signed or
+//! unsigned integers.
+//!
+//! To create an immediate operand use the `imm()` and `imm_u()` non-members
+//! or the `Imm` constructors.
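+//!
+//! A minimal sketch (illustrative, not from the original source):
+//!
+//! ~~~
+//! Imm a(-1);              // Signed 64-bit immediate.
+//! Imm b = imm_u(0xFFU);   // Unsigned immediate through the helper below.
+//! bool fits = a.isInt8(); // true, -1 fits into 8 bits.
+//! ~~~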
+class Imm : public Operand {
+ public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new immediate value (initial value is 0).
+  Imm() noexcept : Operand(NoInit) {
+    _init_packed_op_sz_b0_b1_id(kTypeImm, 0, 0, 0, kInvalidValue);
+    _imm.value._i64[0] = 0;
+  }
+
+  //! Create a new signed immediate value, assigning the value to `val`.
+  explicit Imm(int64_t val) noexcept : Operand(NoInit) {
+    _init_packed_op_sz_b0_b1_id(kTypeImm, 0, 0, 0, kInvalidValue);
+    _imm.value._i64[0] = val;
+  }
+
+  //! Create a new immediate value from `other`.
+  ASMJIT_INLINE Imm(const Imm& other) noexcept : Operand(other) {}
+
+  explicit ASMJIT_INLINE Imm(const _NoInit&) noexcept : Operand(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [Immediate Specific]
+  // --------------------------------------------------------------------------
+
+  //! Clone the `Imm` operand.
+  ASMJIT_INLINE Imm clone() const noexcept {
+    return Imm(*this);
+  }
+
+  //! Get whether the immediate can be cast to an 8-bit signed integer.
+  ASMJIT_INLINE bool isInt8() const noexcept { return Utils::isInt8(_imm.value._i64[0]); }
+  //! Get whether the immediate can be cast to an 8-bit unsigned integer.
+  ASMJIT_INLINE bool isUInt8() const noexcept { return Utils::isUInt8(_imm.value._i64[0]); }
+
+  //! Get whether the immediate can be cast to a 16-bit signed integer.
+  ASMJIT_INLINE bool isInt16() const noexcept { return Utils::isInt16(_imm.value._i64[0]); }
+  //! Get whether the immediate can be cast to a 16-bit unsigned integer.
+  ASMJIT_INLINE bool isUInt16() const noexcept { return Utils::isUInt16(_imm.value._i64[0]); }
+
+  //! Get whether the immediate can be cast to a 32-bit signed integer.
+  ASMJIT_INLINE bool isInt32() const noexcept { return Utils::isInt32(_imm.value._i64[0]); }
+  //! Get whether the immediate can be cast to a 32-bit unsigned integer.
+  ASMJIT_INLINE bool isUInt32() const noexcept { return Utils::isUInt32(_imm.value._i64[0]); }
+
+  //! Get immediate value as 8-bit signed integer.
+  ASMJIT_INLINE int8_t getInt8() const noexcept { return _imm.value._i8[_ASMJIT_ARCH_INDEX(8, 0)]; }
+  //! Get immediate value as 8-bit unsigned integer.
+  ASMJIT_INLINE uint8_t getUInt8() const noexcept { return _imm.value._u8[_ASMJIT_ARCH_INDEX(8, 0)]; }
+  //! Get immediate value as 16-bit signed integer.
+  ASMJIT_INLINE int16_t getInt16() const noexcept { return _imm.value._i16[_ASMJIT_ARCH_INDEX(4, 0)]; }
+  //! Get immediate value as 16-bit unsigned integer.
+  ASMJIT_INLINE uint16_t getUInt16() const noexcept { return _imm.value._u16[_ASMJIT_ARCH_INDEX(4, 0)]; }
+  //! Get immediate value as 32-bit signed integer.
+  ASMJIT_INLINE int32_t getInt32() const noexcept { return _imm.value._i32[_ASMJIT_ARCH_INDEX(2, 0)]; }
+  //! Get immediate value as 32-bit unsigned integer.
+  ASMJIT_INLINE uint32_t getUInt32() const noexcept { return _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 0)]; }
+  //! Get immediate value as 64-bit signed integer.
+  ASMJIT_INLINE int64_t getInt64() const noexcept { return _imm.value._i64[0]; }
+  //! Get immediate value as 64-bit unsigned integer.
+  ASMJIT_INLINE uint64_t getUInt64() const noexcept { return _imm.value._u64[0]; }
+
+  //! Get immediate value as `intptr_t`.
+  ASMJIT_INLINE intptr_t getIntPtr() const noexcept {
+    if (sizeof(intptr_t) == sizeof(int64_t))
+      return static_cast<intptr_t>(getInt64());
+    else
+      return static_cast<intptr_t>(getInt32());
+  }
+
+  //! Get immediate value as `uintptr_t`.
+  ASMJIT_INLINE uintptr_t getUIntPtr() const noexcept {
+    if (sizeof(uintptr_t) == sizeof(uint64_t))
+      return static_cast<uintptr_t>(getUInt64());
+    else
+      return static_cast<uintptr_t>(getUInt32());
+  }
+
+  //! Get low 32-bit signed integer.
+  ASMJIT_INLINE int32_t getInt32Lo() const noexcept { return _imm.value._i32[_ASMJIT_ARCH_INDEX(2, 0)]; }
+  //! Get low 32-bit unsigned integer.
+  ASMJIT_INLINE uint32_t getUInt32Lo() const noexcept { return _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 0)]; }
+  //! Get high 32-bit signed integer.
+  ASMJIT_INLINE int32_t getInt32Hi() const noexcept { return _imm.value._i32[_ASMJIT_ARCH_INDEX(2, 1)]; }
+  //! Get high 32-bit unsigned integer.
+  ASMJIT_INLINE uint32_t getUInt32Hi() const noexcept { return _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 1)]; }
+
+  //! Set immediate value to 8-bit signed integer `val`.
+  ASMJIT_INLINE Imm& setInt8(int8_t val) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      _imm.value._i64[0] = static_cast<int64_t>(val);
+    }
+    else {
+      int32_t val32 = static_cast<int32_t>(val);
+      _imm.value._i32[_ASMJIT_ARCH_INDEX(2, 0)] = val32;
+      _imm.value._i32[_ASMJIT_ARCH_INDEX(2, 1)] = val32 >> 31;
+    }
+    return *this;
+  }
+
+  //! Set immediate value to 8-bit unsigned integer `val`.
+  ASMJIT_INLINE Imm& setUInt8(uint8_t val) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      _imm.value._u64[0] = static_cast<uint64_t>(val);
+    }
+    else {
+      _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 0)] = static_cast<uint32_t>(val);
+      _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 1)] = 0;
+    }
+    return *this;
+  }
+
+  //! Set immediate value to 16-bit signed integer `val`.
+  ASMJIT_INLINE Imm& setInt16(int16_t val) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      _imm.value._i64[0] = static_cast<int64_t>(val);
+    }
+    else {
+      int32_t val32 = static_cast<int32_t>(val);
+      _imm.value._i32[_ASMJIT_ARCH_INDEX(2, 0)] = val32;
+      _imm.value._i32[_ASMJIT_ARCH_INDEX(2, 1)] = val32 >> 31;
+    }
+    return *this;
+  }
+
+  //! Set immediate value to 16-bit unsigned integer `val`.
+  ASMJIT_INLINE Imm& setUInt16(uint16_t val) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      _imm.value._u64[0] = static_cast<uint64_t>(val);
+    }
+    else {
+      _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 0)] = static_cast<uint32_t>(val);
+      _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 1)] = 0;
+    }
+    return *this;
+  }
+
+  //! Set immediate value to 32-bit signed integer `val`.
+  ASMJIT_INLINE Imm& setInt32(int32_t val) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      _imm.value._i64[0] = static_cast<int64_t>(val);
+    }
+    else {
+      _imm.value._i32[_ASMJIT_ARCH_INDEX(2, 0)] = val;
+      _imm.value._i32[_ASMJIT_ARCH_INDEX(2, 1)] = val >> 31;
+    }
+    return *this;
+  }
+
+  //! Set immediate value to 32-bit unsigned integer `val`.
+  ASMJIT_INLINE Imm& setUInt32(uint32_t val) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      _imm.value._u64[0] = static_cast<uint64_t>(val);
+    }
+    else {
+      _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 0)] = val;
+      _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 1)] = 0;
+    }
+    return *this;
+  }
+
+  //! Set immediate value to 64-bit signed integer `val`.
+  ASMJIT_INLINE Imm& setInt64(int64_t val) noexcept {
+    _imm.value._i64[0] = val;
+    return *this;
+  }
+
+  //! Set immediate value to 64-bit unsigned integer `val`.
+  ASMJIT_INLINE Imm& setUInt64(uint64_t val) noexcept {
+    _imm.value._u64[0] = val;
+    return *this;
+  }
+
+  //! Set immediate value to `intptr_t` `val`.
+  ASMJIT_INLINE Imm& setIntPtr(intptr_t val) noexcept {
+    _imm.value._i64[0] = static_cast<int64_t>(val);
+    return *this;
+  }
+
+  //! Set immediate value to `uintptr_t` `val`.
+  ASMJIT_INLINE Imm& setUIntPtr(uintptr_t val) noexcept {
+    _imm.value._u64[0] = static_cast<uint64_t>(val);
+    return *this;
+  }
+
+  //! Set immediate value to the pointer `p`.
+  ASMJIT_INLINE Imm& setPtr(void* p) noexcept {
+    return setIntPtr((intptr_t)p);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Float]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE Imm& setFloat(float f) noexcept {
+    _imm.value._f32[_ASMJIT_ARCH_INDEX(2, 0)] = f;
+    _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 1)] = 0;
+    return *this;
+  }
+
+  ASMJIT_INLINE Imm& setDouble(double d) noexcept {
+    _imm.value._f64[0] = d;
+    return *this;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Truncate]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE Imm& truncateTo8Bits() noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      _imm.value._u64[0] &= static_cast<uint64_t>(0x000000FFU);
+    }
+    else {
+      _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 0)] &= 0x000000FFU;
+      _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 1)] = 0;
+    }
+    return *this;
+  }
+
+  ASMJIT_INLINE Imm& truncateTo16Bits() noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      _imm.value._u64[0] &= static_cast<uint64_t>(0x0000FFFFU);
+    }
+    else {
+      _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 0)] &= 0x0000FFFFU;
+      _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 1)] = 0;
+    }
+    return *this;
+  }
+
+  ASMJIT_INLINE Imm& truncateTo32Bits() noexcept {
+    _imm.value._u32[_ASMJIT_ARCH_INDEX(2, 1)] = 0;
+    return *this;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  //! Assign `other` to the immediate operand.
+  ASMJIT_INLINE Imm& operator=(const Imm& other) noexcept {
+    _copy(other);
+    return *this;
+  }
+};
+
+// ============================================================================
+// [asmjit::Label]
+// ============================================================================
+
+//! Label (jump target or data location).
+//!
+//! Label represents a location in code typically used as a jump target, but
+//! may also be a reference to some data or a static variable. A label has to
+//! be explicitly created by the `Assembler` or any `ExternalTool` by using
+//! their `newLabel()` function.
+//!
+//! Example of using labels:
+//!
+//! ~~~
+//! // Create Assembler/Compiler.
+//! X86Assembler a;
+//!
+//! // Create Label instance.
+//! Label L1 = a.newLabel();
+//!
+//! // ... your code ...
+//!
+//! // Using label.
+//! a.jmp(L1);
+//!
+//! // ... your code ...
+//!
+//! // Bind label to the current position, see `Assembler::bind()`.
+//! a.bind(L1);
+//! ~~~
+class Label : public Operand {
+ public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new, unassociated label.
+  ASMJIT_INLINE Label() noexcept : Operand(NoInit) {
+    reset();
+  }
+
+  explicit ASMJIT_INLINE Label(uint32_t id) noexcept : Operand(NoInit) {
+    _init_packed_op_sz_b0_b1_id(kTypeLabel, 0, 0, 0, id);
+    _init_packed_d2_d3(0, 0);
+  }
+
+  //! Create a reference to another label.
+ ASMJIT_INLINE Label(const Label& other) noexcept : Operand(other) {} + + explicit ASMJIT_INLINE Label(const _NoInit&) noexcept : Operand(NoInit) {} + + // -------------------------------------------------------------------------- + // [Reset] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE void reset() noexcept { + _init_packed_op_sz_b0_b1_id(kTypeLabel, 0, 0, 0, kInvalidValue); + _init_packed_d2_d3(0, 0); + } + + // -------------------------------------------------------------------------- + // [Label Specific] + // -------------------------------------------------------------------------- + + //! Get whether the label has been initialized by `Assembler` or `Compiler`. + ASMJIT_INLINE bool isInitialized() const noexcept { return _label.id != kInvalidValue; } + + // -------------------------------------------------------------------------- + // [Operator Overload] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE Label& operator=(const Label& other) noexcept { _copy(other); return *this; } + + ASMJIT_INLINE bool operator==(const Label& other) const noexcept { return _base.id == other._base.id; } + ASMJIT_INLINE bool operator!=(const Label& other) const noexcept { return _base.id != other._base.id; } +}; + +// ============================================================================ +// [asmjit::Var] +// ============================================================================ + +#if !defined(ASMJIT_DISABLE_COMPILER) +//! Base class for all variables. +class Var : public Operand { + public: + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE Var() noexcept : Operand(NoInit) { + _init_packed_op_sz_b0_b1_id(kTypeVar, 0, 0, 0, kInvalidValue); + _init_packed_d2_d3(kInvalidValue, kInvalidValue); + } + + ASMJIT_INLINE Var(const Var& other) noexcept : Operand(other) {} + + explicit ASMJIT_INLINE Var(const _NoInit&) noexcept : Operand(NoInit) {} + + // -------------------------------------------------------------------------- + // [Var Specific] + // -------------------------------------------------------------------------- + + //! Clone `Var` operand. + ASMJIT_INLINE Var clone() const noexcept { return Var(*this); } + + //! Reset Var operand. + ASMJIT_INLINE void reset() noexcept { + _init_packed_op_sz_b0_b1_id(kTypeVar, 0, kInvalidReg, kInvalidReg, kInvalidValue); + _init_packed_d2_d3(kInvalidValue, kInvalidValue); + } + + //! Get whether the variable has been initialized by `Compiler`. + ASMJIT_INLINE bool isInitialized() const noexcept { return _vreg.id != kInvalidValue; } + //! Get variable type. 
+  ASMJIT_INLINE uint32_t getVarType() const noexcept { return _vreg.vType; }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE Var& operator=(const Var& other) noexcept { _copy(other); return *this; }
+
+  ASMJIT_INLINE bool operator==(const Var& other) const noexcept { return _packed[0] == other._packed[0]; }
+  ASMJIT_INLINE bool operator!=(const Var& other) const noexcept { return !operator==(other); }
+};
+#endif // !ASMJIT_DISABLE_COMPILER
+
+// ============================================================================
+// [asmjit::Operand - Globals]
+// ============================================================================
+
+//! No operand, can be used to reset an operand by assignment or to refer to an
+//! operand that doesn't exist.
+ASMJIT_VARAPI const Operand noOperand;
+
+//! Create a signed immediate operand.
+static ASMJIT_INLINE Imm imm(int64_t val) noexcept {
+  return Imm(val);
+}
+
+//! Create an unsigned immediate operand.
+static ASMJIT_INLINE Imm imm_u(uint64_t val) noexcept {
+  return Imm(static_cast<int64_t>(val));
+}
+
+//! Create a `void*` immediate operand.
+template<typename T>
+static ASMJIT_INLINE Imm imm_ptr(T p) noexcept {
+  return Imm(static_cast<int64_t>((intptr_t)p));
+}
+
+//! \}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_OPERAND_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base/podvector.cpp b/DynamicHooks/thirdparty/AsmJit/base/podvector.cpp
new file mode 100644
index 0000000..8e02035
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/podvector.cpp
@@ -0,0 +1,132 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies]
+#include "../base/podvector.h"
+#include "../base/utils.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::PodVectorBase - NullData]
+// ============================================================================
+
+const PodVectorBase::Data PodVectorBase::_nullData = { 0, 0 };
+
+static ASMJIT_INLINE bool isDataStatic(PodVectorBase* self, PodVectorBase::Data* d) noexcept {
+  return (void*)(self + 1) == (void*)d;
+}
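+
+// Note (illustrative, not part of the original source): under the usual
+// object layout, `PodVectorTmp<T, N>` places its `StaticData` buffer
+// immediately after the vector object, so `(void*)(self + 1) == (void*)d`
+// detects an inline (non-heap) buffer that must never be passed to
+// ASMJIT_FREE.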
+
+// ============================================================================
+// [asmjit::PodVectorBase - Reset]
+// ============================================================================
+
+//! Clear vector data and free the internal buffer.
+void PodVectorBase::reset(bool releaseMemory) noexcept {
+  Data* d = _d;
+  if (d == &_nullData)
+    return;
+
+  if (releaseMemory && !isDataStatic(this, d)) {
+    ASMJIT_FREE(d);
+    _d = const_cast<Data*>(&_nullData);
+    return;
+  }
+
+  d->length = 0;
+}
+
+// ============================================================================
+// [asmjit::PodVectorBase - Helpers]
+// ============================================================================
+
+Error PodVectorBase::_grow(size_t n, size_t sizeOfT) noexcept {
+  Data* d = _d;
+
+  size_t threshold = kMemAllocGrowMax / sizeOfT;
+  size_t capacity = d->capacity;
+  size_t after = d->length;
+
+  if (IntTraits<size_t>::maxValue() - n < after)
+    return kErrorNoHeapMemory;
+
+  after += n;
+
+  if (capacity >= after)
+    return kErrorOk;
+
+  // PodVector is used as a linear array for some data structures used by
+  // AsmJit code generation. The purpose of this aggressive growth scheme
+  // is to minimize memory reallocations, because AsmJit code-generation
+  // classes are short-lived and will be freed or reused soon.
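+  //
+  // For example (not part of the original source): growing one element at a
+  // time from an empty vector steps the capacity through 32, 128, 512, 1024,
+  // 2048, ... fixed jumps first, then doubling until `threshold` is reached,
+  // then linear growth in `threshold`-sized steps.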
+  if (capacity < 32)
+    capacity = 32;
+  else if (capacity < 128)
+    capacity = 128;
+  else if (capacity < 512)
+    capacity = 512;
+
+  while (capacity < after) {
+    if (capacity < threshold)
+      capacity *= 2;
+    else
+      capacity += threshold;
+  }
+
+  return _reserve(capacity, sizeOfT);
+}
+
+Error PodVectorBase::_reserve(size_t n, size_t sizeOfT) noexcept {
+  Data* d = _d;
+
+  if (d->capacity >= n)
+    return kErrorOk;
+
+  size_t nBytes = sizeof(Data) + n * sizeOfT;
+  if (ASMJIT_UNLIKELY(nBytes < n))
+    return kErrorNoHeapMemory;
+
+  if (d == &_nullData) {
+    d = static_cast<Data*>(ASMJIT_ALLOC(nBytes));
+    if (ASMJIT_UNLIKELY(d == nullptr))
+      return kErrorNoHeapMemory;
+    d->length = 0;
+  }
+  else {
+    if (isDataStatic(this, d)) {
+      Data* oldD = d;
+
+      d = static_cast<Data*>(ASMJIT_ALLOC(nBytes));
+      if (ASMJIT_UNLIKELY(d == nullptr))
+        return kErrorNoHeapMemory;
+
+      size_t len = oldD->length;
+      d->length = len;
+      ::memcpy(d->getData(), oldD->getData(), len * sizeOfT);
+    }
+    else {
+      d = static_cast<Data*>(ASMJIT_REALLOC(d, nBytes));
+      if (ASMJIT_UNLIKELY(d == nullptr))
+        return kErrorNoHeapMemory;
+    }
+  }
+
+  d->capacity = n;
+  _d = d;
+
+  return kErrorOk;
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
diff --git a/DynamicHooks/thirdparty/AsmJit/base/podvector.h b/DynamicHooks/thirdparty/AsmJit/base/podvector.h
new file mode 100644
index 0000000..ff7efe5
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/podvector.h
@@ -0,0 +1,281 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_PODVECTOR_H
+#define _ASMJIT_BASE_PODVECTOR_H
+
+// [Dependencies]
+#include "../base/globals.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+//! \addtogroup asmjit_base
+//! \{
+
+// ============================================================================
+// [asmjit::PodVectorBase]
+// ============================================================================
+
+//! \internal
+class PodVectorBase {
+ public:
+  // --------------------------------------------------------------------------
+  // [Data]
+  // --------------------------------------------------------------------------
+
+  //! \internal
+  struct Data {
+    //! Get data.
+    ASMJIT_INLINE void* getData() const noexcept {
+      return static_cast<void*>(const_cast<Data*>(this + 1));
+    }
+
+    //! Capacity of the vector.
+    size_t capacity;
+    //! Length of the vector.
+    size_t length;
+  };
+
+  static ASMJIT_API const Data _nullData;
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new instance of `PodVectorBase`.
+  ASMJIT_INLINE PodVectorBase() noexcept : _d(const_cast<Data*>(&_nullData)) {}
+  //! Destroy the `PodVectorBase` and its data.
+  ASMJIT_INLINE ~PodVectorBase() noexcept { reset(true); }
+
+protected:
+  explicit ASMJIT_INLINE PodVectorBase(Data* d) noexcept : _d(d) {}
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+public:
+  //! Reset the vector data and set its `length` to zero.
+  //!
+  //! If `releaseMemory` is true the vector buffer will be released to the
+  //! system.
+  ASMJIT_API void reset(bool releaseMemory = false) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Grow / Reserve]
+  // --------------------------------------------------------------------------
+
+protected:
+  ASMJIT_API Error _grow(size_t n, size_t sizeOfT) noexcept;
+  ASMJIT_API Error _reserve(size_t n, size_t sizeOfT) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+public:
+  Data* _d;
+};
+
+// ============================================================================
+// [asmjit::PodVector]
+// ============================================================================
+
+//! Template used to store and manage an array of POD data.
+//!
+//! This template has these advantages over other vector<> templates:
+//! - Non-copyable (designed to be non-copyable, we want it)
+//! - No copy-on-write (some STL implementations use it)
+//! - Optimized for working only with POD types
+//! - Uses ASMJIT_... memory management macros
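+//!
+//! A short usage sketch (illustrative, not from the original source):
+//!
+//! ~~~
+//! PodVector<uint32_t> v;
+//! v.append(1);
+//! v.append(2);
+//! size_t i = v.indexOf(2);  // i == 1
+//! v.removeAt(i);            // v now contains only { 1 }
+//! ~~~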
+template<typename T>
+class PodVector : public PodVectorBase {
+ public:
+  ASMJIT_NO_COPY(PodVector)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new instance of `PodVector`.
+  ASMJIT_INLINE PodVector() noexcept {}
+  //! Destroy the `PodVector` and its data.
+  ASMJIT_INLINE ~PodVector() noexcept {}
+
+protected:
+  explicit ASMJIT_INLINE PodVector(Data* d) noexcept : PodVectorBase(d) {}
+
+  // --------------------------------------------------------------------------
+  // [Data]
+  // --------------------------------------------------------------------------
+
+public:
+  //! Get whether the vector is empty.
+  ASMJIT_INLINE bool isEmpty() const noexcept { return _d->length == 0; }
+  //! Get length.
+  ASMJIT_INLINE size_t getLength() const noexcept { return _d->length; }
+  //! Get capacity.
+  ASMJIT_INLINE size_t getCapacity() const noexcept { return _d->capacity; }
+  //! Get data.
+  ASMJIT_INLINE T* getData() noexcept { return static_cast<T*>(_d->getData()); }
+  //! \overload
+  ASMJIT_INLINE const T* getData() const noexcept { return static_cast<const T*>(_d->getData()); }
+
+  // --------------------------------------------------------------------------
+  // [Grow / Reserve]
+  // --------------------------------------------------------------------------
+
+  //! Called to grow the buffer to fit at least `n` elements more.
+  ASMJIT_INLINE Error _grow(size_t n) noexcept { return PodVectorBase::_grow(n, sizeof(T)); }
+  //! Realloc the internal array to fit at least `n` items.
+  ASMJIT_INLINE Error _reserve(size_t n) noexcept { return PodVectorBase::_reserve(n, sizeof(T)); }
+
+  // --------------------------------------------------------------------------
+  // [Ops]
+  // --------------------------------------------------------------------------
+
+  //! Prepend `item` to the vector.
+  Error prepend(const T& item) noexcept {
+    Data* d = _d;
+
+    if (d->length == d->capacity) {
+      ASMJIT_PROPAGATE_ERROR(_grow(1));
+      d = _d;   // _grow() may reallocate and update `_d`.
+    }
+
+    ::memmove(static_cast<T*>(d->getData()) + 1, d->getData(), d->length * sizeof(T));
+    ::memcpy(d->getData(), &item, sizeof(T));
+
+    d->length++;
+    return kErrorOk;
+  }
+
+  //! Insert an `item` at the `index`.
+  Error insert(size_t index, const T& item) noexcept {
+    Data* d = _d;
+    ASMJIT_ASSERT(index <= d->length);
+
+    if (d->length == d->capacity) {
+      ASMJIT_PROPAGATE_ERROR(_grow(1));
+      d = _d;
+    }
+
+    T* dst = static_cast<T*>(d->getData()) + index;
+    ::memmove(dst + 1, dst, (d->length - index) * sizeof(T));
+    ::memcpy(dst, &item, sizeof(T));
+
+    d->length++;
+    return kErrorOk;
+  }
+
+  //! Append `item` to the vector.
+  Error append(const T& item) noexcept {
+    Data* d = _d;
+
+    if (d->length == d->capacity) {
+      ASMJIT_PROPAGATE_ERROR(_grow(1));
+      d = _d;
+    }
+
+    ::memcpy(static_cast<T*>(d->getData()) + d->length, &item, sizeof(T));
+
+    d->length++;
+    return kErrorOk;
+  }
+
+  //! Get index of `val` or `kInvalidIndex` if not found.
+  size_t indexOf(const T& val) const noexcept {
+    Data* d = _d;
+
+    const T* data = static_cast<const T*>(d->getData());
+    size_t len = d->length;
+
+    for (size_t i = 0; i < len; i++)
+      if (data[i] == val)
+        return i;
+
+    return kInvalidIndex;
+  }
+
+  //! Remove item at index `i`.
+  void removeAt(size_t i) noexcept {
+    Data* d = _d;
+    ASMJIT_ASSERT(i < d->length);
+
+    T* data = static_cast<T*>(d->getData()) + i;
+    d->length--;
+    ::memmove(data, data + 1, (d->length - i) * sizeof(T));
+  }
+
+  //! Swap this pod-vector with `other`.
+  void swap(PodVector& other) noexcept {
+    Data* otherData = other._d;
+    other._d = _d;
+    _d = otherData;
+  }
+
+  //! Get item at index `i`.
+  ASMJIT_INLINE T& operator[](size_t i) noexcept {
+    ASMJIT_ASSERT(i < getLength());
+    return getData()[i];
+  }
+
+  //! Get item at index `i`.
+  ASMJIT_INLINE const T& operator[](size_t i) const noexcept {
+    ASMJIT_ASSERT(i < getLength());
+    return getData()[i];
+  }
+};
+
+// ============================================================================
+// [asmjit::PodVectorTmp]
+// ============================================================================
+
+template<typename T, size_t N>
+class PodVectorTmp : public PodVector<T> {
+ public:
+  ASMJIT_NO_COPY(PodVectorTmp)
+
+  // --------------------------------------------------------------------------
+  // [StaticData]
+  // --------------------------------------------------------------------------
+
+  struct StaticData : public PodVectorBase::Data {
+    char data[sizeof(T) * N];
+  };
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new instance of `PodVectorTmp`.
+  ASMJIT_INLINE PodVectorTmp() noexcept : PodVector<T>(&_staticData) {
+    _staticData.capacity = N;
+    _staticData.length = 0;
+  }
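+
+  // Illustration (not part of the original source): with `PodVectorTmp<int, 8>`
+  // the first 8 appended items live in `_staticData.data` and need no heap
+  // allocation; `_reserve` switches to heap memory only once N is exceeded.
+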
+  //! Destroy the `PodVectorTmp` and its data.
+  ASMJIT_INLINE ~PodVectorTmp() noexcept {}
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  StaticData _staticData;
+};
+
+//! \}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_PODVECTOR_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base/runtime.cpp b/DynamicHooks/thirdparty/AsmJit/base/runtime.cpp
new file mode 100644
index 0000000..ee9aa49
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/runtime.cpp
@@ -0,0 +1,214 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies]
+#include "../base/assembler.h"
+#include "../base/runtime.h"
+
+// TODO: Rename this, or make call conv independent of CompilerFunc.
+#include "../base/compilerfunc.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::Runtime - Utilities]
+// ============================================================================
+
+static ASMJIT_INLINE uint32_t hostStackAlignment() noexcept {
+  // By default a pointer-size stack alignment is assumed.
+  uint32_t alignment = sizeof(intptr_t);
+
+  // ARM & ARM64
+  // -----------
+  //
+  //   - 32-bit ARM requires the stack to be aligned to 8 bytes.
+  //   - 64-bit ARM requires the stack to be aligned to 16 bytes.
+#if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
+  alignment = ASMJIT_ARCH_ARM32 ? 8 : 16;
+#endif
+
+  // X86 & X64
+  // ---------
+  //
+  //   - 32-bit X86 requires the stack to be aligned to 4 bytes. Modern Linux,
+  //     Apple, and BSD systems guarantee 16-byte stack alignment even in
+  //     32-bit mode, but this is not certain for other UNIX-like operating
+  //     systems, because 16-byte alignment is an addition to an older
+  //     specification.
+  //   - 64-bit X86 requires the stack to be aligned to 16 bytes.
+#if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
+  int modernOS = ASMJIT_OS_LINUX || // Linux & Android.
+                 ASMJIT_OS_MAC   || // OSX and iOS.
+                 ASMJIT_OS_BSD;     // BSD variants.
+  alignment = (ASMJIT_ARCH_X64 || modernOS) ? 16 : 4;
+#endif
+
+  return alignment;
+}
+
+static ASMJIT_INLINE void hostFlushInstructionCache(void* p, size_t size) noexcept {
+  // Only useful on non-x86 architectures.
+#if !ASMJIT_ARCH_X86 && !ASMJIT_ARCH_X64
+# if ASMJIT_OS_WINDOWS
+  // Windows has built-in support in kernel32.dll.
+  ::FlushInstructionCache(::GetCurrentProcess(), p, size);
+# endif // ASMJIT_OS_WINDOWS
+#else
+  ASMJIT_UNUSED(p);
+  ASMJIT_UNUSED(size);
+#endif // !ASMJIT_ARCH_X86 && !ASMJIT_ARCH_X64
+}
+
+// ============================================================================
+// [asmjit::Runtime - Construction / Destruction]
+// ============================================================================
+
+Runtime::Runtime() noexcept
+  : _runtimeType(kTypeNone),
+    _allocType(kVMemAllocFreeable),
+    _cpuInfo(),
+    _stackAlignment(0),
+    _cdeclConv(kCallConvNone),
+    _stdCallConv(kCallConvNone),
+    _baseAddress(kNoBaseAddress),
+    _sizeLimit(0) {
+
+  ::memset(_reserved, 0, sizeof(_reserved));
+}
+Runtime::~Runtime() noexcept {}
+
+// ============================================================================
+// [asmjit::HostRuntime - Construction / Destruction]
+// ============================================================================
+
+HostRuntime::HostRuntime() noexcept {
+  _runtimeType = kTypeJit;
+  _cpuInfo = CpuInfo::getHost();
+
+  _stackAlignment = hostStackAlignment();
+  _cdeclConv = kCallConvHostCDecl;
+  _stdCallConv = kCallConvHostStdCall;
+}
+HostRuntime::~HostRuntime() noexcept {}
+
+// ============================================================================
+// [asmjit::HostRuntime - Interface]
+// ============================================================================
+
+void HostRuntime::flush(void* p, size_t size) noexcept {
+  hostFlushInstructionCache(p, size);
+}
+
+// ============================================================================
+// [asmjit::StaticRuntime - Construction / Destruction]
+// ============================================================================
+
+StaticRuntime::StaticRuntime(void* baseAddress, size_t sizeLimit) noexcept {
+  _sizeLimit = sizeLimit;
+  _baseAddress = static_cast<Ptr>((uintptr_t)baseAddress);
+}
+StaticRuntime::~StaticRuntime() noexcept {}
+
+// ============================================================================
+// [asmjit::StaticRuntime - Interface]
+// ============================================================================
+
+Error StaticRuntime::add(void** dst, Assembler* assembler) noexcept {
+  size_t codeSize = assembler->getCodeSize();
+  size_t sizeLimit = _sizeLimit;
+
+  if (codeSize == 0) {
+    *dst = nullptr;
+    return kErrorNoCodeGenerated;
+  }
+
+  if (sizeLimit != 0 && sizeLimit < codeSize) {
+    *dst = nullptr;
+    return kErrorCodeTooLarge;
+  }
+
+  Ptr baseAddress = _baseAddress;
+  uint8_t* p = static_cast<uint8_t*>((void*)static_cast<uintptr_t>(baseAddress));
+
+  // Since the base address is known, the `relocSize` returned should be equal
+  // to `codeSize`. It's better to fail if they don't match instead of passing
+  // silently.
+  size_t relocSize = assembler->relocCode(p, baseAddress);
+  if (relocSize == 0 || codeSize != relocSize) {
+    *dst = nullptr;
+    return kErrorInvalidState;
+  }
+
+  // Advance the base address and shrink the remaining capacity accordingly.
+  _baseAddress += codeSize;
+  if (sizeLimit != 0)
+    _sizeLimit = sizeLimit - codeSize;
+
+  flush(p, codeSize);
+  *dst = p;
+
+  return kErrorOk;
+}
+
+Error StaticRuntime::release(void* p) noexcept {
+  // There is nothing to release as `StaticRuntime` doesn't manage any memory.
+ ASMJIT_UNUSED(p); + return kErrorOk; +} + +// ============================================================================ +// [asmjit::JitRuntime - Construction / Destruction] +// ============================================================================ + +JitRuntime::JitRuntime() noexcept {} +JitRuntime::~JitRuntime() noexcept {} + +// ============================================================================ +// [asmjit::JitRuntime - Interface] +// ============================================================================ + +Error JitRuntime::add(void** dst, Assembler* assembler) noexcept { + size_t codeSize = assembler->getCodeSize(); + if (codeSize == 0) { + *dst = nullptr; + return kErrorNoCodeGenerated; + } + + void* p = _memMgr.alloc(codeSize, getAllocType()); + if (p == nullptr) { + *dst = nullptr; + return kErrorNoVirtualMemory; + } + + // Relocate the code and release the unused memory back to `VMemMgr`. + size_t relocSize = assembler->relocCode(p); + if (relocSize == 0) { + *dst = nullptr; + _memMgr.release(p); + return kErrorInvalidState; + } + + if (relocSize < codeSize) + _memMgr.shrink(p, relocSize); + + flush(p, relocSize); + *dst = p; + + return kErrorOk; +} + +Error JitRuntime::release(void* p) noexcept { + return _memMgr.release(p); +} + +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" diff --git a/DynamicHooks/thirdparty/AsmJit/base/runtime.h b/DynamicHooks/thirdparty/AsmJit/base/runtime.h new file mode 100644 index 0000000..9239a30 --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/base/runtime.h @@ -0,0 +1,266 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BASE_RUNTIME_H +#define _ASMJIT_BASE_RUNTIME_H + +// [Dependencies] +#include "../base/cpuinfo.h" +#include "../base/vmem.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [Forward Declarations] +// ============================================================================ + +class Assembler; +class CpuInfo; + +//! \addtogroup asmjit_base +//! \{ + +// ============================================================================ +// [asmjit::Runtime] +// ============================================================================ + +//! Base runtime. +class ASMJIT_VIRTAPI Runtime { + public: + ASMJIT_NO_COPY(Runtime) + + // -------------------------------------------------------------------------- + // [asmjit::RuntimeType] + // -------------------------------------------------------------------------- + + ASMJIT_ENUM(Type) { + kTypeNone = 0, + kTypeJit = 1, + kTypeRemote = 2 + }; + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! Create a `Runtime` instance. + ASMJIT_API Runtime() noexcept; + //! Destroy the `Runtime` instance. + ASMJIT_API virtual ~Runtime() noexcept; + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! Get the runtime type, see \ref Type. + ASMJIT_INLINE uint32_t getRuntimeType() const noexcept { return _runtimeType; } + + //! Get stack alignment of the target. + ASMJIT_INLINE uint32_t getStackAlignment() const noexcept { return _stackAlignment; } + + //! 
Get the CDECL calling convention conforming to the runtime's ABI.
+  //!
+  //! NOTE: This is the default calling convention used by the runtime's target.
+  ASMJIT_INLINE uint32_t getCdeclConv() const noexcept { return _cdeclConv; }
+  //! Get the STDCALL calling convention conforming to the runtime's ABI.
+  //!
+  //! NOTE: The STDCALL calling convention is only used by 32-bit x86 targets.
+  //! On all other targets it's mapped to CDECL, and calling `getStdCallConv()`
+  //! returns the same as `getCdeclConv()`.
+  ASMJIT_INLINE uint32_t getStdCallConv() const noexcept { return _stdCallConv; }
+
+  //! Get CPU information.
+  ASMJIT_INLINE const CpuInfo& getCpuInfo() const noexcept { return _cpuInfo; }
+  //! Set CPU information.
+  ASMJIT_INLINE void setCpuInfo(const CpuInfo& ci) noexcept { _cpuInfo = ci; }
+
+  //! Get whether the runtime has a base address.
+  ASMJIT_INLINE bool hasBaseAddress() const noexcept { return _baseAddress != kNoBaseAddress; }
+  //! Get the base address.
+  ASMJIT_INLINE Ptr getBaseAddress() const noexcept { return _baseAddress; }
+
+  // --------------------------------------------------------------------------
+  // [Interface]
+  // --------------------------------------------------------------------------
+
+  //! Allocate the memory needed for the code generated by `assembler` and
+  //! relocate it to the target location.
+  //!
+  //! The beginning of the memory allocated for the function is returned in
+  //! `dst`. Returns a status code as \ref ErrorCode; on failure `dst` is set
+  //! to `nullptr`.
+  virtual Error add(void** dst, Assembler* assembler) noexcept = 0;
+
+  //! Release memory allocated by `add`.
+  virtual Error release(void* p) noexcept = 0;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Type of the runtime.
+  uint8_t _runtimeType;
+  //! Type of the allocation.
+  uint8_t _allocType;
+
+  //! Runtime's stack alignment.
+  uint8_t _stackAlignment;
+  //! CDECL calling convention conforming to the runtime ABI.
+  uint8_t _cdeclConv;
+  //! STDCALL calling convention conforming to the runtime ABI.
+  uint8_t _stdCallConv;
+  //! \internal
+  uint8_t _reserved[3];
+
+  //! Runtime CPU information.
+  CpuInfo _cpuInfo;
+
+  //! Base address (-1 means no base address).
+  Ptr _baseAddress;
+  //! Maximum size of the code that can be added to the runtime (0=unlimited).
+  size_t _sizeLimit;
+};
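+
+// A minimal sketch of the `add()` / `release()` contract (illustrative, not
+// from the original source; `X86Assembler` and `x86::eax` come from the
+// vendored x86 backend):
+//
+//   JitRuntime runtime;
+//   X86Assembler a(&runtime);
+//   a.mov(x86::eax, 42);
+//   a.ret();
+//
+//   typedef int (*Fn)(void);
+//   Fn fn;
+//   if (runtime.add((void**)&fn, &a) == kErrorOk) {
+//     int r = fn();              // r == 42
+//     runtime.release((void*)fn);
+//   }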
+
+// ============================================================================
+// [asmjit::HostRuntime]
+// ============================================================================
+
+//! Base runtime for JIT code generation.
+class ASMJIT_VIRTAPI HostRuntime : public Runtime {
+ public:
+  ASMJIT_NO_COPY(HostRuntime)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a `HostRuntime` instance.
+  ASMJIT_API HostRuntime() noexcept;
+  //! Destroy the `HostRuntime` instance.
+  ASMJIT_API virtual ~HostRuntime() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Interface]
+  // --------------------------------------------------------------------------
+
+  //! Flush the instruction cache.
+  //!
+  //! This member function is called after the code has been copied to the
+  //! destination buffer. It is only useful for JIT code generation as it
+  //! causes a flush of the processor's instruction cache.
+  //!
+  //! Flushing is basically a NOP under X86/X64, but is needed by architectures
+  //! that do not have a transparent instruction cache.
+  //!
+  //! This function can also be overridden to improve compatibility with tools
+  //! such as Valgrind; however, it's not an official part of AsmJit.
+  ASMJIT_API virtual void flush(void* p, size_t size) noexcept;
+};
+
+// ============================================================================
+// [asmjit::StaticRuntime]
+// ============================================================================
+
+//! JIT static runtime.
+//!
+//! A static runtime can be used to generate code targeting a known, fixed
+//! memory location.
+class ASMJIT_VIRTAPI StaticRuntime : public HostRuntime {
+ public:
+  ASMJIT_NO_COPY(StaticRuntime)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a `StaticRuntime` instance.
+  //!
+  //! The `baseAddress` specifies a fixed target address, which will be used
+  //! as the base address for relocation, and `sizeLimit` specifies the
+  //! maximum size of code that can be copied to it. If there is no limit,
+  //! `sizeLimit` should be zero.
+  ASMJIT_API StaticRuntime(void* baseAddress, size_t sizeLimit = 0) noexcept;
+  //! Destroy the `StaticRuntime` instance.
+  ASMJIT_API virtual ~StaticRuntime() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get the base address.
+  ASMJIT_INLINE Ptr getBaseAddress() const noexcept { return _baseAddress; }
+
+  //! Get the maximum size of the code that can be relocated/stored in the target.
+  //!
+  //! Returns zero if unlimited.
+  ASMJIT_INLINE size_t getSizeLimit() const noexcept { return _sizeLimit; }
+
+  // --------------------------------------------------------------------------
+  // [Interface]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual Error add(void** dst, Assembler* assembler) noexcept;
+  ASMJIT_API virtual Error release(void* p) noexcept;
+};
+
+// ============================================================================
+// [asmjit::JitRuntime]
+// ============================================================================
+
+//! JIT runtime.
+class ASMJIT_VIRTAPI JitRuntime : public HostRuntime {
+ public:
+  ASMJIT_NO_COPY(JitRuntime)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a `JitRuntime` instance.
+  ASMJIT_API JitRuntime() noexcept;
+  //! Destroy the `JitRuntime` instance.
+  ASMJIT_API virtual ~JitRuntime() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get the type of allocation.
+  ASMJIT_INLINE uint32_t getAllocType() const noexcept { return _allocType; }
+  //! Set the type of allocation.
+  ASMJIT_INLINE void setAllocType(uint32_t allocType) noexcept { _allocType = allocType; }
+
+  //! Get the virtual memory manager.
+  ASMJIT_INLINE VMemMgr* getMemMgr() const noexcept { return const_cast<VMemMgr*>(&_memMgr); }
+
+  // --------------------------------------------------------------------------
+  // [Interface]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual Error add(void** dst, Assembler* assembler) noexcept;
+  ASMJIT_API virtual Error release(void* p) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Virtual memory manager.
+  VMemMgr _memMgr;
+};
+
+//! \}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_RUNTIME_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base/utils.cpp b/DynamicHooks/thirdparty/AsmJit/base/utils.cpp
new file mode 100644
index 0000000..cc21188
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/utils.cpp
@@ -0,0 +1,289 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies]
+#include "../base/utils.h"
+
+#if ASMJIT_OS_POSIX
+# include <time.h>
+# include <unistd.h>
+#endif // ASMJIT_OS_POSIX
+
+#if ASMJIT_OS_MAC
+# include <mach/mach_time.h>
+#endif // ASMJIT_OS_MAC
+
+#if ASMJIT_OS_WINDOWS
+# if defined(_MSC_VER) && _MSC_VER >= 1400
+#  include <intrin.h>
+# else
+#  define _InterlockedCompareExchange InterlockedCompareExchange
+# endif // _MSC_VER
+#endif // ASMJIT_OS_WINDOWS
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::CpuTicks - Windows]
+// ============================================================================
+
+#if ASMJIT_OS_WINDOWS
+static volatile uint32_t Utils_hiResTicks;
+static volatile double Utils_hiResFreq;
+
+uint32_t Utils::getTickCount() noexcept {
+  do {
+    uint32_t hiResOk = Utils_hiResTicks;
+
+    if (hiResOk == 1) {
+      LARGE_INTEGER now;
+      if (!::QueryPerformanceCounter(&now))
+        break;
+      return (int64_t)(double(now.QuadPart) / Utils_hiResFreq);
+    }
+
+    if (hiResOk == 0) {
+      LARGE_INTEGER qpf;
+      if (!::QueryPerformanceFrequency(&qpf)) {
+        _InterlockedCompareExchange((LONG*)&Utils_hiResTicks, 0xFFFFFFFF, 0);
+        break;
+      }
+
+      LARGE_INTEGER now;
+      if (!::QueryPerformanceCounter(&now)) {
+        _InterlockedCompareExchange((LONG*)&Utils_hiResTicks, 0xFFFFFFFF, 0);
+        break;
+      }
+
+      double freqDouble = double(qpf.QuadPart) / 1000.0;
+      Utils_hiResFreq = freqDouble;
+      _InterlockedCompareExchange((LONG*)&Utils_hiResTicks, 1, 0);
+
+      return static_cast<uint32_t>(
+        static_cast<int64_t>(double(now.QuadPart) / freqDouble) & 0xFFFFFFFF);
+    }
+  } while (0);
+
+  // Bail to the less precise GetTickCount().
+  return ::GetTickCount();
+}
+
+// ============================================================================
+// [asmjit::CpuTicks - Mac]
+// ============================================================================
+
+#elif ASMJIT_OS_MAC
+static mach_timebase_info_data_t CpuTicks_machTime;
+
+uint32_t Utils::getTickCount() noexcept {
+  // Initialize the first time CpuTicks::now() is called (see Apple's QA1398).
+  if (CpuTicks_machTime.denom == 0) {
+    if (mach_timebase_info(&CpuTicks_machTime) != KERN_SUCCESS)
+      return 0;
+  }
+
+  // mach_absolute_time() returns ticks in Mach time-base units; the
+  // numer/denom ratio below converts ticks to nanoseconds. Dividing by
+  // 1000000 first keeps the multiplication from overflowing and leaves
+  // the result in milliseconds.
+ uint64_t t = mach_absolute_time() / 1000000; + + t = t * CpuTicks_machTime.numer / CpuTicks_machTime.denom; + return static_cast(t & 0xFFFFFFFFU); +} + +// ============================================================================ +// [asmjit::CpuTicks - Posix] +// ============================================================================ + +#else +uint32_t Utils::getTickCount() noexcept { +#if defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0 + struct timespec ts; + + if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) + return 0; + + uint64_t t = (uint64_t(ts.tv_sec ) * 1000) + (uint64_t(ts.tv_nsec) / 1000000); + return static_cast(t & 0xFFFFFFFFU); +#else // _POSIX_MONOTONIC_CLOCK +#error "[asmjit] Utils::getTickCount() is not implemented for your target OS." + return 0; +#endif // _POSIX_MONOTONIC_CLOCK +} +#endif // ASMJIT_OS + +// ============================================================================ +// [asmjit::Utils - Unit] +// ============================================================================ + +#if defined(ASMJIT_TEST) +UNIT(base_utils) { + uint32_t i; + + INFO("IntTraits<>."); + EXPECT(IntTraits::kIsSigned,"IntTraits should report signed."); + EXPECT(IntTraits::kIsSigned, "IntTraits should report signed."); + EXPECT(IntTraits::kIsSigned, "IntTraits should report signed."); + EXPECT(IntTraits::kIsSigned, "IntTraits should report signed."); + + EXPECT(IntTraits::kIsUnsigned, "IntTraits should report unsigned."); + EXPECT(IntTraits::kIsUnsigned, "IntTraits should report unsigned."); + EXPECT(IntTraits::kIsUnsigned, "IntTraits should report unsigned."); + EXPECT(IntTraits::kIsUnsigned, "IntTraits should report unsigned."); + + EXPECT(IntTraits::kIsSigned, "IntTraits should report signed."); + EXPECT(IntTraits::kIsUnsigned, "IntTraits should report unsigned."); + + EXPECT(IntTraits::kIsIntPtr, "IntTraits should report intptr_t type."); + EXPECT(IntTraits::kIsIntPtr, "IntTraits should report intptr_t type."); + + INFO("Utils::iMin()/iMax()."); + EXPECT(Utils::iMin( 0, -1) == -1, "Utils::iMin should return a minimum value."); + EXPECT(Utils::iMin(-1, -2) == -2, "Utils::iMin should return a minimum value."); + EXPECT(Utils::iMin( 1, 2) == 1, "Utils::iMin should return a minimum value."); + + EXPECT(Utils::iMax( 0, -1) == 0, "Utils::iMax should return a maximum value."); + EXPECT(Utils::iMax(-1, -2) == -1, "Utils::iMax should return a maximum value."); + EXPECT(Utils::iMax( 1, 2) == 2, "Utils::iMax should return a maximum value."); + + INFO("Utils::inInterval()."); + EXPECT(Utils::inInterval(11 , 10, 20) == true , "Utils::inInterval should return true if inside."); + EXPECT(Utils::inInterval(101, 10, 20) == false, "Utils::inInterval should return false if outside."); + + INFO("Utils::isInt8()."); + EXPECT(Utils::isInt8(-128) == true , "Utils::isInt8<> should return true if inside."); + EXPECT(Utils::isInt8( 127) == true , "Utils::isInt8<> should return true if inside."); + EXPECT(Utils::isInt8(-129) == false, "Utils::isInt8<> should return false if outside."); + EXPECT(Utils::isInt8( 128) == false, "Utils::isInt8<> should return false if outside."); + + INFO("Utils::isInt16()."); + EXPECT(Utils::isInt16(-32768) == true , "Utils::isInt16<> should return true if inside."); + EXPECT(Utils::isInt16( 32767) == true , "Utils::isInt16<> should return true if inside."); + EXPECT(Utils::isInt16(-32769) == false, "Utils::isInt16<> should return false if outside."); + EXPECT(Utils::isInt16( 32768) == false, "Utils::isInt16<> should return false if outside."); + + 
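These range checks are not academic: an x86 encoder consults exactly this kind of predicate to choose between the short sign-extended imm8 form of an instruction and the full imm32 form. A standalone illustration (the helper name is hypothetical; the opcode facts are standard x86 encoding):

```cpp
#include <cstdint>

// `add r/m32, imm` encodes as 83 /0 ib when the immediate fits a
// sign-extended int8 (-128..127), and as the longer 81 /0 id otherwise.
inline bool useShortImmForm(int32_t imm) {
  return imm >= -128 && imm <= 127;  // the same range Utils::isInt8() accepts
}
```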
INFO("Utils::isInt32()."); + EXPECT(Utils::isInt32( 2147483647 ) == true, "Utils::isInt32 should return true if inside."); + EXPECT(Utils::isInt32(-2147483647 - 1) == true, "Utils::isInt32 should return true if inside."); + EXPECT(Utils::isInt32(ASMJIT_UINT64_C(2147483648)) == false, "Utils::isInt32 should return false if outside."); + EXPECT(Utils::isInt32(ASMJIT_UINT64_C(0xFFFFFFFF)) == false, "Utils::isInt32 should return false if outside."); + EXPECT(Utils::isInt32(ASMJIT_UINT64_C(0xFFFFFFFF) + 1) == false, "Utils::isInt32 should return false if outside."); + + INFO("Utils::isUInt8()."); + EXPECT(Utils::isUInt8(0) == true , "Utils::isUInt8<> should return true if inside."); + EXPECT(Utils::isUInt8(255) == true , "Utils::isUInt8<> should return true if inside."); + EXPECT(Utils::isUInt8(256) == false, "Utils::isUInt8<> should return false if outside."); + EXPECT(Utils::isUInt8(-1) == false, "Utils::isUInt8<> should return false if negative."); + + INFO("Utils::isUInt12()."); + EXPECT(Utils::isUInt12(0) == true , "Utils::isUInt12<> should return true if inside."); + EXPECT(Utils::isUInt12(4095) == true , "Utils::isUInt12<> should return true if inside."); + EXPECT(Utils::isUInt12(4096) == false, "Utils::isUInt12<> should return false if outside."); + EXPECT(Utils::isUInt12(-1) == false, "Utils::isUInt12<> should return false if negative."); + + INFO("Utils::isUInt16()."); + EXPECT(Utils::isUInt16(0) == true , "Utils::isUInt16<> should return true if inside."); + EXPECT(Utils::isUInt16(65535) == true , "Utils::isUInt16<> should return true if inside."); + EXPECT(Utils::isUInt16(65536) == false, "Utils::isUInt16<> should return false if outside."); + EXPECT(Utils::isUInt16(-1) == false, "Utils::isUInt16<> should return false if negative."); + + INFO("Utils::isUInt32()."); + EXPECT(Utils::isUInt32(ASMJIT_UINT64_C(0xFFFFFFFF)) == true, "Utils::isUInt32 should return true if inside."); + EXPECT(Utils::isUInt32(ASMJIT_UINT64_C(0xFFFFFFFF) + 1) == false, "Utils::isUInt32 should return false if outside."); + EXPECT(Utils::isUInt32(-1) == false, "Utils::isUInt32 should return false if negative."); + + INFO("Utils::isPower2()."); + for (i = 0; i < 64; i++) { + EXPECT(Utils::isPowerOf2(static_cast(1) << i) == true, + "Utils::isPower2() didn't report power of 2."); + EXPECT(Utils::isPowerOf2((static_cast(1) << i) ^ 0x001101) == false, + "Utils::isPower2() didn't report not power of 2."); + } + + INFO("Utils::mask()."); + for (i = 0; i < 32; i++) { + EXPECT(Utils::mask(i) == (1 << i), + "Utils::mask(%u) should return %X.", i, (1 << i)); + } + + INFO("Utils::bits()."); + for (i = 0; i < 32; i++) { + uint32_t expectedBits = 0; + + for (uint32_t b = 0; b < i; b++) + expectedBits |= static_cast(1) << b; + + EXPECT(Utils::bits(i) == expectedBits, + "Utils::bits(%u) should return %X.", i, expectedBits); + } + + INFO("Utils::hasBit()."); + for (i = 0; i < 32; i++) { + EXPECT(Utils::hasBit((1 << i), i) == true, + "Utils::hasBit(%X, %u) should return true.", (1 << i), i); + } + + INFO("Utils::bitCount()."); + for (i = 0; i < 32; i++) { + EXPECT(Utils::bitCount((1 << i)) == 1, + "Utils::bitCount(%X) should return true.", (1 << i)); + } + EXPECT(Utils::bitCount(0x000000F0) == 4, ""); + EXPECT(Utils::bitCount(0x10101010) == 4, ""); + EXPECT(Utils::bitCount(0xFF000000) == 8, ""); + EXPECT(Utils::bitCount(0xFFFFFFF7) == 31, ""); + EXPECT(Utils::bitCount(0x7FFFFFFF) == 31, ""); + + INFO("Utils::findFirstBit()."); + for (i = 0; i < 32; i++) { + EXPECT(Utils::findFirstBit((1 << i)) == i, + "Utils::findFirstBit(%X) 
should return %u.", (1 << i), i);
+  }
+
+  INFO("Utils::keepNOnesFromRight().");
+  EXPECT(Utils::keepNOnesFromRight(0xF, 1) == 0x1, "");
+  EXPECT(Utils::keepNOnesFromRight(0xF, 2) == 0x3, "");
+  EXPECT(Utils::keepNOnesFromRight(0xF, 3) == 0x7, "");
+  EXPECT(Utils::keepNOnesFromRight(0x5, 2) == 0x5, "");
+  EXPECT(Utils::keepNOnesFromRight(0xD, 2) == 0x5, "");
+
+  INFO("Utils::isAligned().");
+  EXPECT(Utils::isAligned(0xFFFF, 4) == false, "");
+  EXPECT(Utils::isAligned(0xFFF4, 4) == true , "");
+  EXPECT(Utils::isAligned(0xFFF8, 8) == true , "");
+  EXPECT(Utils::isAligned(0xFFF0, 16) == true , "");
+
+  INFO("Utils::alignTo().");
+  EXPECT(Utils::alignTo(0xFFFF, 4) == 0x10000, "");
+  EXPECT(Utils::alignTo(0xFFF4, 4) == 0x0FFF4, "");
+  EXPECT(Utils::alignTo(0xFFF8, 8) == 0x0FFF8, "");
+  EXPECT(Utils::alignTo(0xFFF0, 16) == 0x0FFF0, "");
+  EXPECT(Utils::alignTo(0xFFF0, 32) == 0x10000, "");
+
+  INFO("Utils::alignToPowerOf2().");
+  EXPECT(Utils::alignToPowerOf2(0xFFFF) == 0x10000, "");
+  EXPECT(Utils::alignToPowerOf2(0xF123) == 0x10000, "");
+  EXPECT(Utils::alignToPowerOf2(0x0F00) == 0x01000, "");
+  EXPECT(Utils::alignToPowerOf2(0x0100) == 0x00100, "");
+  EXPECT(Utils::alignToPowerOf2(0x1001) == 0x02000, "");
+
+  INFO("Utils::alignDiff().");
+  EXPECT(Utils::alignDiff(0xFFFF, 4) == 1, "");
+  EXPECT(Utils::alignDiff(0xFFF4, 4) == 0, "");
+  EXPECT(Utils::alignDiff(0xFFF8, 8) == 0, "");
+  EXPECT(Utils::alignDiff(0xFFF0, 16) == 0, "");
+  EXPECT(Utils::alignDiff(0xFFF0, 32) == 16, "");
+}
+#endif // ASMJIT_TEST
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
diff --git a/DynamicHooks/thirdparty/AsmJit/base/utils.h b/DynamicHooks/thirdparty/AsmJit/base/utils.h
new file mode 100644
index 0000000..5c850bf
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/utils.h
@@ -0,0 +1,1348 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_UTILS_H
+#define _ASMJIT_BASE_UTILS_H
+
+// [Dependencies]
+#include "../base/globals.h"
+
+#if ASMJIT_CC_MSC_GE(14, 0, 0)
+# include <intrin.h>
+#endif // ASMJIT_CC_MSC_GE
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+//! \addtogroup asmjit_base
+//! \{
+
+// ============================================================================
+// [asmjit::IntTraits]
+// ============================================================================
+
+//! \internal
+//! \{
+
+template<size_t Size, int IsSigned>
+struct IntTraitsPrivate {
+  // Let it fail if not specialized!
+};
+
+template<> struct IntTraitsPrivate<1, 0> { typedef int IntType; typedef int8_t SignedType; typedef uint8_t UnsignedType; };
+template<> struct IntTraitsPrivate<1, 1> { typedef int IntType; typedef int8_t SignedType; typedef uint8_t UnsignedType; };
+
+template<> struct IntTraitsPrivate<2, 0> { typedef int IntType; typedef int16_t SignedType; typedef uint16_t UnsignedType; };
+template<> struct IntTraitsPrivate<2, 1> { typedef int IntType; typedef int16_t SignedType; typedef uint16_t UnsignedType; };
+
+template<> struct IntTraitsPrivate<4, 0> { typedef int64_t IntType; typedef int32_t SignedType; typedef uint32_t UnsignedType; };
+template<> struct IntTraitsPrivate<4, 1> { typedef int IntType; typedef int32_t SignedType; typedef uint32_t UnsignedType; };
+
+template<> struct IntTraitsPrivate<8, 0> { typedef int64_t IntType; typedef int64_t SignedType; typedef uint64_t UnsignedType; };
+template<> struct IntTraitsPrivate<8, 1> { typedef int64_t IntType; typedef int64_t SignedType; typedef uint64_t UnsignedType; };
+
+//! \internal
+template<typename T>
+struct IntTraits {
+  enum {
+    kIsSigned = static_cast<T>(~static_cast<T>(0)) < static_cast<T>(0),
+    kIsUnsigned = !kIsSigned,
+
+    kIs8Bit = sizeof(T) == 1,
+    kIs16Bit = sizeof(T) == 2,
+    kIs32Bit = sizeof(T) == 4,
+    kIs64Bit = sizeof(T) == 8,
+
+    kIsIntPtr = sizeof(T) == sizeof(intptr_t)
+  };
+
+  typedef typename IntTraitsPrivate<sizeof(T), kIsSigned>::IntType IntType;
+  typedef typename IntTraitsPrivate<sizeof(T), kIsSigned>::SignedType SignedType;
+  typedef typename IntTraitsPrivate<sizeof(T), kIsSigned>::UnsignedType UnsignedType;
+
+  //! Get the minimum value of `T`.
+  static ASMJIT_INLINE T minValue() noexcept {
+    if (kIsSigned)
+      return static_cast<T>((~static_cast<UnsignedType>(0) >> 1) + static_cast<UnsignedType>(1));
+    else
+      return static_cast<T>(0);
+  }
+
+  //! Get the maximum value of `T`.
+  static ASMJIT_INLINE T maxValue() noexcept {
+    if (kIsSigned)
+      return static_cast<T>(~static_cast<UnsignedType>(0) >> 1);
+    else
+      return ~static_cast<T>(0);
+  }
+};
+
+//! \}
+
+// ============================================================================
+// [asmjit::Utils]
+// ============================================================================
+
+//! AsmJit utilities - integer, string, etc...
+struct Utils {
+  // --------------------------------------------------------------------------
+  // [Float <-> Int]
+  // --------------------------------------------------------------------------
+
+  //! \internal
+  union FloatBits {
+    int32_t i;
+    float f;
+  };
+
+  //! \internal
+  union DoubleBits {
+    int64_t i;
+    double d;
+  };
+
+  //! Bit-cast `float` to a 32-bit integer.
+  static ASMJIT_INLINE int32_t floatAsInt(float f) noexcept { FloatBits m; m.f = f; return m.i; }
+  //! Bit-cast a 32-bit integer to `float`.
+  static ASMJIT_INLINE float intAsFloat(int32_t i) noexcept { FloatBits m; m.i = i; return m.f; }
+
+  //! Bit-cast `double` to a 64-bit integer.
+  static ASMJIT_INLINE int64_t doubleAsInt(double d) noexcept { DoubleBits m; m.d = d; return m.i; }
+  //! Bit-cast a 64-bit integer to `double`.
+  static ASMJIT_INLINE double intAsDouble(int64_t i) noexcept { DoubleBits m; m.i = i; return m.d; }
+
+  // --------------------------------------------------------------------------
+  // [Pack / Unpack]
+  // --------------------------------------------------------------------------
+
+  //! Pack two 8-bit integers and one 16-bit integer into a 32-bit integer as
+  //! if it were an array of `{b0,b1,w2}`.
+  static ASMJIT_INLINE uint32_t pack32_2x8_1x16(uint32_t b0, uint32_t b1, uint32_t w2) noexcept {
+    return ASMJIT_ARCH_LE ?
b0 + (b1 << 8) + (w2 << 16) + : (b0 << 24) + (b1 << 16) + w2; + } + + //! Pack four 8-bit integer into a 32-bit integer as it is an array of `{b0,b1,b2,b3}`. + static ASMJIT_INLINE uint32_t pack32_4x8(uint32_t b0, uint32_t b1, uint32_t b2, uint32_t b3) noexcept { + return ASMJIT_ARCH_LE ? b0 + (b1 << 8) + (b2 << 16) + (b3 << 24) + : (b0 << 24) + (b1 << 16) + (b2 << 8) + b3; + } + + //! Pack two 32-bit integer into a 64-bit integer as it is an array of `{u0,u1}`. + static ASMJIT_INLINE uint64_t pack64_2x32(uint32_t u0, uint32_t u1) noexcept { + return ASMJIT_ARCH_LE ? (static_cast(u1) << 32) + u0 + : (static_cast(u0) << 32) + u1; + } + + // -------------------------------------------------------------------------- + // [Position of byte (in bit-shift)] + // -------------------------------------------------------------------------- + + static ASMJIT_INLINE uint32_t byteShiftOfDWordStruct(uint32_t index) noexcept { + if (ASMJIT_ARCH_LE) + return index * 8; + else + return (sizeof(uint32_t) - 1 - index) * 8; + } + + // -------------------------------------------------------------------------- + // [Min/Max] + // -------------------------------------------------------------------------- + + // Some environments declare `min()` and `max()` as preprocessor macros so it + // was decided to use different names to prevent such collision. + + //! Get minimum value of `a` and `b`. + template + static ASMJIT_INLINE T iMin(const T& a, const T& b) noexcept { return a < b ? a : b; } + + //! Get maximum value of `a` and `b`. + template + static ASMJIT_INLINE T iMax(const T& a, const T& b) noexcept { return a > b ? a : b; } + + // -------------------------------------------------------------------------- + // [InInterval] + // -------------------------------------------------------------------------- + + //! Get whether `x` is greater than or equal to `a` and lesses than or equal to `b`. + template + static ASMJIT_INLINE bool inInterval(T x, T a, T b) noexcept { + return x >= a && x <= b; + } + + // -------------------------------------------------------------------------- + // [AsInt] + // -------------------------------------------------------------------------- + + //! Map an integer `x` of type `T` to an `int` or `int64_t`, depending on the + //! type. Used internally by AsmJit to dispatch an argument that can be an + //! arbitrary integer type into a function that accepts either `int` or + //! `int64_t`. + template + static ASMJIT_INLINE typename IntTraits::IntType asInt(T x) noexcept { + return static_cast::IntType>(x); + } + + // -------------------------------------------------------------------------- + // [IsInt / IsUInt] + // -------------------------------------------------------------------------- + + //! Get whether the given integer `x` can be casted to an 8-bit signed integer. + template + static ASMJIT_INLINE bool isInt8(T x) noexcept { + typedef typename IntTraits::SignedType SignedType; + typedef typename IntTraits::UnsignedType UnsignedType; + + if (IntTraits::kIsSigned) + return sizeof(T) <= 1 || inInterval(SignedType(x), -128, 127); + else + return UnsignedType(x) <= UnsignedType(127U); + } + + //! Get whether the given integer `x` can be casted to a 16-bit signed integer. 
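Before the integer range checks continue, one detail of the Pack/Unpack helpers above is worth spelling out: because the shifts are mirrored on big-endian targets, the packed value always ends up with the same in-memory byte order. A self-contained check, with a runtime endianness probe standing in for the compile-time `ASMJIT_ARCH_LE` constant (names here are illustrative, not AsmJit API):

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// Minimal re-derivation of pack32_4x8 with a runtime endianness probe.
static bool isHostLE() {
  uint16_t probe = 1;
  uint8_t first;
  std::memcpy(&first, &probe, 1);
  return first == 1;
}

static uint32_t pack32_4x8_ref(uint32_t b0, uint32_t b1, uint32_t b2, uint32_t b3) {
  return isHostLE() ? b0 + (b1 << 8) + (b2 << 16) + (b3 << 24)
                    : (b0 << 24) + (b1 << 16) + (b2 << 8) + b3;
}

int main() {
  uint32_t v = pack32_4x8_ref(0x11, 0x22, 0x33, 0x44);
  uint8_t bytes[4];
  std::memcpy(bytes, &v, sizeof(v));
  // The packed value lands in memory as {0x11, 0x22, 0x33, 0x44} either way.
  assert(bytes[0] == 0x11 && bytes[1] == 0x22 && bytes[2] == 0x33 && bytes[3] == 0x44);
  return 0;
}
```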
+ template + static ASMJIT_INLINE bool isInt16(T x) noexcept { + typedef typename IntTraits::SignedType SignedType; + typedef typename IntTraits::UnsignedType UnsignedType; + + if (IntTraits::kIsSigned) + return sizeof(T) <= 2 || inInterval(SignedType(x), -32768, 32767); + else + return sizeof(T) <= 1 || UnsignedType(x) <= UnsignedType(32767U); + } + + //! Get whether the given integer `x` can be casted to a 32-bit signed integer. + template + static ASMJIT_INLINE bool isInt32(T x) noexcept { + typedef typename IntTraits::SignedType SignedType; + typedef typename IntTraits::UnsignedType UnsignedType; + + if (IntTraits::kIsSigned) + return sizeof(T) <= 4 || inInterval(SignedType(x), -2147483647 - 1, 2147483647); + else + return sizeof(T) <= 2 || UnsignedType(x) <= UnsignedType(2147483647U); + } + + //! Get whether the given integer `x` can be casted to an 8-bit unsigned integer. + template + static ASMJIT_INLINE bool isUInt8(T x) noexcept { + typedef typename IntTraits::UnsignedType UnsignedType; + + if (IntTraits::kIsSigned) + return x >= T(0) && (sizeof(T) <= 1 ? true : x <= T(255)); + else + return sizeof(T) <= 1 || UnsignedType(x) <= UnsignedType(255U); + } + + //! Get whether the given integer `x` can be casted to a 12-bit unsigned integer (ARM specific). + template + static ASMJIT_INLINE bool isUInt12(T x) noexcept { + typedef typename IntTraits::UnsignedType UnsignedType; + + if (IntTraits::kIsSigned) + return x >= T(0) && (sizeof(T) <= 1 ? true : x <= T(4095)); + else + return sizeof(T) <= 1 || UnsignedType(x) <= UnsignedType(4095U); + } + + //! Get whether the given integer `x` can be casted to a 16-bit unsigned integer. + template + static ASMJIT_INLINE bool isUInt16(T x) noexcept { + typedef typename IntTraits::UnsignedType UnsignedType; + + if (IntTraits::kIsSigned) + return x >= T(0) && (sizeof(T) <= 2 ? true : x <= T(65535)); + else + return sizeof(T) <= 2 || UnsignedType(x) <= UnsignedType(65535U); + } + + //! Get whether the given integer `x` can be casted to a 32-bit unsigned integer. + template + static ASMJIT_INLINE bool isUInt32(T x) noexcept { + typedef typename IntTraits::UnsignedType UnsignedType; + + if (IntTraits::kIsSigned) + return x >= T(0) && (sizeof(T) <= 4 ? true : x <= T(4294967295U)); + else + return sizeof(T) <= 4 || UnsignedType(x) <= UnsignedType(4294967295U); + } + + // -------------------------------------------------------------------------- + // [IsPowerOf2] + // -------------------------------------------------------------------------- + + //! Get whether the `n` value is a power of two (only one bit is set). + template + static ASMJIT_INLINE bool isPowerOf2(T n) noexcept { + return n != 0 && (n & (n - 1)) == 0; + } + + // -------------------------------------------------------------------------- + // [Mask] + // -------------------------------------------------------------------------- + + //! Generate a bit-mask that has `x` bit set. + static ASMJIT_INLINE uint32_t mask(uint32_t x) noexcept { + ASMJIT_ASSERT(x < 32); + return static_cast(1) << x; + } + + //! Generate a bit-mask that has `x0` and `x1` bits set. + static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1) noexcept { + return mask(x0) | mask(x1); + } + + //! Generate a bit-mask that has `x0`, `x1` and `x2` bits set. + static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2) noexcept { + return mask(x0, x1) | mask(x2); + } + + //! Generate a bit-mask that has `x0`, `x1`, `x2` and `x3` bits set. 
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) noexcept {
+    return mask(x0, x1) | mask(x2, x3);
+  }
+
+  //! Generate a bit-mask that has `x0`, `x1`, `x2`, `x3` and `x4` bits set.
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4) noexcept {
+    return mask(x0, x1) | mask(x2, x3) | mask(x4);
+  }
+
+  //! Generate a bit-mask that has `x0`, `x1`, `x2`, `x3`, `x4` and `x5` bits set.
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5) noexcept {
+    return mask(x0, x1) | mask(x2, x3) | mask(x4, x5);
+  }
+
+  //! Generate a bit-mask that has `x0`, `x1`, `x2`, `x3`, `x4`, `x5` and `x6` bits set.
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6) noexcept {
+    return mask(x0, x1) | mask(x2, x3) | mask(x4, x5) | mask(x6);
+  }
+
+  //! Generate a bit-mask that has `x0`, `x1`, `x2`, `x3`, `x4`, `x5`, `x6` and `x7` bits set.
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7) noexcept {
+    return mask(x0, x1) | mask(x2, x3) | mask(x4, x5) | mask(x6, x7);
+  }
+
+  //! Generate a bit-mask that has `x0`, `x1`, `x2`, `x3`, `x4`, `x5`, `x6`, `x7` and `x8` bits set.
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7, uint32_t x8) noexcept {
+    return mask(x0, x1) | mask(x2, x3) | mask(x4, x5) | mask(x6, x7) | mask(x8);
+  }
+
+  //! Generate a bit-mask that has `x0`, `x1`, `x2`, `x3`, `x4`, `x5`, `x6`, `x7`, `x8` and `x9` bits set.
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7, uint32_t x8, uint32_t x9) noexcept {
+    return mask(x0, x1) | mask(x2, x3) | mask(x4, x5) | mask(x6, x7) | mask(x8, x9);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Bits]
+  // --------------------------------------------------------------------------
+
+  //! Generate a bit-mask that has the `x` least significant bits set.
+  static ASMJIT_INLINE uint32_t bits(uint32_t x) noexcept {
+    // Shifting by more bits than the type has results in undefined behavior.
+    // In such a case asmjit trashes the result by ORing it with the `overflow`
+    // mask, which discards the undefined value returned by the shift.
+    uint32_t overflow = static_cast<uint32_t>(
+      -static_cast<int32_t>(x >= sizeof(uint32_t) * 8));
+
+    return ((static_cast<uint32_t>(1) << x) - 1U) | overflow;
+  }
+
+  // --------------------------------------------------------------------------
+  // [HasBit]
+  // --------------------------------------------------------------------------
+
+  //! Get whether `x` has bit `n` set.
+  template<typename T, typename Index>
+  static ASMJIT_INLINE bool hasBit(T x, Index n) noexcept {
+    return (x & (static_cast<T>(1) << n)) != 0;
+  }
+
+  // --------------------------------------------------------------------------
+  // [BitCount]
+  // --------------------------------------------------------------------------
+
+  static ASMJIT_INLINE uint32_t bitCountSlow(uint32_t x) noexcept {
+    // From: http://graphics.stanford.edu/~seander/bithacks.html
+    x = x - ((x >> 1) & 0x55555555U);
+    x = (x & 0x33333333U) + ((x >> 2) & 0x33333333U);
+    return (((x + (x >> 4)) & 0x0F0F0F0FU) * 0x01010101U) >> 24;
+  }
+
+  //! Get the number of bits set in `x`.
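The `overflow` trick in `bits()` above deserves a second look: `1u << 32` is undefined behavior for a 32-bit type, so the branch-free implementation ORs in an all-ones mask whenever `x >= 32`. A branching reference version of the same idea (the name is hypothetical) makes the intent explicit:

```cpp
#include <cstdint>

inline uint32_t bits_ref(uint32_t x) {
  if (x >= 32) return 0xFFFFFFFFu;  // saturate instead of shifting by >= 32
  return (uint32_t(1) << x) - 1u;   // x least significant bits set
}
// bits_ref(0) == 0x0, bits_ref(8) == 0xFF, bits_ref(32) == 0xFFFFFFFF
```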
+ static ASMJIT_INLINE uint32_t bitCount(uint32_t x) noexcept { +#if ASMJIT_CC_GCC || ASMJIT_CC_CLANG + return __builtin_popcount(x); +#else + return bitCountSlow(x); +#endif + } + + // -------------------------------------------------------------------------- + // [FindFirstBit] + // -------------------------------------------------------------------------- + + //! \internal + static ASMJIT_INLINE uint32_t findFirstBitSlow(uint32_t mask) noexcept { + // This is a reference (slow) implementation of `findFirstBit()`, used when + // we don't have a C++ compiler support. The implementation speed has been + // improved to check for 2 bits per iteration. + uint32_t i = 1; + + while (mask != 0) { + uint32_t two = mask & 0x3; + if (two != 0x0) + return i - (two & 0x1); + + i += 2; + mask >>= 2; + } + + return 0xFFFFFFFFU; + } + + //! Find a first bit in `mask`. + static ASMJIT_INLINE uint32_t findFirstBit(uint32_t mask) noexcept { +#if ASMJIT_CC_MSC_GE(14, 0, 0) && (ASMJIT_ARCH_X86 || ASMJIT_ARCH_ARM32 || \ + ASMJIT_ARCH_X64 || ASMJIT_ARCH_ARM64) + DWORD i; + if (_BitScanForward(&i, mask)) + return static_cast(i); + else + return 0xFFFFFFFFU; +#elif ASMJIT_CC_GCC_GE(3, 4, 6) || ASMJIT_CC_CLANG + if (mask) + return __builtin_ctz(mask); + else + return 0xFFFFFFFFU; +#else + return findFirstBitSlow(mask); +#endif + } + + // -------------------------------------------------------------------------- + // [Misc] + // -------------------------------------------------------------------------- + + static ASMJIT_INLINE uint32_t keepNOnesFromRight(uint32_t mask, uint32_t nBits) noexcept { + uint32_t m = 0x1; + + do { + nBits -= (mask & m) != 0; + m <<= 1; + if (nBits == 0) { + m -= 1; + mask &= m; + break; + } + } while (m); + + return mask; + } + + static ASMJIT_INLINE uint32_t indexNOnesFromRight(uint8_t* dst, uint32_t mask, uint32_t nBits) noexcept { + uint32_t totalBits = nBits; + uint8_t i = 0; + uint32_t m = 0x1; + + do { + if (mask & m) { + *dst++ = i; + if (--nBits == 0) + break; + } + + m <<= 1; + i++; + } while (m); + + return totalBits - nBits; + } + + // -------------------------------------------------------------------------- + // [Alignment] + // -------------------------------------------------------------------------- + + template + static ASMJIT_INLINE bool isAligned(T base, T alignment) noexcept { + return (base % alignment) == 0; + } + + //! Align `base` to `alignment`. + template + static ASMJIT_INLINE T alignTo(T base, T alignment) noexcept { + return (base + (alignment - 1)) & ~(alignment - 1); + } + + template + static ASMJIT_INLINE T alignToPowerOf2(T base) noexcept { + // Implementation is from "Hacker's Delight" by Henry S. Warren, Jr. + base -= 1; + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4293) +#endif // _MSC_VER + + base = base | (base >> 1); + base = base | (base >> 2); + base = base | (base >> 4); + + // 8/16/32 constants are multiplied by the condition to prevent a compiler + // complaining about the 'shift count >= type width' (GCC). + if (sizeof(T) >= 2) base = base | (base >> ( 8 * (sizeof(T) >= 2))); // Base >> 8. + if (sizeof(T) >= 4) base = base | (base >> (16 * (sizeof(T) >= 4))); // Base >> 16. + if (sizeof(T) >= 8) base = base | (base >> (32 * (sizeof(T) >= 8))); // Base >> 32. + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif // _MSC_VER + + return base + 1; + } + + //! Get delta required to align `base` to `alignment`. 
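Note that `alignTo()` is only correct for power-of-two alignments: adding `alignment - 1` and then clearing the low bits rounds up in one step. Two worked values, matching the unit tests earlier in this diff:

```cpp
// C++11 compile-time check of the add-then-mask idiom used by alignTo().
static_assert(((0xFFF1u + 15u) & ~15u) == 0x10000u, "0xFFF1 rounds up to 0x10000");
static_assert(((0xFFF0u + 15u) & ~15u) == 0x0FFF0u, "an aligned value is unchanged");
```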
+ template + static ASMJIT_INLINE T alignDiff(T base, T alignment) noexcept { + return alignTo(base, alignment) - base; + } + + // -------------------------------------------------------------------------- + // [String] + // -------------------------------------------------------------------------- + + static ASMJIT_INLINE size_t strLen(const char* s, size_t maxlen) noexcept { + size_t i; + for (i = 0; i < maxlen; i++) + if (!s[i]) + break; + return i; + } + + // -------------------------------------------------------------------------- + // [BSwap] + // -------------------------------------------------------------------------- + + static ASMJIT_INLINE uint32_t byteswap32(uint32_t x) noexcept { +#if ASMJIT_CC_MSC + return static_cast(_byteswap_ulong(x)); +#elif ASMJIT_CC_GCC_GE(4, 3, 0) || ASMJIT_CC_CLANG_GE(2, 6, 0) + return __builtin_bswap32(x); +#else + uint32_t y = x & 0x00FFFF00U; + x = (x << 24) + (x >> 24); + y = (y << 8) + (y >> 8); + return x + (y & 0x00FFFF00U); +#endif + } + + // -------------------------------------------------------------------------- + // [ReadMem] + // -------------------------------------------------------------------------- + + static ASMJIT_INLINE uint32_t readU8(const void* p) noexcept { + return static_cast(static_cast(p)[0]); + } + + static ASMJIT_INLINE int32_t readI8(const void* p) noexcept { + return static_cast(static_cast(p)[0]); + } + + template + static ASMJIT_INLINE uint32_t readU16xLE(const void* p) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? Alignment : 1U); + if (ASMJIT_ARCH_LE && (ASMJIT_ARCH_UNALIGNED_16 || Alignment >= 2)) { + return static_cast(static_cast(p)[0]); + } + else { + uint32_t x = static_cast(static_cast(p)[0]); + uint32_t y = static_cast(static_cast(p)[1]); + return x + (y << 8); + } + } + + template + static ASMJIT_INLINE uint32_t readU16xBE(const void* p) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? Alignment : 1U); + if (ASMJIT_ARCH_BE && (ASMJIT_ARCH_UNALIGNED_16 || Alignment >= 2)) { + return static_cast(static_cast(p)[0]); + } + else { + uint32_t x = static_cast(static_cast(p)[0]); + uint32_t y = static_cast(static_cast(p)[1]); + return (x << 8) + y; + } + } + + template + static ASMJIT_INLINE uint32_t readU16x(const void* p) noexcept { + return ASMJIT_ARCH_LE ? readU16xLE(p) : readU16xBE(p); + } + + template + static ASMJIT_INLINE int32_t readI16xLE(const void* p) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? Alignment : 1U); + if (ASMJIT_ARCH_LE && (ASMJIT_ARCH_UNALIGNED_16 || Alignment >= 2)) { + return static_cast(static_cast(p)[0]); + } + else { + int32_t x = static_cast(static_cast(p)[0]); + int32_t y = static_cast(static_cast(p)[1]); + return x + (y << 8); + } + } + + template + static ASMJIT_INLINE int32_t readI16xBE(const void* p) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? Alignment : 1U); + if (ASMJIT_ARCH_BE && (ASMJIT_ARCH_UNALIGNED_16 || Alignment >= 2)) { + return static_cast(static_cast(p)[0]); + } + else { + int32_t x = static_cast(static_cast(p)[0]); + int32_t y = static_cast(static_cast(p)[1]); + return (x << 8) + y; + } + } + + template + static ASMJIT_INLINE int32_t readI16x(const void* p) noexcept { + return ASMJIT_ARCH_LE ? 
readI16xLE(p) : readI16xBE(p); + } + + static ASMJIT_INLINE uint32_t readU16aLE(const void* p) noexcept { return readU16xLE<2>(p); } + static ASMJIT_INLINE uint32_t readU16uLE(const void* p) noexcept { return readU16xLE<0>(p); } + + static ASMJIT_INLINE uint32_t readU16aBE(const void* p) noexcept { return readU16xBE<2>(p); } + static ASMJIT_INLINE uint32_t readU16uBE(const void* p) noexcept { return readU16xBE<0>(p); } + + static ASMJIT_INLINE uint32_t readU16a(const void* p) noexcept { return readU16x<2>(p); } + static ASMJIT_INLINE uint32_t readU16u(const void* p) noexcept { return readU16x<0>(p); } + + static ASMJIT_INLINE int32_t readI16aLE(const void* p) noexcept { return readI16xLE<2>(p); } + static ASMJIT_INLINE int32_t readI16uLE(const void* p) noexcept { return readI16xLE<0>(p); } + + static ASMJIT_INLINE int32_t readI16aBE(const void* p) noexcept { return readI16xBE<2>(p); } + static ASMJIT_INLINE int32_t readI16uBE(const void* p) noexcept { return readI16xBE<0>(p); } + + static ASMJIT_INLINE int32_t readI16a(const void* p) noexcept { return readI16x<2>(p); } + static ASMJIT_INLINE int32_t readI16u(const void* p) noexcept { return readI16x<0>(p); } + + template + static ASMJIT_INLINE uint32_t readU32xLE(const void* p) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? Alignment : 1U); + if (ASMJIT_ARCH_UNALIGNED_32 || Alignment >= 4) { + uint32_t x = static_cast(p)[0]; + return ASMJIT_ARCH_LE ? x : byteswap32(x); + } + else { + uint32_t x = readU16xLE(static_cast(p) + 0); + uint32_t y = readU16xLE(static_cast(p) + 2); + return x + (y << 16); + } + } + + template + static ASMJIT_INLINE uint32_t readU32xBE(const void* p) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? Alignment : 1U); + if (ASMJIT_ARCH_UNALIGNED_32 || Alignment >= 4) { + uint32_t x = static_cast(p)[0]; + return ASMJIT_ARCH_BE ? x : byteswap32(x); + } + else { + uint32_t x = readU16xBE(static_cast(p) + 0); + uint32_t y = readU16xBE(static_cast(p) + 2); + return (x << 16) + y; + } + } + + template + static ASMJIT_INLINE uint32_t readU32x(const void* p) noexcept { + return ASMJIT_ARCH_LE ? readU32xLE(p) : readU32xBE(p); + } + + template + static ASMJIT_INLINE int32_t readI32xLE(const void* p) noexcept { + return static_cast(readU32xLE(p)); + } + + template + static ASMJIT_INLINE int32_t readI32xBE(const void* p) noexcept { + return static_cast(readU32xBE(p)); + } + + template + static ASMJIT_INLINE int32_t readI32x(const void* p) noexcept { + return ASMJIT_ARCH_LE ? 
readI32xLE(p) : readI32xBE(p); + } + + static ASMJIT_INLINE uint32_t readU32a(const void* p) noexcept { return readU32x<4>(p); } + static ASMJIT_INLINE uint32_t readU32u(const void* p) noexcept { return readU32x<0>(p); } + + static ASMJIT_INLINE uint32_t readU32aLE(const void* p) noexcept { return readU32xLE<4>(p); } + static ASMJIT_INLINE uint32_t readU32uLE(const void* p) noexcept { return readU32xLE<0>(p); } + + static ASMJIT_INLINE uint32_t readU32aBE(const void* p) noexcept { return readU32xBE<4>(p); } + static ASMJIT_INLINE uint32_t readU32uBE(const void* p) noexcept { return readU32xBE<0>(p); } + + static ASMJIT_INLINE int32_t readI32a(const void* p) noexcept { return readI32x<4>(p); } + static ASMJIT_INLINE int32_t readI32u(const void* p) noexcept { return readI32x<0>(p); } + + static ASMJIT_INLINE int32_t readI32aLE(const void* p) noexcept { return readI32xLE<4>(p); } + static ASMJIT_INLINE int32_t readI32uLE(const void* p) noexcept { return readI32xLE<0>(p); } + + static ASMJIT_INLINE int32_t readI32aBE(const void* p) noexcept { return readI32xBE<4>(p); } + static ASMJIT_INLINE int32_t readI32uBE(const void* p) noexcept { return readI32xBE<0>(p); } + + template + static ASMJIT_INLINE uint64_t readU64xLE(const void* p) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? Alignment : 1U); + if (ASMJIT_ARCH_LE && (ASMJIT_ARCH_UNALIGNED_64 || Alignment >= 8)) { + return static_cast(p)[0]; + } + else { + uint32_t x = readU32xLE(static_cast(p) + 0); + uint32_t y = readU32xLE(static_cast(p) + 4); + return static_cast(x) + (static_cast(y) << 32); + } + } + + template + static ASMJIT_INLINE uint64_t readU64xBE(const void* p) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? Alignment : 1U); + if (ASMJIT_ARCH_BE && (ASMJIT_ARCH_UNALIGNED_64 || Alignment >= 8)) { + return static_cast(p)[0]; + } + else { + uint32_t x = readU32xLE(static_cast(p) + 0); + uint32_t y = readU32xLE(static_cast(p) + 4); + return (static_cast(x) << 32) + static_cast(y); + } + } + + template + static ASMJIT_INLINE uint64_t readU64x(const void* p) noexcept { + return ASMJIT_ARCH_LE ? readU64xLE(p) : readU64xBE(p); + } + + template + static ASMJIT_INLINE int64_t readI64xLE(const void* p) noexcept { + return static_cast(readU64xLE(p)); + } + + template + static ASMJIT_INLINE int64_t readI64xBE(const void* p) noexcept { + return static_cast(readU64xBE(p)); + } + + template + static ASMJIT_INLINE int64_t readI64x(const void* p) noexcept { + return ASMJIT_ARCH_LE ? 
readI64xLE(p) : readI64xBE(p); + } + + static ASMJIT_INLINE uint64_t readU64a(const void* p) noexcept { return readU64x<8>(p); } + static ASMJIT_INLINE uint64_t readU64u(const void* p) noexcept { return readU64x<0>(p); } + + static ASMJIT_INLINE uint64_t readU64aLE(const void* p) noexcept { return readU64xLE<8>(p); } + static ASMJIT_INLINE uint64_t readU64uLE(const void* p) noexcept { return readU64xLE<0>(p); } + + static ASMJIT_INLINE uint64_t readU64aBE(const void* p) noexcept { return readU64xBE<8>(p); } + static ASMJIT_INLINE uint64_t readU64uBE(const void* p) noexcept { return readU64xBE<0>(p); } + + static ASMJIT_INLINE int64_t readI64a(const void* p) noexcept { return readI64x<8>(p); } + static ASMJIT_INLINE int64_t readI64u(const void* p) noexcept { return readI64x<0>(p); } + + static ASMJIT_INLINE int64_t readI64aLE(const void* p) noexcept { return readI64xLE<8>(p); } + static ASMJIT_INLINE int64_t readI64uLE(const void* p) noexcept { return readI64xLE<0>(p); } + + static ASMJIT_INLINE int64_t readI64aBE(const void* p) noexcept { return readI64xBE<8>(p); } + static ASMJIT_INLINE int64_t readI64uBE(const void* p) noexcept { return readI64xBE<0>(p); } + + // -------------------------------------------------------------------------- + // [WriteMem] + // -------------------------------------------------------------------------- + + static ASMJIT_INLINE void writeU8(void* p, uint32_t x) noexcept { + static_cast(p)[0] = static_cast(x & 0xFFU); + } + + static ASMJIT_INLINE void writeI8(void* p, int32_t x) noexcept { + static_cast(p)[0] = static_cast(x & 0xFF); + } + + template + static ASMJIT_INLINE void writeU16xLE(void* p, uint32_t x) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? Alignment : 1U); + if (ASMJIT_ARCH_LE && (ASMJIT_ARCH_UNALIGNED_16 || Alignment >= 2)) { + static_cast(p)[0] = static_cast(x & 0xFFFFU); + } + else { + static_cast(p)[0] = static_cast((x ) & 0xFFU); + static_cast(p)[1] = static_cast((x >> 8) & 0xFFU); + } + } + + template + static ASMJIT_INLINE void writeU16xBE(void* p, uint32_t x) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? 
Alignment : 1U); + if (ASMJIT_ARCH_BE && (ASMJIT_ARCH_UNALIGNED_16 || Alignment >= 2)) { + static_cast(p)[0] = static_cast(x & 0xFFFFU); + } + else { + static_cast(p)[0] = static_cast((x >> 8) & 0xFFU); + static_cast(p)[1] = static_cast((x ) & 0xFFU); + } + } + + template + static ASMJIT_INLINE void writeU16x(void* p, uint32_t x) noexcept { + if (ASMJIT_ARCH_LE) + writeU16xLE(p, x); + else + writeU16xBE(p, x); + } + + template + static ASMJIT_INLINE void writeI16xLE(void* p, int32_t x) noexcept { + writeU16xLE(p, static_cast(x)); + } + + template + static ASMJIT_INLINE void writeI16xBE(void* p, int32_t x) noexcept { + writeU16xBE(p, static_cast(x)); + } + + template + static ASMJIT_INLINE void writeI16x(void* p, int32_t x) noexcept { + writeU16x(p, static_cast(x)); + } + + static ASMJIT_INLINE void writeU16aLE(void* p, uint32_t x) noexcept { writeU16xLE<2>(p, x); } + static ASMJIT_INLINE void writeU16uLE(void* p, uint32_t x) noexcept { writeU16xLE<0>(p, x); } + + static ASMJIT_INLINE void writeU16aBE(void* p, uint32_t x) noexcept { writeU16xBE<2>(p, x); } + static ASMJIT_INLINE void writeU16uBE(void* p, uint32_t x) noexcept { writeU16xBE<0>(p, x); } + + static ASMJIT_INLINE void writeU16a(void* p, uint32_t x) noexcept { writeU16x<2>(p, x); } + static ASMJIT_INLINE void writeU16u(void* p, uint32_t x) noexcept { writeU16x<0>(p, x); } + + static ASMJIT_INLINE void writeI16aLE(void* p, int32_t x) noexcept { writeI16xLE<2>(p, x); } + static ASMJIT_INLINE void writeI16uLE(void* p, int32_t x) noexcept { writeI16xLE<0>(p, x); } + + static ASMJIT_INLINE void writeI16aBE(void* p, int32_t x) noexcept { writeI16xBE<2>(p, x); } + static ASMJIT_INLINE void writeI16uBE(void* p, int32_t x) noexcept { writeI16xBE<0>(p, x); } + + static ASMJIT_INLINE void writeI16a(void* p, int32_t x) noexcept { writeI16x<2>(p, x); } + static ASMJIT_INLINE void writeI16u(void* p, int32_t x) noexcept { writeI16x<0>(p, x); } + + template + static ASMJIT_INLINE void writeU32xLE(void* p, uint32_t x) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? Alignment : 1U); + if (ASMJIT_ARCH_UNALIGNED_32 || Alignment >= 4) { + static_cast(p)[0] = ASMJIT_ARCH_LE ? x : byteswap32(x); + } + else { + writeU16xLE(static_cast(p) + 0, x >> 16); + writeU16xLE(static_cast(p) + 2, x); + } + } + + template + static ASMJIT_INLINE void writeU32xBE(void* p, uint32_t x) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? Alignment : 1U); + if (ASMJIT_ARCH_UNALIGNED_32 || Alignment >= 4) { + static_cast(p)[0] = ASMJIT_ARCH_BE ? 
x : byteswap32(x); + } + else { + writeU16xBE(static_cast(p) + 0, x); + writeU16xBE(static_cast(p) + 2, x >> 16); + } + } + + template + static ASMJIT_INLINE void writeU32x(void* p, uint32_t x) noexcept { + if (ASMJIT_ARCH_LE) + writeU32xLE(p, x); + else + writeU32xBE(p, x); + } + + template + static ASMJIT_INLINE void writeI32xLE(void* p, int32_t x) noexcept { + writeU32xLE(p, static_cast(x)); + } + + template + static ASMJIT_INLINE void writeI32xBE(void* p, int32_t x) noexcept { + writeU32xBE(p, static_cast(x)); + } + + template + static ASMJIT_INLINE void writeI32x(void* p, int32_t x) noexcept { + writeU32x(p, static_cast(x)); + } + + static ASMJIT_INLINE void writeU32aLE(void* p, uint32_t x) noexcept { writeU32xLE<4>(p, x); } + static ASMJIT_INLINE void writeU32uLE(void* p, uint32_t x) noexcept { writeU32xLE<0>(p, x); } + + static ASMJIT_INLINE void writeU32aBE(void* p, uint32_t x) noexcept { writeU32xBE<4>(p, x); } + static ASMJIT_INLINE void writeU32uBE(void* p, uint32_t x) noexcept { writeU32xBE<0>(p, x); } + + static ASMJIT_INLINE void writeU32a(void* p, uint32_t x) noexcept { writeU32x<4>(p, x); } + static ASMJIT_INLINE void writeU32u(void* p, uint32_t x) noexcept { writeU32x<0>(p, x); } + + static ASMJIT_INLINE void writeI32aLE(void* p, int32_t x) noexcept { writeI32xLE<4>(p, x); } + static ASMJIT_INLINE void writeI32uLE(void* p, int32_t x) noexcept { writeI32xLE<0>(p, x); } + + static ASMJIT_INLINE void writeI32aBE(void* p, int32_t x) noexcept { writeI32xBE<4>(p, x); } + static ASMJIT_INLINE void writeI32uBE(void* p, int32_t x) noexcept { writeI32xBE<0>(p, x); } + + static ASMJIT_INLINE void writeI32a(void* p, int32_t x) noexcept { writeI32x<4>(p, x); } + static ASMJIT_INLINE void writeI32u(void* p, int32_t x) noexcept { writeI32x<0>(p, x); } + + template + static ASMJIT_INLINE void writeU64xLE(void* p, uint64_t x) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? Alignment : 1U); + if (ASMJIT_ARCH_LE && (ASMJIT_ARCH_UNALIGNED_64 || Alignment >= 8)) { + static_cast(p)[0] = x; + } + else { + writeU32xLE(static_cast(p) + 0, static_cast(x >> 32)); + writeU32xLE(static_cast(p) + 4, static_cast(x & 0xFFFFFFFFU)); + } + } + + template + static ASMJIT_INLINE void writeU64xBE(void* p, uint64_t x) noexcept { + ASMJIT_ASSUME_ALIGNED(p, Alignment > 1 ? 
Alignment : 1U); + if (ASMJIT_ARCH_BE && (ASMJIT_ARCH_UNALIGNED_64 || Alignment >= 8)) { + static_cast(p)[0] = x; + } + else { + writeU32xBE(static_cast(p) + 0, static_cast(x & 0xFFFFFFFFU)); + writeU32xBE(static_cast(p) + 4, static_cast(x >> 32)); + } + } + + template + static ASMJIT_INLINE void writeU64x(void* p, uint64_t x) noexcept { + if (ASMJIT_ARCH_LE) + writeU64xLE(p, x); + else + writeU64xBE(p, x); + } + + template + static ASMJIT_INLINE void writeI64xLE(void* p, int64_t x) noexcept { + writeU64xLE(p, static_cast(x)); + } + + template + static ASMJIT_INLINE void writeI64xBE(void* p, int64_t x) noexcept { + writeU64xBE(p, static_cast(x)); + } + + template + static ASMJIT_INLINE void writeI64x(void* p, int64_t x) noexcept { + writeU64x(p, static_cast(x)); + } + + static ASMJIT_INLINE void writeU64aLE(void* p, uint64_t x) noexcept { writeU64xLE<8>(p, x); } + static ASMJIT_INLINE void writeU64uLE(void* p, uint64_t x) noexcept { writeU64xLE<0>(p, x); } + + static ASMJIT_INLINE void writeU64aBE(void* p, uint64_t x) noexcept { writeU64xBE<8>(p, x); } + static ASMJIT_INLINE void writeU64uBE(void* p, uint64_t x) noexcept { writeU64xBE<0>(p, x); } + + static ASMJIT_INLINE void writeU64a(void* p, uint64_t x) noexcept { writeU64x<8>(p, x); } + static ASMJIT_INLINE void writeU64u(void* p, uint64_t x) noexcept { writeU64x<0>(p, x); } + + static ASMJIT_INLINE void writeI64aLE(void* p, int64_t x) noexcept { writeI64xLE<8>(p, x); } + static ASMJIT_INLINE void writeI64uLE(void* p, int64_t x) noexcept { writeI64xLE<0>(p, x); } + + static ASMJIT_INLINE void writeI64aBE(void* p, int64_t x) noexcept { writeI64xBE<8>(p, x); } + static ASMJIT_INLINE void writeI64uBE(void* p, int64_t x) noexcept { writeI64xBE<0>(p, x); } + + static ASMJIT_INLINE void writeI64a(void* p, int64_t x) noexcept { writeI64x<8>(p, x); } + static ASMJIT_INLINE void writeI64u(void* p, int64_t x) noexcept { writeI64x<0>(p, x); } + + // -------------------------------------------------------------------------- + // [GetTickCount] + // -------------------------------------------------------------------------- + + //! Get the current CPU tick count, used for benchmarking (1ms resolution). 
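To make the ReadMem/WriteMem template machinery above concrete: with `Alignment = 0` on a target without unaligned loads, a 32-bit little-endian read decomposes into single-byte loads that are safe for any pointer and any host endianness. An equivalent standalone version (the name is hypothetical; the shown helpers compose the same value from two 16-bit reads):

```cpp
#include <cstdint>

// Byte-wise fallback equivalent to readU32uLE() in the header above.
inline uint32_t readU32uLE_ref(const void* p) {
  const uint8_t* b = static_cast<const uint8_t*>(p);
  return (uint32_t(b[0])      ) |
         (uint32_t(b[1]) <<  8) |
         (uint32_t(b[2]) << 16) |
         (uint32_t(b[3]) << 24);
}
```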
+ static ASMJIT_API uint32_t getTickCount() noexcept; +}; + +// ============================================================================ +// [asmjit::UInt64] +// ============================================================================ + +union UInt64 { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE UInt64 fromUInt64(uint64_t val) noexcept { + UInt64 data; + data.setUInt64(val); + return data; + } + + ASMJIT_INLINE UInt64 fromUInt64(const UInt64& val) noexcept { + UInt64 data; + data.setUInt64(val); + return data; + } + + // -------------------------------------------------------------------------- + // [Reset] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE void reset() noexcept { + if (ASMJIT_ARCH_64BIT) { + u64 = 0; + } + else { + u32[0] = 0; + u32[1] = 0; + } + } + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE uint64_t getUInt64() const noexcept { + return u64; + } + + ASMJIT_INLINE UInt64& setUInt64(uint64_t val) noexcept { + u64 = val; + return *this; + } + + ASMJIT_INLINE UInt64& setUInt64(const UInt64& val) noexcept { + if (ASMJIT_ARCH_64BIT) { + u64 = val.u64; + } + else { + u32[0] = val.u32[0]; + u32[1] = val.u32[1]; + } + return *this; + } + + ASMJIT_INLINE UInt64& setPacked_2x32(uint32_t u0, uint32_t u1) noexcept { + if (ASMJIT_ARCH_64BIT) { + u64 = Utils::pack64_2x32(u0, u1); + } + else { + u32[0] = u0; + u32[1] = u1; + } + return *this; + } + + // -------------------------------------------------------------------------- + // [Add] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE UInt64& add(uint64_t val) noexcept { + u64 += val; + return *this; + } + + ASMJIT_INLINE UInt64& add(const UInt64& val) noexcept { + if (ASMJIT_ARCH_64BIT) { + u64 += val.u64; + } + else { + u32[0] += val.u32[0]; + u32[1] += val.u32[1]; + } + return *this; + } + + // -------------------------------------------------------------------------- + // [Sub] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE UInt64& sub(uint64_t val) noexcept { + u64 -= val; + return *this; + } + + ASMJIT_INLINE UInt64& sub(const UInt64& val) noexcept { + if (ASMJIT_ARCH_64BIT) { + u64 -= val.u64; + } + else { + u32[0] -= val.u32[0]; + u32[1] -= val.u32[1]; + } + return *this; + } + + // -------------------------------------------------------------------------- + // [And] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE UInt64& and_(uint64_t val) noexcept { + u64 &= val; + return *this; + } + + ASMJIT_INLINE UInt64& and_(const UInt64& val) noexcept { + if (ASMJIT_ARCH_64BIT) { + u64 &= val.u64; + } + else { + u32[0] &= val.u32[0]; + u32[1] &= val.u32[1]; + } + return *this; + } + + // -------------------------------------------------------------------------- + // [AndNot] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE UInt64& andNot(uint64_t val) noexcept { + u64 &= ~val; + return *this; + } + + ASMJIT_INLINE UInt64& andNot(const UInt64& val) noexcept { + if (ASMJIT_ARCH_64BIT) { + u64 &= ~val.u64; + } + else { + u32[0] &= ~val.u32[0]; + u32[1] &= ~val.u32[1]; + } + 
return *this; + } + + // -------------------------------------------------------------------------- + // [Or] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE UInt64& or_(uint64_t val) noexcept { + u64 |= val; + return *this; + } + + ASMJIT_INLINE UInt64& or_(const UInt64& val) noexcept { + if (ASMJIT_ARCH_64BIT) { + u64 |= val.u64; + } + else { + u32[0] |= val.u32[0]; + u32[1] |= val.u32[1]; + } + return *this; + } + + // -------------------------------------------------------------------------- + // [Xor] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE UInt64& xor_(uint64_t val) noexcept { + u64 ^= val; + return *this; + } + + ASMJIT_INLINE UInt64& xor_(const UInt64& val) noexcept { + if (ASMJIT_ARCH_64BIT) { + u64 ^= val.u64; + } + else { + u32[0] ^= val.u32[0]; + u32[1] ^= val.u32[1]; + } + return *this; + } + + // -------------------------------------------------------------------------- + // [Eq] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE bool isZero() const noexcept { + if (ASMJIT_ARCH_64BIT) + return u64 == 0; + else + return (u32[0] | u32[1]) == 0; + } + + ASMJIT_INLINE bool isNonZero() const noexcept { + if (ASMJIT_ARCH_64BIT) + return u64 != 0; + else + return (u32[0] | u32[1]) != 0; + } + + ASMJIT_INLINE bool eq(uint64_t val) const noexcept { + return u64 == val; + } + + ASMJIT_INLINE bool eq(const UInt64& val) const noexcept { + if (ASMJIT_ARCH_64BIT) + return u64 == val.u64; + else + return u32[0] == val.u32[0] && u32[1] == val.u32[1]; + } + + // -------------------------------------------------------------------------- + // [Operator Overload] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE UInt64& operator+=(uint64_t val) noexcept { return add(val); } + ASMJIT_INLINE UInt64& operator+=(const UInt64& val) noexcept { return add(val); } + + ASMJIT_INLINE UInt64& operator-=(uint64_t val) noexcept { return sub(val); } + ASMJIT_INLINE UInt64& operator-=(const UInt64& val) noexcept { return sub(val); } + + ASMJIT_INLINE UInt64& operator&=(uint64_t val) noexcept { return and_(val); } + ASMJIT_INLINE UInt64& operator&=(const UInt64& val) noexcept { return and_(val); } + + ASMJIT_INLINE UInt64& operator|=(uint64_t val) noexcept { return or_(val); } + ASMJIT_INLINE UInt64& operator|=(const UInt64& val) noexcept { return or_(val); } + + ASMJIT_INLINE UInt64& operator^=(uint64_t val) noexcept { return xor_(val); } + ASMJIT_INLINE UInt64& operator^=(const UInt64& val) noexcept { return xor_(val); } + + ASMJIT_INLINE bool operator==(uint64_t val) const noexcept { return eq(val); } + ASMJIT_INLINE bool operator==(const UInt64& val) const noexcept { return eq(val); } + + ASMJIT_INLINE bool operator!=(uint64_t val) const noexcept { return !eq(val); } + ASMJIT_INLINE bool operator!=(const UInt64& val) const noexcept { return !eq(val); } + + ASMJIT_INLINE bool operator<(uint64_t val) const noexcept { return u64 < val; } + ASMJIT_INLINE bool operator<(const UInt64& val) const noexcept { return u64 < val.u64; } + + ASMJIT_INLINE bool operator<=(uint64_t val) const noexcept { return u64 <= val; } + ASMJIT_INLINE bool operator<=(const UInt64& val) const noexcept { return u64 <= val.u64; } + + ASMJIT_INLINE bool operator>(uint64_t val) const noexcept { return u64 > val; } + ASMJIT_INLINE bool operator>(const UInt64& val) const noexcept { return u64 > val.u64; } + + ASMJIT_INLINE bool 
operator>=(uint64_t val) const noexcept { return u64 >= val; } + ASMJIT_INLINE bool operator>=(const UInt64& val) const noexcept { return u64 >= val.u64; } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! 64-bit unsigned value. + uint64_t u64; + + uint32_t u32[2]; + uint16_t u16[4]; + uint8_t u8[8]; + + struct { +#if ASMJIT_ARCH_LE + uint32_t lo, hi; +#else + uint32_t hi, lo; +#endif // ASMJIT_ARCH_LE + }; +}; + +// ============================================================================ +// [asmjit::Lock] +// ============================================================================ + +//! \internal +//! +//! Lock. +struct Lock { + ASMJIT_NO_COPY(Lock) + + // -------------------------------------------------------------------------- + // [Windows] + // -------------------------------------------------------------------------- + +#if ASMJIT_OS_WINDOWS + typedef CRITICAL_SECTION Handle; + + //! Create a new `Lock` instance. + ASMJIT_INLINE Lock() noexcept { InitializeCriticalSection(&_handle); } + //! Destroy the `Lock` instance. + ASMJIT_INLINE ~Lock() noexcept { DeleteCriticalSection(&_handle); } + + //! Lock. + ASMJIT_INLINE void lock() noexcept { EnterCriticalSection(&_handle); } + //! Unlock. + ASMJIT_INLINE void unlock() noexcept { LeaveCriticalSection(&_handle); } +#endif // ASMJIT_OS_WINDOWS + + // -------------------------------------------------------------------------- + // [Posix] + // -------------------------------------------------------------------------- + +#if ASMJIT_OS_POSIX + typedef pthread_mutex_t Handle; + + //! Create a new `Lock` instance. + ASMJIT_INLINE Lock() noexcept { pthread_mutex_init(&_handle, nullptr); } + //! Destroy the `Lock` instance. + ASMJIT_INLINE ~Lock() noexcept { pthread_mutex_destroy(&_handle); } + + //! Lock. + ASMJIT_INLINE void lock() noexcept { pthread_mutex_lock(&_handle); } + //! Unlock. + ASMJIT_INLINE void unlock() noexcept { pthread_mutex_unlock(&_handle); } +#endif // ASMJIT_OS_POSIX + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! Native handle. + Handle _handle; +}; + +// ============================================================================ +// [asmjit::AutoLock] +// ============================================================================ + +//! \internal +//! +//! Scoped lock. +struct AutoLock { + ASMJIT_NO_COPY(AutoLock) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE AutoLock(Lock& target) noexcept : _target(target) { + _target.lock(); + } + + ASMJIT_INLINE ~AutoLock() noexcept { + _target.unlock(); + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! Reference to the `Lock`. + Lock& _target; +}; + +//! 
\} + +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" + +// [Guard] +#endif // _ASMJIT_BASE_UTILS_H diff --git a/DynamicHooks/thirdparty/AsmJit/base/vectypes.h b/DynamicHooks/thirdparty/AsmJit/base/vectypes.h new file mode 100644 index 0000000..89c3176 --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/base/vectypes.h @@ -0,0 +1,1075 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BASE_VECTYPES_H +#define _ASMJIT_BASE_VECTYPES_H + +// [Dependencies] +#include "../base/globals.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +//! \addtogroup asmjit_base +//! \{ + +// ============================================================================ +// [asmjit::Vec64] +// ============================================================================ + +//! 64-bit vector register data. +union Vec64 { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! Set all eight 8-bit signed integers. + static ASMJIT_INLINE Vec64 fromSB(int8_t x0) noexcept { + Vec64 self; + self.setSB(x0); + return self; + } + + //! Set all eight 8-bit unsigned integers. + static ASMJIT_INLINE Vec64 fromUB(uint8_t x0) noexcept { + Vec64 self; + self.setUB(x0); + return self; + } + + //! Set all eight 8-bit signed integers. + static ASMJIT_INLINE Vec64 fromSB( + int8_t x0, int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6, int8_t x7) noexcept { + + Vec64 self; + self.setSB(x0, x1, x2, x3, x4, x5, x6, x7); + return self; + } + + //! Set all eight 8-bit unsigned integers. + static ASMJIT_INLINE Vec64 fromUB( + uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) noexcept { + + Vec64 self; + self.setUB(x0, x1, x2, x3, x4, x5, x6, x7); + return self; + } + + //! Set all four 16-bit signed integers. + static ASMJIT_INLINE Vec64 fromSW(int16_t x0) noexcept { + Vec64 self; + self.setSW(x0); + return self; + } + + //! Set all four 16-bit unsigned integers. + static ASMJIT_INLINE Vec64 fromUW(uint16_t x0) noexcept { + Vec64 self; + self.setUW(x0); + return self; + } + + //! Set all four 16-bit signed integers. + static ASMJIT_INLINE Vec64 fromSW(int16_t x0, int16_t x1, int16_t x2, int16_t x3) noexcept { + Vec64 self; + self.setSW(x0, x1, x2, x3); + return self; + } + + //! Set all four 16-bit unsigned integers. + static ASMJIT_INLINE Vec64 fromUW(uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3) noexcept { + Vec64 self; + self.setUW(x0, x1, x2, x3); + return self; + } + + //! Set all two 32-bit signed integers. + static ASMJIT_INLINE Vec64 fromSD(int32_t x0) noexcept { + Vec64 self; + self.setSD(x0); + return self; + } + + //! Set all two 32-bit unsigned integers. + static ASMJIT_INLINE Vec64 fromUD(uint32_t x0) noexcept { + Vec64 self; + self.setUD(x0); + return self; + } + + //! Set all two 32-bit signed integers. + static ASMJIT_INLINE Vec64 fromSD(int32_t x0, int32_t x1) noexcept { + Vec64 self; + self.setSD(x0, x1); + return self; + } + + //! Set all two 32-bit unsigned integers. + static ASMJIT_INLINE Vec64 fromUD(uint32_t x0, uint32_t x1) noexcept { + Vec64 self; + self.setUD(x0, x1); + return self; + } + + //! Set 64-bit signed integer. + static ASMJIT_INLINE Vec64 fromSQ(int64_t x0) noexcept { + Vec64 self; + self.setSQ(x0); + return self; + } + + //! 
Set 64-bit unsigned integer.
+  static ASMJIT_INLINE Vec64 fromUQ(uint64_t x0) noexcept {
+    Vec64 self;
+    self.setUQ(x0);
+    return self;
+  }
+
+  //! Set all two SP-FP values.
+  static ASMJIT_INLINE Vec64 fromSF(float x0) noexcept {
+    Vec64 self;
+    self.setSF(x0);
+    return self;
+  }
+
+  //! Set all two SP-FP values.
+  static ASMJIT_INLINE Vec64 fromSF(float x0, float x1) noexcept {
+    Vec64 self;
+    self.setSF(x0, x1);
+    return self;
+  }
+
+  //! Set DP-FP value.
+  static ASMJIT_INLINE Vec64 fromDF(double x0) noexcept {
+    Vec64 self;
+    self.setDF(x0);
+    return self;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Set all eight 8-bit signed integers.
+  ASMJIT_INLINE void setSB(int8_t x0) noexcept {
+    setUB(static_cast<uint8_t>(x0));
+  }
+
+  //! Set all eight 8-bit unsigned integers.
+  ASMJIT_INLINE void setUB(uint8_t x0) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      // Broadcast the byte to all eight lanes by multiplication.
+      uint64_t xq = static_cast<uint64_t>(x0) * ASMJIT_UINT64_C(0x0101010101010101);
+      uq[0] = xq;
+    }
+    else {
+      uint32_t xd = static_cast<uint32_t>(x0) * static_cast<uint32_t>(0x01010101U);
+      ud[0] = xd;
+      ud[1] = xd;
+    }
+  }
+
+  //! Set all eight 8-bit signed integers.
+  ASMJIT_INLINE void setSB(
+    int8_t x0, int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6, int8_t x7) noexcept {
+
+    sb[0] = x0; sb[1] = x1; sb[2] = x2; sb[3] = x3;
+    sb[4] = x4; sb[5] = x5; sb[6] = x6; sb[7] = x7;
+  }
+
+  //! Set all eight 8-bit unsigned integers.
+  ASMJIT_INLINE void setUB(
+    uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) noexcept {
+
+    ub[0] = x0; ub[1] = x1; ub[2] = x2; ub[3] = x3;
+    ub[4] = x4; ub[5] = x5; ub[6] = x6; ub[7] = x7;
+  }
+
+  //! Set all four 16-bit signed integers.
+  ASMJIT_INLINE void setSW(int16_t x0) noexcept {
+    setUW(static_cast<uint16_t>(x0));
+  }
+
+  //! Set all four 16-bit unsigned integers.
+  ASMJIT_INLINE void setUW(uint16_t x0) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      uint64_t xq = static_cast<uint64_t>(x0) * ASMJIT_UINT64_C(0x0001000100010001);
+      uq[0] = xq;
+    }
+    else {
+      uint32_t xd = static_cast<uint32_t>(x0) * static_cast<uint32_t>(0x00010001U);
+      ud[0] = xd;
+      ud[1] = xd;
+    }
+  }
+
+  //! Set all four 16-bit signed integers.
+  ASMJIT_INLINE void setSW(int16_t x0, int16_t x1, int16_t x2, int16_t x3) noexcept {
+    sw[0] = x0; sw[1] = x1; sw[2] = x2; sw[3] = x3;
+  }
+
+  //! Set all four 16-bit unsigned integers.
+  ASMJIT_INLINE void setUW(uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3) noexcept {
+    uw[0] = x0; uw[1] = x1; uw[2] = x2; uw[3] = x3;
+  }
+
+  //! Set all two 32-bit signed integers.
+  ASMJIT_INLINE void setSD(int32_t x0) noexcept {
+    sd[0] = x0; sd[1] = x0;
+  }
+
+  //! Set all two 32-bit unsigned integers.
+  ASMJIT_INLINE void setUD(uint32_t x0) noexcept {
+    ud[0] = x0; ud[1] = x0;
+  }
+
+  //! Set all two 32-bit signed integers.
+  ASMJIT_INLINE void setSD(int32_t x0, int32_t x1) noexcept {
+    sd[0] = x0; sd[1] = x1;
+  }
+
+  //! Set all two 32-bit unsigned integers.
+  ASMJIT_INLINE void setUD(uint32_t x0, uint32_t x1) noexcept {
+    ud[0] = x0; ud[1] = x1;
+  }
+
+  //! Set 64-bit signed integer.
+  ASMJIT_INLINE void setSQ(int64_t x0) noexcept {
+    sq[0] = x0;
+  }
+
+  //! Set 64-bit unsigned integer.
+  ASMJIT_INLINE void setUQ(uint64_t x0) noexcept {
+    uq[0] = x0;
+  }
+
+  //! Set all two SP-FP values.
+  ASMJIT_INLINE void setSF(float x0) noexcept {
+    sf[0] = x0; sf[1] = x0;
+  }
+
+  //! Set all two SP-FP values.
+  ASMJIT_INLINE void setSF(float x0, float x1) noexcept {
+    sf[0] = x0; sf[1] = x1;
+  }
+
+  //! Set DP-FP value.
+  ASMJIT_INLINE void setDF(double x0) noexcept {
+    df[0] = x0;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Array of eight 8-bit signed integers.
+  int8_t sb[8];
+  //! Array of eight 8-bit unsigned integers.
+  uint8_t ub[8];
+  //! Array of four 16-bit signed integers.
+  int16_t sw[4];
+  //! Array of four 16-bit unsigned integers.
+  uint16_t uw[4];
+  //! Array of two 32-bit signed integers.
+  int32_t sd[2];
+  //! Array of two 32-bit unsigned integers.
+  uint32_t ud[2];
+  //! Array of one 64-bit signed integer.
+  int64_t sq[1];
+  //! Array of one 64-bit unsigned integer.
+  uint64_t uq[1];
+
+  //! Array of two SP-FP values.
+  float sf[2];
+  //! Array of one DP-FP value.
+  double df[1];
+};
+
+// ============================================================================
+// [asmjit::Vec128]
+// ============================================================================
+
+//! 128-bit vector register data.
+union Vec128 {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Set all sixteen 8-bit signed integers.
+  static ASMJIT_INLINE Vec128 fromSB(int8_t x0) noexcept {
+    Vec128 self;
+    self.setSB(x0);
+    return self;
+  }
+
+  //! Set all sixteen 8-bit unsigned integers.
+  static ASMJIT_INLINE Vec128 fromUB(uint8_t x0) noexcept {
+    Vec128 self;
+    self.setUB(x0);
+    return self;
+  }
+
+  //! Set all sixteen 8-bit signed integers.
+  static ASMJIT_INLINE Vec128 fromSB(
+    int8_t x0 , int8_t x1 , int8_t x2 , int8_t x3 ,
+    int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 ,
+    int8_t x8 , int8_t x9 , int8_t x10, int8_t x11,
+    int8_t x12, int8_t x13, int8_t x14, int8_t x15) noexcept {
+
+    Vec128 self;
+    self.setSB(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15);
+    return self;
+  }
+
+  //! Set all sixteen 8-bit unsigned integers.
+  static ASMJIT_INLINE Vec128 fromUB(
+    uint8_t x0 , uint8_t x1 , uint8_t x2 , uint8_t x3 ,
+    uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 ,
+    uint8_t x8 , uint8_t x9 , uint8_t x10, uint8_t x11,
+    uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15) noexcept {
+
+    Vec128 self;
+    self.setUB(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15);
+    return self;
+  }
+
+  //! Set all eight 16-bit signed integers.
+  static ASMJIT_INLINE Vec128 fromSW(int16_t x0) noexcept {
+    Vec128 self;
+    self.setSW(x0);
+    return self;
+  }
+
+  //! Set all eight 16-bit unsigned integers.
+  static ASMJIT_INLINE Vec128 fromUW(uint16_t x0) noexcept {
+    Vec128 self;
+    self.setUW(x0);
+    return self;
+  }
+
+  //! Set all eight 16-bit signed integers.
+  static ASMJIT_INLINE Vec128 fromSW(
+    int16_t x0, int16_t x1, int16_t x2, int16_t x3, int16_t x4, int16_t x5, int16_t x6, int16_t x7) noexcept {
+
+    Vec128 self;
+    self.setSW(x0, x1, x2, x3, x4, x5, x6, x7);
+    return self;
+  }
+
+  //! Set all eight 16-bit unsigned integers.
+  static ASMJIT_INLINE Vec128 fromUW(
+    uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) noexcept {
+
+    Vec128 self;
+    self.setUW(x0, x1, x2, x3, x4, x5, x6, x7);
+    return self;
+  }
+
+  //! Set all four 32-bit signed integers.
+  static ASMJIT_INLINE Vec128 fromSD(int32_t x0) noexcept {
+    Vec128 self;
+    self.setSD(x0);
+    return self;
+  }
+
+  //! Set all four 32-bit unsigned integers.
+  static ASMJIT_INLINE Vec128 fromUD(uint32_t x0) noexcept {
+    Vec128 self;
+    self.setUD(x0);
+    return self;
+  }
+
+  //! Set all four 32-bit signed integers.
+  static ASMJIT_INLINE Vec128 fromSD(int32_t x0, int32_t x1, int32_t x2, int32_t x3) noexcept {
+    Vec128 self;
+    self.setSD(x0, x1, x2, x3);
+    return self;
+  }
+
+  //! Set all four 32-bit unsigned integers.
+  static ASMJIT_INLINE Vec128 fromUD(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) noexcept {
+    Vec128 self;
+    self.setUD(x0, x1, x2, x3);
+    return self;
+  }
+
+  //! Set all two 64-bit signed integers.
+  static ASMJIT_INLINE Vec128 fromSQ(int64_t x0) noexcept {
+    Vec128 self;
+    self.setSQ(x0);
+    return self;
+  }
+
+  //! Set all two 64-bit unsigned integers.
+  static ASMJIT_INLINE Vec128 fromUQ(uint64_t x0) noexcept {
+    Vec128 self;
+    self.setUQ(x0);
+    return self;
+  }
+
+  //! Set all two 64-bit signed integers.
+  static ASMJIT_INLINE Vec128 fromSQ(int64_t x0, int64_t x1) noexcept {
+    Vec128 self;
+    self.setSQ(x0, x1);
+    return self;
+  }
+
+  //! Set all two 64-bit unsigned integers.
+  static ASMJIT_INLINE Vec128 fromUQ(uint64_t x0, uint64_t x1) noexcept {
+    Vec128 self;
+    self.setUQ(x0, x1);
+    return self;
+  }
+
+  //! Set all four SP-FP floats.
+  static ASMJIT_INLINE Vec128 fromSF(float x0) noexcept {
+    Vec128 self;
+    self.setSF(x0);
+    return self;
+  }
+
+  //! Set all four SP-FP floats.
+  static ASMJIT_INLINE Vec128 fromSF(float x0, float x1, float x2, float x3) noexcept {
+    Vec128 self;
+    self.setSF(x0, x1, x2, x3);
+    return self;
+  }
+
+  //! Set all two DP-FP floats.
+  static ASMJIT_INLINE Vec128 fromDF(double x0) noexcept {
+    Vec128 self;
+    self.setDF(x0);
+    return self;
+  }
+
+  //! Set all two DP-FP floats.
+  static ASMJIT_INLINE Vec128 fromDF(double x0, double x1) noexcept {
+    Vec128 self;
+    self.setDF(x0, x1);
+    return self;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Set all sixteen 8-bit signed integers.
+  ASMJIT_INLINE void setSB(int8_t x0) noexcept {
+    setUB(static_cast<uint8_t>(x0));
+  }
+
+  //! Set all sixteen 8-bit unsigned integers.
+  ASMJIT_INLINE void setUB(uint8_t x0) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      uint64_t xq = static_cast<uint64_t>(x0) * ASMJIT_UINT64_C(0x0101010101010101);
+      uq[0] = xq;
+      uq[1] = xq;
+    }
+    else {
+      uint32_t xd = static_cast<uint32_t>(x0) * static_cast<uint32_t>(0x01010101U);
+      ud[0] = xd;
+      ud[1] = xd;
+      ud[2] = xd;
+      ud[3] = xd;
+    }
+  }
+
+  //! Set all sixteen 8-bit signed integers.
+  ASMJIT_INLINE void setSB(
+    int8_t x0 , int8_t x1 , int8_t x2 , int8_t x3 ,
+    int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 ,
+    int8_t x8 , int8_t x9 , int8_t x10, int8_t x11,
+    int8_t x12, int8_t x13, int8_t x14, int8_t x15) noexcept {
+
+    sb[0 ] = x0 ; sb[1 ] = x1 ; sb[2 ] = x2 ; sb[3 ] = x3 ;
+    sb[4 ] = x4 ; sb[5 ] = x5 ; sb[6 ] = x6 ; sb[7 ] = x7 ;
+    sb[8 ] = x8 ; sb[9 ] = x9 ; sb[10] = x10; sb[11] = x11;
+    sb[12] = x12; sb[13] = x13; sb[14] = x14; sb[15] = x15;
+  }
+
+  //! Set all sixteen 8-bit unsigned integers.
+  ASMJIT_INLINE void setUB(
+    uint8_t x0 , uint8_t x1 , uint8_t x2 , uint8_t x3 ,
+    uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 ,
+    uint8_t x8 , uint8_t x9 , uint8_t x10, uint8_t x11,
+    uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15) noexcept {
+
+    ub[0 ] = x0 ; ub[1 ] = x1 ; ub[2 ] = x2 ; ub[3 ] = x3 ;
+    ub[4 ] = x4 ; ub[5 ] = x5 ; ub[6 ] = x6 ; ub[7 ] = x7 ;
+    ub[8 ] = x8 ; ub[9 ] = x9 ; ub[10] = x10; ub[11] = x11;
+    ub[12] = x12; ub[13] = x13; ub[14] = x14; ub[15] = x15;
+  }
+
+  //! Set all eight 16-bit signed integers.
+  ASMJIT_INLINE void setSW(int16_t x0) noexcept {
+    setUW(static_cast<uint16_t>(x0));
+  }
+
+  //! Set all eight 16-bit unsigned integers.
+  ASMJIT_INLINE void setUW(uint16_t x0) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      uint64_t xq = static_cast<uint64_t>(x0) * ASMJIT_UINT64_C(0x0001000100010001);
+      uq[0] = xq;
+      uq[1] = xq;
+    }
+    else {
+      uint32_t xd = static_cast<uint32_t>(x0) * static_cast<uint32_t>(0x00010001U);
+      ud[0] = xd;
+      ud[1] = xd;
+      ud[2] = xd;
+      ud[3] = xd;
+    }
+  }
+
+  //! Set all eight 16-bit signed integers.
+  ASMJIT_INLINE void setSW(
+    int16_t x0, int16_t x1, int16_t x2, int16_t x3, int16_t x4, int16_t x5, int16_t x6, int16_t x7) noexcept {
+
+    sw[0] = x0; sw[1] = x1; sw[2] = x2; sw[3] = x3;
+    sw[4] = x4; sw[5] = x5; sw[6] = x6; sw[7] = x7;
+  }
+
+  //! Set all eight 16-bit unsigned integers.
+  ASMJIT_INLINE void setUW(
+    uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) noexcept {
+
+    uw[0] = x0; uw[1] = x1; uw[2] = x2; uw[3] = x3;
+    uw[4] = x4; uw[5] = x5; uw[6] = x6; uw[7] = x7;
+  }
+
+  //! Set all four 32-bit signed integers.
+  ASMJIT_INLINE void setSD(int32_t x0) noexcept {
+    setUD(static_cast<uint32_t>(x0));
+  }
+
+  //! Set all four 32-bit unsigned integers.
+  ASMJIT_INLINE void setUD(uint32_t x0) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      uint64_t t = (static_cast<uint64_t>(x0) << 32) + x0;
+      uq[0] = t;
+      uq[1] = t;
+    }
+    else {
+      ud[0] = x0;
+      ud[1] = x0;
+      ud[2] = x0;
+      ud[3] = x0;
+    }
+  }
+
+  //! Set all four 32-bit signed integers.
+  ASMJIT_INLINE void setSD(int32_t x0, int32_t x1, int32_t x2, int32_t x3) noexcept {
+    sd[0] = x0; sd[1] = x1; sd[2] = x2; sd[3] = x3;
+  }
+
+  //! Set all four 32-bit unsigned integers.
+  ASMJIT_INLINE void setUD(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) noexcept {
+    ud[0] = x0; ud[1] = x1; ud[2] = x2; ud[3] = x3;
+  }
+
+  //! Set all two 64-bit signed integers.
+  ASMJIT_INLINE void setSQ(int64_t x0) noexcept {
+    sq[0] = x0; sq[1] = x0;
+  }
+
+  //! Set all two 64-bit unsigned integers.
+  ASMJIT_INLINE void setUQ(uint64_t x0) noexcept {
+    uq[0] = x0; uq[1] = x0;
+  }
+
+  //! Set all two 64-bit signed integers.
+  ASMJIT_INLINE void setSQ(int64_t x0, int64_t x1) noexcept {
+    sq[0] = x0; sq[1] = x1;
+  }
+
+  //! Set all two 64-bit unsigned integers.
+  ASMJIT_INLINE void setUQ(uint64_t x0, uint64_t x1) noexcept {
+    uq[0] = x0; uq[1] = x1;
+  }
+
+  //! Set all four SP-FP floats.
+  ASMJIT_INLINE void setSF(float x0) noexcept {
+    sf[0] = x0; sf[1] = x0; sf[2] = x0; sf[3] = x0;
+  }
+
+  //! Set all four SP-FP floats.
+  ASMJIT_INLINE void setSF(float x0, float x1, float x2, float x3) noexcept {
+    sf[0] = x0; sf[1] = x1; sf[2] = x2; sf[3] = x3;
+  }
+
+  //! Set all two DP-FP floats.
+  ASMJIT_INLINE void setDF(double x0) noexcept {
+    df[0] = x0; df[1] = x0;
+  }
+
+  //! Set all two DP-FP floats.
+ ASMJIT_INLINE void setDF(double x0, double x1) noexcept { + df[0] = x0; df[1] = x1; + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! Array of sixteen 8-bit signed integers. + int8_t sb[16]; + //! Array of sixteen 8-bit unsigned integers. + uint8_t ub[16]; + //! Array of eight 16-bit signed integers. + int16_t sw[8]; + //! Array of eight 16-bit unsigned integers. + uint16_t uw[8]; + //! Array of four 32-bit signed integers. + int32_t sd[4]; + //! Array of four 32-bit unsigned integers. + uint32_t ud[4]; + //! Array of two 64-bit signed integers. + int64_t sq[2]; + //! Array of two 64-bit unsigned integers. + uint64_t uq[2]; + + //! Array of four 32-bit single precision floating points. + float sf[4]; + //! Array of two 64-bit double precision floating points. + double df[2]; +}; + +// ============================================================================ +// [asmjit::Vec256] +// ============================================================================ + +//! 256-bit vector register data. +union Vec256 { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! Set all thirty two 8-bit signed integers. + static ASMJIT_INLINE Vec256 fromSB(int8_t x0) noexcept { + Vec256 self; + self.setSB(x0); + return self; + } + + //! Set all thirty two 8-bit unsigned integers. + static ASMJIT_INLINE Vec256 fromUB(uint8_t x0) noexcept { + Vec256 self; + self.setUB(x0); + return self; + } + + //! Set all thirty two 8-bit signed integers. + static ASMJIT_INLINE Vec256 fromSB( + int8_t x0 , int8_t x1 , int8_t x2 , int8_t x3 , + int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 , + int8_t x8 , int8_t x9 , int8_t x10, int8_t x11, + int8_t x12, int8_t x13, int8_t x14, int8_t x15, + int8_t x16, int8_t x17, int8_t x18, int8_t x19, + int8_t x20, int8_t x21, int8_t x22, int8_t x23, + int8_t x24, int8_t x25, int8_t x26, int8_t x27, + int8_t x28, int8_t x29, int8_t x30, int8_t x31) noexcept { + + Vec256 self; + self.setSB( + x0, x1 , x2 , x3 , x4 , x5 , x6 , x7 , x8 , x9 , x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31); + return self; + } + + //! Set all thirty two 8-bit unsigned integers. + static ASMJIT_INLINE Vec256 fromUB( + uint8_t x0 , uint8_t x1 , uint8_t x2 , uint8_t x3 , + uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 , + uint8_t x8 , uint8_t x9 , uint8_t x10, uint8_t x11, + uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15, + uint8_t x16, uint8_t x17, uint8_t x18, uint8_t x19, + uint8_t x20, uint8_t x21, uint8_t x22, uint8_t x23, + uint8_t x24, uint8_t x25, uint8_t x26, uint8_t x27, + uint8_t x28, uint8_t x29, uint8_t x30, uint8_t x31) noexcept { + + Vec256 self; + self.setUB( + x0, x1 , x2 , x3 , x4 , x5 , x6 , x7 , x8 , x9 , x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31); + return self; + } + + //! Set all sixteen 16-bit signed integers. + static ASMJIT_INLINE Vec256 fromSW(int16_t x0) noexcept { + Vec256 self; + self.setSW(x0); + return self; + } + + //! Set all sixteen 16-bit unsigned integers. + static ASMJIT_INLINE Vec256 fromUW(uint16_t x0) noexcept { + Vec256 self; + self.setUW(x0); + return self; + } + + //! Set all sixteen 16-bit signed integers. 
+  static ASMJIT_INLINE Vec256 fromSW(
+    int16_t x0, int16_t x1, int16_t x2 , int16_t x3 , int16_t x4 , int16_t x5 , int16_t x6 , int16_t x7 ,
+    int16_t x8, int16_t x9, int16_t x10, int16_t x11, int16_t x12, int16_t x13, int16_t x14, int16_t x15) noexcept {
+
+    Vec256 self;
+    self.setSW(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15);
+    return self;
+  }
+
+  //! Set all sixteen 16-bit unsigned integers.
+  static ASMJIT_INLINE Vec256 fromUW(
+    uint16_t x0, uint16_t x1, uint16_t x2 , uint16_t x3 , uint16_t x4 , uint16_t x5 , uint16_t x6 , uint16_t x7 ,
+    uint16_t x8, uint16_t x9, uint16_t x10, uint16_t x11, uint16_t x12, uint16_t x13, uint16_t x14, uint16_t x15) noexcept {
+
+    Vec256 self;
+    self.setUW(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15);
+    return self;
+  }
+
+  //! Set all eight 32-bit signed integers.
+  static ASMJIT_INLINE Vec256 fromSD(int32_t x0) noexcept {
+    Vec256 self;
+    self.setSD(x0);
+    return self;
+  }
+
+  //! Set all eight 32-bit unsigned integers.
+  static ASMJIT_INLINE Vec256 fromUD(uint32_t x0) noexcept {
+    Vec256 self;
+    self.setUD(x0);
+    return self;
+  }
+
+  //! Set all eight 32-bit signed integers.
+  static ASMJIT_INLINE Vec256 fromSD(
+    int32_t x0, int32_t x1, int32_t x2, int32_t x3,
+    int32_t x4, int32_t x5, int32_t x6, int32_t x7) noexcept {
+
+    Vec256 self;
+    self.setSD(x0, x1, x2, x3, x4, x5, x6, x7);
+    return self;
+  }
+
+  //! Set all eight 32-bit unsigned integers.
+  static ASMJIT_INLINE Vec256 fromUD(
+    uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
+    uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7) noexcept {
+
+    Vec256 self;
+    self.setUD(x0, x1, x2, x3, x4, x5, x6, x7);
+    return self;
+  }
+
+  //! Set all four 64-bit signed integers.
+  static ASMJIT_INLINE Vec256 fromSQ(int64_t x0) noexcept {
+    Vec256 self;
+    self.setSQ(x0);
+    return self;
+  }
+
+  //! Set all four 64-bit unsigned integers.
+  static ASMJIT_INLINE Vec256 fromUQ(uint64_t x0) noexcept {
+    Vec256 self;
+    self.setUQ(x0);
+    return self;
+  }
+
+  //! Set all four 64-bit signed integers.
+  static ASMJIT_INLINE Vec256 fromSQ(int64_t x0, int64_t x1, int64_t x2, int64_t x3) noexcept {
+    Vec256 self;
+    self.setSQ(x0, x1, x2, x3);
+    return self;
+  }
+
+  //! Set all four 64-bit unsigned integers.
+  static ASMJIT_INLINE Vec256 fromUQ(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3) noexcept {
+    Vec256 self;
+    self.setUQ(x0, x1, x2, x3);
+    return self;
+  }
+
+  //! Set all eight SP-FP floats.
+  static ASMJIT_INLINE Vec256 fromSF(float x0) noexcept {
+    Vec256 self;
+    self.setSF(x0);
+    return self;
+  }
+
+  //! Set all eight SP-FP floats.
+  static ASMJIT_INLINE Vec256 fromSF(
+    float x0, float x1, float x2, float x3,
+    float x4, float x5, float x6, float x7) noexcept {
+
+    Vec256 self;
+    self.setSF(x0, x1, x2, x3, x4, x5, x6, x7);
+    return self;
+  }
+
+  //! Set all four DP-FP floats.
+  static ASMJIT_INLINE Vec256 fromDF(double x0) noexcept {
+    Vec256 self;
+    self.setDF(x0);
+    return self;
+  }
+
+  //! Set all four DP-FP floats.
+  static ASMJIT_INLINE Vec256 fromDF(double x0, double x1, double x2, double x3) noexcept {
+    Vec256 self;
+    self.setDF(x0, x1, x2, x3);
+    return self;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Set all thirty two 8-bit signed integers.
+  ASMJIT_INLINE void setSB(int8_t x0) noexcept {
+    setUB(static_cast<uint8_t>(x0));
+  }
+
+  //! Set all thirty two 8-bit unsigned integers.
+  ASMJIT_INLINE void setUB(uint8_t x0) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      uint64_t xq = static_cast<uint64_t>(x0) * ASMJIT_UINT64_C(0x0101010101010101);
+      uq[0] = xq;
+      uq[1] = xq;
+      uq[2] = xq;
+      uq[3] = xq;
+    }
+    else {
+      uint32_t xd = static_cast<uint32_t>(x0) * static_cast<uint32_t>(0x01010101U);
+      ud[0] = xd;
+      ud[1] = xd;
+      ud[2] = xd;
+      ud[3] = xd;
+      ud[4] = xd;
+      ud[5] = xd;
+      ud[6] = xd;
+      ud[7] = xd;
+    }
+  }
+
+  //! Set all thirty two 8-bit signed integers.
+  ASMJIT_INLINE void setSB(
+    int8_t x0 , int8_t x1 , int8_t x2 , int8_t x3 ,
+    int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 ,
+    int8_t x8 , int8_t x9 , int8_t x10, int8_t x11,
+    int8_t x12, int8_t x13, int8_t x14, int8_t x15,
+    int8_t x16, int8_t x17, int8_t x18, int8_t x19,
+    int8_t x20, int8_t x21, int8_t x22, int8_t x23,
+    int8_t x24, int8_t x25, int8_t x26, int8_t x27,
+    int8_t x28, int8_t x29, int8_t x30, int8_t x31) noexcept {
+
+    sb[0 ] = x0 ; sb[1 ] = x1 ; sb[2 ] = x2 ; sb[3 ] = x3 ;
+    sb[4 ] = x4 ; sb[5 ] = x5 ; sb[6 ] = x6 ; sb[7 ] = x7 ;
+    sb[8 ] = x8 ; sb[9 ] = x9 ; sb[10] = x10; sb[11] = x11;
+    sb[12] = x12; sb[13] = x13; sb[14] = x14; sb[15] = x15;
+    sb[16] = x16; sb[17] = x17; sb[18] = x18; sb[19] = x19;
+    sb[20] = x20; sb[21] = x21; sb[22] = x22; sb[23] = x23;
+    sb[24] = x24; sb[25] = x25; sb[26] = x26; sb[27] = x27;
+    sb[28] = x28; sb[29] = x29; sb[30] = x30; sb[31] = x31;
+  }
+
+  //! Set all thirty two 8-bit unsigned integers.
+  ASMJIT_INLINE void setUB(
+    uint8_t x0 , uint8_t x1 , uint8_t x2 , uint8_t x3 ,
+    uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 ,
+    uint8_t x8 , uint8_t x9 , uint8_t x10, uint8_t x11,
+    uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15,
+    uint8_t x16, uint8_t x17, uint8_t x18, uint8_t x19,
+    uint8_t x20, uint8_t x21, uint8_t x22, uint8_t x23,
+    uint8_t x24, uint8_t x25, uint8_t x26, uint8_t x27,
+    uint8_t x28, uint8_t x29, uint8_t x30, uint8_t x31) noexcept {
+
+    ub[0 ] = x0 ; ub[1 ] = x1 ; ub[2 ] = x2 ; ub[3 ] = x3 ;
+    ub[4 ] = x4 ; ub[5 ] = x5 ; ub[6 ] = x6 ; ub[7 ] = x7 ;
+    ub[8 ] = x8 ; ub[9 ] = x9 ; ub[10] = x10; ub[11] = x11;
+    ub[12] = x12; ub[13] = x13; ub[14] = x14; ub[15] = x15;
+    ub[16] = x16; ub[17] = x17; ub[18] = x18; ub[19] = x19;
+    ub[20] = x20; ub[21] = x21; ub[22] = x22; ub[23] = x23;
+    ub[24] = x24; ub[25] = x25; ub[26] = x26; ub[27] = x27;
+    ub[28] = x28; ub[29] = x29; ub[30] = x30; ub[31] = x31;
+  }
+
+  //! Set all sixteen 16-bit signed integers.
+  ASMJIT_INLINE void setSW(int16_t x0) noexcept {
+    setUW(static_cast<uint16_t>(x0));
+  }
+
+  //! Set all sixteen 16-bit unsigned integers.
+  ASMJIT_INLINE void setUW(uint16_t x0) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      uint64_t xq = static_cast<uint64_t>(x0) * ASMJIT_UINT64_C(0x0001000100010001);
+      uq[0] = xq;
+      uq[1] = xq;
+      uq[2] = xq;
+      uq[3] = xq;
+    }
+    else {
+      uint32_t xd = static_cast<uint32_t>(x0) * static_cast<uint32_t>(0x00010001U);
+      ud[0] = xd;
+      ud[1] = xd;
+      ud[2] = xd;
+      ud[3] = xd;
+      ud[4] = xd;
+      ud[5] = xd;
+      ud[6] = xd;
+      ud[7] = xd;
+    }
+  }
+
+  //! Set all sixteen 16-bit signed integers.
+  ASMJIT_INLINE void setSW(
+    int16_t x0, int16_t x1, int16_t x2 , int16_t x3 , int16_t x4 , int16_t x5 , int16_t x6 , int16_t x7,
+    int16_t x8, int16_t x9, int16_t x10, int16_t x11, int16_t x12, int16_t x13, int16_t x14, int16_t x15) noexcept {
+
+    sw[0 ] = x0 ; sw[1 ] = x1 ; sw[2 ] = x2 ; sw[3 ] = x3 ;
+    sw[4 ] = x4 ; sw[5 ] = x5 ; sw[6 ] = x6 ; sw[7 ] = x7 ;
+    sw[8 ] = x8 ; sw[9 ] = x9 ; sw[10] = x10; sw[11] = x11;
+    sw[12] = x12; sw[13] = x13; sw[14] = x14; sw[15] = x15;
+  }
+
+  //! Set all sixteen 16-bit unsigned integers.
+  ASMJIT_INLINE void setUW(
+    uint16_t x0, uint16_t x1, uint16_t x2 , uint16_t x3 , uint16_t x4 , uint16_t x5 , uint16_t x6 , uint16_t x7,
+    uint16_t x8, uint16_t x9, uint16_t x10, uint16_t x11, uint16_t x12, uint16_t x13, uint16_t x14, uint16_t x15) noexcept {
+
+    uw[0 ] = x0 ; uw[1 ] = x1 ; uw[2 ] = x2 ; uw[3 ] = x3 ;
+    uw[4 ] = x4 ; uw[5 ] = x5 ; uw[6 ] = x6 ; uw[7 ] = x7 ;
+    uw[8 ] = x8 ; uw[9 ] = x9 ; uw[10] = x10; uw[11] = x11;
+    uw[12] = x12; uw[13] = x13; uw[14] = x14; uw[15] = x15;
+  }
+
+  //! Set all eight 32-bit signed integers.
+  ASMJIT_INLINE void setSD(int32_t x0) noexcept {
+    setUD(static_cast<uint32_t>(x0));
+  }
+
+  //! Set all eight 32-bit unsigned integers.
+  ASMJIT_INLINE void setUD(uint32_t x0) noexcept {
+    if (ASMJIT_ARCH_64BIT) {
+      uint64_t xq = (static_cast<uint64_t>(x0) << 32) + x0;
+      uq[0] = xq;
+      uq[1] = xq;
+      uq[2] = xq;
+      uq[3] = xq;
+    }
+    else {
+      ud[0] = x0;
+      ud[1] = x0;
+      ud[2] = x0;
+      ud[3] = x0;
+      ud[4] = x0;
+      ud[5] = x0;
+      ud[6] = x0;
+      ud[7] = x0;
+    }
+  }
+
+  //! Set all eight 32-bit signed integers.
+  ASMJIT_INLINE void setSD(
+    int32_t x0, int32_t x1, int32_t x2, int32_t x3,
+    int32_t x4, int32_t x5, int32_t x6, int32_t x7) noexcept {
+
+    sd[0] = x0; sd[1] = x1; sd[2] = x2; sd[3] = x3;
+    sd[4] = x4; sd[5] = x5; sd[6] = x6; sd[7] = x7;
+  }
+
+  //! Set all eight 32-bit unsigned integers.
+  ASMJIT_INLINE void setUD(
+    uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
+    uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7) noexcept {
+
+    ud[0] = x0; ud[1] = x1; ud[2] = x2; ud[3] = x3;
+    ud[4] = x4; ud[5] = x5; ud[6] = x6; ud[7] = x7;
+  }
+
+  //! Set all four 64-bit signed integers.
+  ASMJIT_INLINE void setSQ(int64_t x0) noexcept {
+    sq[0] = x0; sq[1] = x0; sq[2] = x0; sq[3] = x0;
+  }
+
+  //! Set all four 64-bit unsigned integers.
+  ASMJIT_INLINE void setUQ(uint64_t x0) noexcept {
+    uq[0] = x0; uq[1] = x0; uq[2] = x0; uq[3] = x0;
+  }
+
+  //! Set all four 64-bit signed integers.
+  ASMJIT_INLINE void setSQ(int64_t x0, int64_t x1, int64_t x2, int64_t x3) noexcept {
+    sq[0] = x0; sq[1] = x1; sq[2] = x2; sq[3] = x3;
+  }
+
+  //! Set all four 64-bit unsigned integers.
+  ASMJIT_INLINE void setUQ(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3) noexcept {
+    uq[0] = x0; uq[1] = x1; uq[2] = x2; uq[3] = x3;
+  }
+
+  //! Set all eight SP-FP floats.
+  ASMJIT_INLINE void setSF(float x0) noexcept {
+    sf[0] = x0; sf[1] = x0; sf[2] = x0; sf[3] = x0;
+    sf[4] = x0; sf[5] = x0; sf[6] = x0; sf[7] = x0;
+  }
+
+  //! Set all eight SP-FP floats.
+  ASMJIT_INLINE void setSF(
+    float x0, float x1, float x2, float x3,
+    float x4, float x5, float x6, float x7) noexcept {
+
+    sf[0] = x0; sf[1] = x1; sf[2] = x2; sf[3] = x3;
+    sf[4] = x4; sf[5] = x5; sf[6] = x6; sf[7] = x7;
+  }
+
+  //! Set all four DP-FP floats.
+  ASMJIT_INLINE void setDF(double x0) noexcept {
+    df[0] = x0; df[1] = x0; df[2] = x0; df[3] = x0;
+  }
+
+  //! Set all four DP-FP floats.
+  ASMJIT_INLINE void setDF(double x0, double x1, double x2, double x3) noexcept {
+    df[0] = x0; df[1] = x1; df[2] = x2; df[3] = x3;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Array of thirty two 8-bit signed integers.
+  int8_t sb[32];
+  //! Array of thirty two 8-bit unsigned integers.
+  uint8_t ub[32];
+  //! Array of sixteen 16-bit signed integers.
+  int16_t sw[16];
+  //! Array of sixteen 16-bit unsigned integers.
+  uint16_t uw[16];
+  //! Array of eight 32-bit signed integers.
+  int32_t sd[8];
+  //!
Array of eight 32-bit unsigned integers.
+  uint32_t ud[8];
+  //! Array of four 64-bit signed integers.
+  int64_t sq[4];
+  //! Array of four 64-bit unsigned integers.
+  uint64_t uq[4];
+
+  //! Array of eight 32-bit single precision floating points.
+  float sf[8];
+  //! Array of four 64-bit double precision floating points.
+  double df[4];
+};
+
+//! \}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_VECTYPES_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base/vmem.cpp b/DynamicHooks/thirdparty/AsmJit/base/vmem.cpp
new file mode 100644
index 0000000..7a77516
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/vmem.cpp
@@ -0,0 +1,1282 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies]
+#include "../base/globals.h"
+#include "../base/vmem.h"
+
+#if ASMJIT_OS_POSIX
+# include <sys/types.h>
+# include <sys/mman.h>
+# include <unistd.h>
+#endif // ASMJIT_OS_POSIX
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+// This file contains the implementation of virtual memory management for the
+// AsmJit library. The initial concept is to keep this implementation simple
+// but efficient. There are several reasons the implementation was written
+// from scratch.
+//
+// Goals:
+//
+// - The granularity of allocated blocks is different from the granularity of
+//   a typical C malloc. It is at least 64 bytes, so Assembler/Compiler can
+//   guarantee the required alignment. Alignment requirements can grow in the
+//   future, but at the moment 64 bytes is safe (we may jump to 128 bytes if
+//   necessary or make it configurable).
+//
+// - Keep memory manager bookkeeping outside of the allocated virtual memory
+//   pages, because these pages are meant for executing machine code and
+//   should not hold the data used to keep track of them. Another reason is
+//   that some environments (e.g. iOS) allow generating and running JIT code,
+//   but that code has to be mapped [Executable, but not Writable].
+//
+// - Keep the implementation simple and easy to follow.
+//
+// The implementation is based on bit arrays and binary trees. Bit arrays
+// contain information about allocated and unused blocks of memory. The size
+// of a block is described by `MemNode::density` and the count of blocks is
+// stored in `MemNode::blocks`. For example, if the density is 64 and the
+// count of blocks is 20, the memory node contains 64*20 bytes of memory and
+// the smallest possible allocation (and also alignment) is 64 bytes. So the
+// density is also related to memory alignment. Binary trees (red-black) are
+// used for fast lookup of all addresses allocated by a memory manager
+// instance. This is used mainly by `VMemMgr::release()`.
+//
+// Bit array looks like this (empty = unused, X = used) - Size of block 64:
+//
+// -------------------------------------------------------------------------
+// | |X|X| | | | | |X|X|X|X|X|X| | | | | | | | | | | | |X| | | | |X|X|X| | |
+// -------------------------------------------------------------------------
+//                  (Maximum continuous block)
+//
+// These bits show that there are 12 allocated blocks (X) of 64 bytes, so the
+// total allocated size is 768 bytes. The longest run of free blocks is also
+// 12, so the maximum contiguous allocation still possible is 12 * 64 bytes.
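+//
+// A short worked illustration of the bookkeeping above (illustrative numbers,
+// assuming the default block density of 64 set by the VMemMgr constructor
+// below): a node backed by 131072 bytes of virtual memory tracks
+// 131072 / 64 = 2048 blocks, i.e. 2048 bits in `baUsed`. A request for 100
+// bytes is first aligned to 32 bytes (= 128), which needs
+// (128 + 64 - 1) / 64 = 2 blocks. The allocator then searches `baUsed` for
+// two adjacent zero bits, sets them, and sets need - 1 = 1 bit in `baCont`
+// so that release() can later detect where the allocated run ends.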
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::VMemUtil - Windows]
+// ============================================================================
+
+// Windows specific implementation using `VirtualAllocEx` and `VirtualFree`.
+#if ASMJIT_OS_WINDOWS
+struct VMemLocal {
+  // AsmJit allows passing a `nullptr` handle to `VMemUtil`. This function is
+  // just a convenient way to convert such a handle to the current process one.
+  ASMJIT_INLINE HANDLE getSafeProcessHandle(HANDLE hParam) const noexcept {
+    return hParam != nullptr ? hParam : hProcess;
+  }
+
+  size_t pageSize;
+  size_t pageGranularity;
+  HANDLE hProcess;
+};
+static VMemLocal vMemLocal;
+
+static const VMemLocal& vMemGet() noexcept {
+  VMemLocal& vMem = vMemLocal;
+
+  if (!vMem.hProcess) {
+    SYSTEM_INFO info;
+    ::GetSystemInfo(&info);
+
+    vMem.pageSize = Utils::alignToPowerOf2(info.dwPageSize);
+    vMem.pageGranularity = info.dwAllocationGranularity;
+
+    vMem.hProcess = ::GetCurrentProcess();
+  }
+
+  return vMem;
+}
+
+size_t VMemUtil::getPageSize() noexcept {
+  const VMemLocal& vMem = vMemGet();
+  return vMem.pageSize;
+}
+
+size_t VMemUtil::getPageGranularity() noexcept {
+  const VMemLocal& vMem = vMemGet();
+  return vMem.pageGranularity;
+}
+
+void* VMemUtil::alloc(size_t length, size_t* allocated, uint32_t flags) noexcept {
+  return allocProcessMemory(static_cast<HANDLE>(0), length, allocated, flags);
+}
+
+void* VMemUtil::allocProcessMemory(HANDLE hProcess, size_t length, size_t* allocated, uint32_t flags) noexcept {
+  if (length == 0)
+    return nullptr;
+
+  const VMemLocal& vMem = vMemGet();
+  hProcess = vMem.getSafeProcessHandle(hProcess);
+
+  // VirtualAlloc rounds the allocated size up to a page size automatically.
+  size_t mSize = Utils::alignTo(length, vMem.pageSize);
+
+  // Windows XP SP2 / Vista allow Data Execution Prevention (DEP).
+  DWORD protectFlags = 0;
+
+  if (flags & kVMemFlagExecutable)
+    protectFlags |= (flags & kVMemFlagWritable) ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;
+  else
+    protectFlags |= (flags & kVMemFlagWritable) ? PAGE_READWRITE : PAGE_READONLY;
+
+  LPVOID mBase = ::VirtualAllocEx(hProcess, nullptr, mSize, MEM_COMMIT | MEM_RESERVE, protectFlags);
+  if (mBase == nullptr)
+    return nullptr;
+
+  ASMJIT_ASSERT(Utils::isAligned(
+    reinterpret_cast<uintptr_t>(mBase), vMem.pageSize));
+
+  if (allocated != nullptr)
+    *allocated = mSize;
+  return mBase;
+}
+
+Error VMemUtil::release(void* addr, size_t length) noexcept {
+  return releaseProcessMemory(static_cast<HANDLE>(0), addr, length);
+}
+
+Error VMemUtil::releaseProcessMemory(HANDLE hProcess, void* addr, size_t /* length */) noexcept {
+  hProcess = vMemGet().getSafeProcessHandle(hProcess);
+  if (!::VirtualFreeEx(hProcess, addr, 0, MEM_RELEASE))
+    return kErrorInvalidState;
+  return kErrorOk;
+}
+#endif // ASMJIT_OS_WINDOWS
+
+// ============================================================================
+// [asmjit::VMemUtil - Posix]
+// ============================================================================
+
+// Posix specific implementation using `mmap` and `munmap`.
+#if ASMJIT_OS_POSIX
+
+// MacOS uses MAP_ANON instead of MAP_ANONYMOUS.
+#if !defined(MAP_ANONYMOUS)
+# define MAP_ANONYMOUS MAP_ANON
+#endif // MAP_ANONYMOUS
+
+struct VMemLocal {
+  size_t pageSize;
+  size_t pageGranularity;
+};
+static VMemLocal vMemLocal;
+
+static const VMemLocal& vMemGet() noexcept {
+  VMemLocal& vMem = vMemLocal;
+
+  if (!vMem.pageSize) {
+    size_t pageSize = ::getpagesize();
+    vMem.pageSize = pageSize;
+    vMem.pageGranularity = Utils::iMax<size_t>(pageSize, 65536);
+  }
+
+  return vMem;
+}
+
+size_t VMemUtil::getPageSize() noexcept {
+  const VMemLocal& vMem = vMemGet();
+  return vMem.pageSize;
+}
+
+size_t VMemUtil::getPageGranularity() noexcept {
+  const VMemLocal& vMem = vMemGet();
+  return vMem.pageGranularity;
+}
+
+void* VMemUtil::alloc(size_t length, size_t* allocated, uint32_t flags) noexcept {
+  const VMemLocal& vMem = vMemGet();
+  size_t msize = Utils::alignTo(length, vMem.pageSize);
+  int protection = PROT_READ;
+
+  if (flags & kVMemFlagWritable  ) protection |= PROT_WRITE;
+  if (flags & kVMemFlagExecutable) protection |= PROT_EXEC;
+
+  void* mbase = ::mmap(nullptr, msize, protection, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (mbase == MAP_FAILED)
+    return nullptr;
+
+  if (allocated != nullptr)
+    *allocated = msize;
+  return mbase;
+}
+
+Error VMemUtil::release(void* addr, size_t length) noexcept {
+  if (::munmap(addr, length) != 0)
+    return kErrorInvalidState;
+
+  return kErrorOk;
+}
+#endif // ASMJIT_OS_POSIX
+
+// ============================================================================
+// [asmjit::VMemMgr - BitOps]
+// ============================================================================
+
+#define M_DIV(x, y) ((x) / (y))
+#define M_MOD(x, y) ((x) % (y))
+
+//! \internal
+enum {
+  kBitsPerEntity = (sizeof(size_t) * 8)
+};
+
+//! \internal
+//!
+//! Set `len` bits in `buf` starting at `index` bit index.
+static void _SetBits(size_t* buf, size_t index, size_t len) noexcept {
+  if (len == 0)
+    return;
+
+  size_t i = index / kBitsPerEntity; // size_t[]
+  size_t j = index % kBitsPerEntity; // size_t[][] bit index
+
+  // How many bits to process in the first group.
+  size_t c = kBitsPerEntity - j;
+  if (c > len)
+    c = len;
+
+  // Offset.
+  buf += i;
+
+  *buf++ |= ((~(size_t)0) >> (kBitsPerEntity - c)) << j;
+  len -= c;
+
+  while (len >= kBitsPerEntity) {
+    *buf++ = ~(size_t)0;
+    len -= kBitsPerEntity;
+  }
+
+  if (len)
+    *buf |= ((~(size_t)0) >> (kBitsPerEntity - len));
+}
+
+// ============================================================================
+// [asmjit::VMemMgr::TypeDefs]
+// ============================================================================
+
+typedef VMemMgr::RbNode RbNode;
+typedef VMemMgr::MemNode MemNode;
+typedef VMemMgr::PermanentNode PermanentNode;
+
+// ============================================================================
+// [asmjit::VMemMgr::RbNode]
+// ============================================================================
+
+//! \internal
+//!
+//! Base red-black tree node.
+struct VMemMgr::RbNode {
+  // Implementation is based on the article by Julienne Walker (Public Domain),
+  // including the C code and original comments. Thanks for the excellent article.
+
+  // Left[0] and right[1] nodes.
+  RbNode* node[2];
+  // Virtual memory address.
+  uint8_t* mem;
+  // Whether the node is RED.
+  uint32_t red;
+};
+
+//! \internal
+//!
+//! Get whether the node is red (nullptr or node with red flag).
+static ASMJIT_INLINE bool rbIsRed(RbNode* node) noexcept {
+  return node != nullptr && node->red;
+}
+
+//! \internal
+//!
+//! Check whether the RB tree is valid.
+static int rbAssert(RbNode* root) noexcept {
+  if (root == nullptr)
+    return 1;
+
+  RbNode* ln = root->node[0];
+  RbNode* rn = root->node[1];
+
+  // Red violation.
+  ASMJIT_ASSERT( !(rbIsRed(root) && (rbIsRed(ln) || rbIsRed(rn))) );
+
+  int lh = rbAssert(ln);
+  int rh = rbAssert(rn);
+
+  // Invalid binary search tree ordering.
+  ASMJIT_ASSERT(ln == nullptr || ln->mem < root->mem);
+  ASMJIT_ASSERT(rn == nullptr || rn->mem > root->mem);
+
+  // Black violation.
+  ASMJIT_ASSERT( !(lh != 0 && rh != 0 && lh != rh) );
+
+  // Only count black links.
+  if (lh != 0 && rh != 0)
+    return rbIsRed(root) ? lh : lh + 1;
+  else
+    return 0;
+}
+
+//! \internal
+//!
+//! Single rotation.
+static ASMJIT_INLINE RbNode* rbRotateSingle(RbNode* root, int dir) noexcept {
+  RbNode* save = root->node[!dir];
+
+  root->node[!dir] = save->node[dir];
+  save->node[dir] = root;
+
+  root->red = 1;
+  save->red = 0;
+
+  return save;
+}
+
+//! \internal
+//!
+//! Double rotation.
+static ASMJIT_INLINE RbNode* rbRotateDouble(RbNode* root, int dir) noexcept {
+  root->node[!dir] = rbRotateSingle(root->node[!dir], !dir);
+  return rbRotateSingle(root, dir);
+}
+
+// ============================================================================
+// [asmjit::VMemMgr::MemNode]
+// ============================================================================
+
+struct VMemMgr::MemNode : public RbNode {
+  // --------------------------------------------------------------------------
+  // [Helpers]
+  // --------------------------------------------------------------------------
+
+  // Get available space.
+  ASMJIT_INLINE size_t getAvailable() const noexcept {
+    return size - used;
+  }
+
+  ASMJIT_INLINE void fillData(MemNode* other) noexcept {
+    mem = other->mem;
+
+    size = other->size;
+    used = other->used;
+    blocks = other->blocks;
+    density = other->density;
+    largestBlock = other->largestBlock;
+
+    baUsed = other->baUsed;
+    baCont = other->baCont;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  MemNode* prev;         // Prev node in list.
+  MemNode* next;         // Next node in list.
+
+  size_t size;           // How many bytes this node contains.
+  size_t used;           // How many bytes are used in this node.
+  size_t blocks;         // How many blocks are in this node.
+  size_t density;        // Minimum count of allocated bytes in this node (also alignment).
+  size_t largestBlock;   // Largest block that can currently be allocated.
+
+  size_t* baUsed;        // Bits about used blocks (0 = unused, 1 = used).
+  size_t* baCont;        // Bits about continuous blocks (0 = stop, 1 = continue).
+};
+
+// ============================================================================
+// [asmjit::VMemMgr::PermanentNode]
+// ============================================================================
+
+//! \internal
+//!
+//! Permanent node.
+struct VMemMgr::PermanentNode {
+  // --------------------------------------------------------------------------
+  // [Helpers]
+  // --------------------------------------------------------------------------
+
+  //! Get available space.
+  ASMJIT_INLINE size_t getAvailable() const noexcept {
+    return size - used;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  PermanentNode* prev;   // Pointer to prev chunk or nullptr.
+  uint8_t* mem;          // Base pointer (virtual memory address).
+  size_t size;           // Count of bytes allocated.
+  size_t used;           // Count of bytes used.
+};
+
+// ============================================================================
+// [asmjit::VMemMgr - Private]
+// ============================================================================
+
+//! \internal
+//!
+//! Helper to avoid `#ifdef`s in the code.
+ASMJIT_INLINE uint8_t* vMemMgrAllocVMem(VMemMgr* self, size_t size, size_t* vSize) noexcept {
+  uint32_t flags = kVMemFlagWritable | kVMemFlagExecutable;
+#if !ASMJIT_OS_WINDOWS
+  return static_cast<uint8_t*>(VMemUtil::alloc(size, vSize, flags));
+#else
+  return static_cast<uint8_t*>(VMemUtil::allocProcessMemory(self->_hProcess, size, vSize, flags));
+#endif
+}
+
+//! \internal
+//!
+//! Helper to avoid `#ifdef`s in the code.
+ASMJIT_INLINE Error vMemMgrReleaseVMem(VMemMgr* self, void* p, size_t vSize) noexcept {
+#if !ASMJIT_OS_WINDOWS
+  return VMemUtil::release(p, vSize);
+#else
+  return VMemUtil::releaseProcessMemory(self->_hProcess, p, vSize);
+#endif
+}
+
+//! \internal
+//!
+//! Check whether the red-black tree is valid.
+static bool vMemMgrCheckTree(VMemMgr* self) noexcept {
+  return rbAssert(self->_root) > 0;
+}
+
+//! \internal
+//!
+//! Allocate virtual memory, including the heap memory needed for the
+//! `MemNode` data.
+//!
+//! Returns a set-up `MemNode*` or nullptr if the allocation failed.
+static MemNode* vMemMgrCreateNode(VMemMgr* self, size_t size, size_t density) noexcept {
+  size_t vSize;
+  uint8_t* vmem = vMemMgrAllocVMem(self, size, &vSize);
+
+  // Out of memory.
+  if (vmem == nullptr)
+    return nullptr;
+
+  size_t blocks = (vSize / density);
+  size_t bsize = (((blocks + 7) >> 3) + sizeof(size_t) - 1) & ~(size_t)(sizeof(size_t) - 1);
+
+  MemNode* node = static_cast<MemNode*>(ASMJIT_ALLOC(sizeof(MemNode)));
+  uint8_t* data = static_cast<uint8_t*>(ASMJIT_ALLOC(bsize * 2));
+
+  // Out of memory.
+  if (node == nullptr || data == nullptr) {
+    vMemMgrReleaseVMem(self, vmem, vSize);
+    if (node) ASMJIT_FREE(node);
+    if (data) ASMJIT_FREE(data);
+    return nullptr;
+  }
+
+  // Initialize RbNode data.
+  node->node[0] = nullptr;
+  node->node[1] = nullptr;
+  node->mem = vmem;
+  node->red = 1;
+
+  // Initialize MemNode data.
+  node->prev = nullptr;
+  node->next = nullptr;
+
+  node->size = vSize;
+  node->used = 0;
+  node->blocks = blocks;
+  node->density = density;
+  node->largestBlock = vSize;
+
+  ::memset(data, 0, bsize * 2);
+  node->baUsed = reinterpret_cast<size_t*>(data);
+  node->baCont = reinterpret_cast<size_t*>(data + bsize);
+
+  return node;
+}
+
+static void vMemMgrInsertNode(VMemMgr* self, MemNode* node) noexcept {
+  if (self->_root == nullptr) {
+    // Empty tree case.
+    self->_root = node;
+  }
+  else {
+    // False tree root.
+    RbNode head = { { nullptr, nullptr }, 0, 0 };
+
+    // Grandparent & parent.
+    RbNode* g = nullptr;
+    RbNode* t = &head;
+
+    // Iterator & parent.
+    RbNode* p = nullptr;
+    RbNode* q = t->node[1] = self->_root;
+
+    int dir = 0;
+    int last = 0; // Not needed to initialize, but makes some tools happy.
+
+    // Search down the tree.
+    for (;;) {
+      if (q == nullptr) {
+        // Insert the new node at the bottom.
+        q = node;
+        p->node[dir] = node;
+      }
+      else if (rbIsRed(q->node[0]) && rbIsRed(q->node[1])) {
+        // Color flip.
+        q->red = 1;
+        q->node[0]->red = 0;
+        q->node[1]->red = 0;
+      }
+
+      // Fix red violation.
+      if (rbIsRed(q) && rbIsRed(p)) {
+        int dir2 = t->node[1] == g;
+        t->node[dir2] = q == p->node[last] ? rbRotateSingle(g, !last) : rbRotateDouble(g, !last);
+      }
+
+      // Stop if found.
+      if (q == node)
+        break;
+
+      last = dir;
+      dir = q->mem < node->mem;
+
+      // Update helpers.
+      if (g != nullptr)
+        t = g;
+
+      g = p;
+      p = q;
+      q = q->node[dir];
+    }
+
+    // Update the root.
+    self->_root = static_cast<MemNode*>(head.node[1]);
+  }
+
+  // Make the root black.
+  self->_root->red = 0;
+
+  // Link with the others.
+  node->prev = self->_last;
+
+  if (self->_first == nullptr) {
+    self->_first = node;
+    self->_last = node;
+    self->_optimal = node;
+  }
+  else {
+    node->prev = self->_last;
+    self->_last->next = node;
+    self->_last = node;
+  }
+}
+
+//! \internal
+//!
+//! Remove a node from the red-black tree.
+//!
+//! Returns the node that should be freed; it is not necessarily the same as
+//! the `node` passed in.
+static MemNode* vMemMgrRemoveNode(VMemMgr* self, MemNode* node) noexcept {
+  // False tree root.
+  RbNode head = { { nullptr, nullptr }, 0, 0 };
+
+  // Helpers.
+  RbNode* q = &head;
+  RbNode* p = nullptr;
+  RbNode* g = nullptr;
+
+  // Found item.
+  RbNode* f = nullptr;
+  int dir = 1;
+
+  // Set up.
+  q->node[1] = self->_root;
+
+  // Search and push a red down.
+  while (q->node[dir] != nullptr) {
+    int last = dir;
+
+    // Update helpers.
+    g = p;
+    p = q;
+    q = q->node[dir];
+    dir = q->mem < node->mem;
+
+    // Save the found node.
+    if (q == node)
+      f = q;
+
+    // Push the red node down.
+    if (!rbIsRed(q) && !rbIsRed(q->node[dir])) {
+      if (rbIsRed(q->node[!dir])) {
+        p = p->node[last] = rbRotateSingle(q, dir);
+      }
+      else if (!rbIsRed(q->node[!dir])) {
+        RbNode* s = p->node[!last];
+
+        if (s != nullptr) {
+          if (!rbIsRed(s->node[!last]) && !rbIsRed(s->node[last])) {
+            // Color flip.
+            p->red = 0;
+            s->red = 1;
+            q->red = 1;
+          }
+          else {
+            int dir2 = g->node[1] == p;
+
+            if (rbIsRed(s->node[last]))
+              g->node[dir2] = rbRotateDouble(p, last);
+            else if (rbIsRed(s->node[!last]))
+              g->node[dir2] = rbRotateSingle(p, last);
+
+            // Ensure correct coloring.
+            q->red = g->node[dir2]->red = 1;
+            g->node[dir2]->node[0]->red = 0;
+            g->node[dir2]->node[1]->red = 0;
+          }
+        }
+      }
+    }
+  }
+
+  // Replace and remove.
+  ASMJIT_ASSERT(f != nullptr);
+  ASMJIT_ASSERT(f != &head);
+  ASMJIT_ASSERT(q != &head);
+
+  if (f != q) {
+    ASMJIT_ASSERT(f != &head);
+    static_cast<MemNode*>(f)->fillData(static_cast<MemNode*>(q));
+  }
+
+  p->node[p->node[1] == q] = q->node[q->node[0] == nullptr];
+
+  // Update the root and make it black.
+  self->_root = static_cast<MemNode*>(head.node[1]);
+  if (self->_root != nullptr)
+    self->_root->red = 0;
+
+  // Unlink.
+  MemNode* next = static_cast<MemNode*>(q)->next;
+  MemNode* prev = static_cast<MemNode*>(q)->prev;
+
+  if (prev)
+    prev->next = next;
+  else
+    self->_first = next;
+
+  if (next)
+    next->prev = prev;
+  else
+    self->_last = prev;
+
+  if (self->_optimal == q)
+    self->_optimal = prev ? prev : next;
+
+  return static_cast<MemNode*>(q);
+}
+
+static MemNode* vMemMgrFindNodeByPtr(VMemMgr* self, uint8_t* mem) noexcept {
+  MemNode* node = self->_root;
+  while (node != nullptr) {
+    uint8_t* nodeMem = node->mem;
+
+    // Go left.
+    if (mem < nodeMem) {
+      node = static_cast<MemNode*>(node->node[0]);
+      continue;
+    }
+
+    // Go right.
+    uint8_t* nodeEnd = nodeMem + node->size;
+    if (mem >= nodeEnd) {
+      node = static_cast<MemNode*>(node->node[1]);
+      continue;
+    }
+
+    // Match.
+    break;
+  }
+  return node;
+}
+
+static void* vMemMgrAllocPermanent(VMemMgr* self, size_t vSize) noexcept {
+  static const size_t permanentAlignment = 32;
+  static const size_t permanentNodeSize = 32768;
+
+  vSize = Utils::alignTo(vSize, permanentAlignment);
+
+  AutoLock locked(self->_lock);
+  PermanentNode* node = self->_permanent;
+
+  // Try to find space in the allocated chunks.
+  while (node && vSize > node->getAvailable())
+    node = node->prev;
+
+  // Or allocate a new node.
+  if (node == nullptr) {
+    size_t nodeSize = permanentNodeSize;
+
+    if (nodeSize < vSize)
+      nodeSize = vSize;
+
+    node = static_cast<PermanentNode*>(ASMJIT_ALLOC(sizeof(PermanentNode)));
+
+    // Out of memory.
+    if (node == nullptr)
+      return nullptr;
+
+    node->mem = vMemMgrAllocVMem(self, nodeSize, &node->size);
+
+    // Out of memory.
+    if (node->mem == nullptr) {
+      ASMJIT_FREE(node);
+      return nullptr;
+    }
+
+    node->used = 0;
+    node->prev = self->_permanent;
+    self->_permanent = node;
+  }
+
+  // Reserve the space at the end of the node's used area.
+  uint8_t* result = node->mem + node->used;
+
+  // Update statistics.
+  node->used += vSize;
+  self->_usedBytes += vSize;
+
+  // Permanent memory is never released; the caller copies its code into the
+  // returned space.
+  return static_cast<void*>(result);
+}
+
+static void* vMemMgrAllocFreeable(VMemMgr* self, size_t vSize) noexcept {
+  // Current index.
+  size_t i;
+
+  // How many blocks need to be free.
+  size_t need;
+  size_t minVSize;
+
+  // Align to 32 bytes by default.
+  vSize = Utils::alignTo(vSize, 32);
+  if (vSize == 0)
+    return nullptr;
+
+  AutoLock locked(self->_lock);
+  MemNode* node = self->_optimal;
+  minVSize = self->_blockSize;
+
+  // Try to find a memory block in the existing nodes.
+  while (node) {
+    // Skip this node?
+    if ((node->getAvailable() < vSize) ||
+        (node->largestBlock < vSize && node->largestBlock != 0)) {
+      MemNode* next = node->next;
+
+      if (node->getAvailable() < minVSize && node == self->_optimal && next)
+        self->_optimal = next;
+
+      node = next;
+      continue;
+    }
+
+    size_t* up = node->baUsed;    // Current ubits address.
+    size_t ubits;                 // Current ubits[0] value.
+    size_t bit;                   // Current bit mask.
+    size_t blocks = node->blocks; // Count of blocks in node.
+    size_t cont = 0;              // How many bits are currently freed in find loop.
+    size_t maxCont = 0;           // Largest continuous block (bits count).
+    size_t j;
+
+    need = M_DIV((vSize + node->density - 1), node->density);
+    i = 0;
+
+    // Try to find a run of free blocks that is large enough.
+    while (i < blocks) {
+      ubits = *up++;
+
+      // Fast skip used blocks.
+      if (ubits == ~(size_t)0) {
+        if (cont > maxCont)
+          maxCont = cont;
+        cont = 0;
+
+        i += kBitsPerEntity;
+        continue;
+      }
+
+      size_t max = kBitsPerEntity;
+      if (i + max > blocks)
+        max = blocks - i;
+
+      for (j = 0, bit = 1; j < max; bit <<= 1) {
+        j++;
+        if ((ubits & bit) == 0) {
+          if (++cont == need) {
+            i += j;
+            i -= cont;
+            goto L_Found;
+          }
+
+          continue;
+        }
+
+        if (cont > maxCont) maxCont = cont;
+        cont = 0;
+      }
+
+      i += kBitsPerEntity;
+    }
+
+    // Because we traversed the entire node, we can cache the largest free
+    // block found, which lets the next traversal skip this node early.
+    node->largestBlock = maxCont * node->density;
+
+    node = node->next;
+  }
+
+  // If we are here, we failed to find an existing memory block, so we must
+  // allocate a new one.
+  {
+    size_t blockSize = self->_blockSize;
+    if (blockSize < vSize)
+      blockSize = vSize;
+
+    node = vMemMgrCreateNode(self, blockSize, self->_blockDensity);
+    if (node == nullptr)
+      return nullptr;
+
+    // Update the binary tree.
+    vMemMgrInsertNode(self, node);
+    ASMJIT_ASSERT(vMemMgrCheckTree(self));
+
+    // Allocate the first blocks at the start of the new node.
+    i = 0;
+    need = (vSize + node->density - 1) / node->density;
+
+    // Update statistics.
+    self->_allocatedBytes += node->size;
+  }
+
+L_Found:
+  // Update bits.
+  _SetBits(node->baUsed, i, need);
+  _SetBits(node->baCont, i, need - 1);
+
+  // Update statistics.
+  {
+    size_t u = need * node->density;
+    node->used += u;
+    node->largestBlock = 0;
+    self->_usedBytes += u;
+  }
+
+  // And return the pointer to the allocated memory.
+  uint8_t* result = node->mem + i * node->density;
+  ASMJIT_ASSERT(result >= node->mem && result <= node->mem + node->size - vSize);
+  return result;
+}
+
+//! \internal
+//!
+//! Reset the whole `VMemMgr` instance, freeing all allocated heap memory and,
+//! unless `keepVirtualMemory` is true, all allocated virtual memory (keeping
+//! it is only used when writing data to a remote process).
+static void vMemMgrReset(VMemMgr* self, bool keepVirtualMemory) noexcept {
+  MemNode* node = self->_first;
+
+  while (node != nullptr) {
+    MemNode* next = node->next;
+
+    if (!keepVirtualMemory)
+      vMemMgrReleaseVMem(self, node->mem, node->size);
+
+    ASMJIT_FREE(node->baUsed);
+    ASMJIT_FREE(node);
+
+    node = next;
+  }
+
+  self->_allocatedBytes = 0;
+  self->_usedBytes = 0;
+
+  self->_root = nullptr;
+  self->_first = nullptr;
+  self->_last = nullptr;
+  self->_optimal = nullptr;
+}
+
+// ============================================================================
+// [asmjit::VMemMgr - Construction / Destruction]
+// ============================================================================
+
+#if !ASMJIT_OS_WINDOWS
+VMemMgr::VMemMgr() noexcept
+#else
+VMemMgr::VMemMgr(HANDLE hProcess) noexcept
+  : _hProcess(vMemGet().getSafeProcessHandle(hProcess))
+#endif // ASMJIT_OS_WINDOWS
+{
+  _blockSize = VMemUtil::getPageGranularity();
+  _blockDensity = 64;
+
+  _allocatedBytes = 0;
+  _usedBytes = 0;
+
+  _root = nullptr;
+  _first = nullptr;
+  _last = nullptr;
+  _optimal = nullptr;
+
+  _permanent = nullptr;
+  _keepVirtualMemory = false;
+}
+
+VMemMgr::~VMemMgr() noexcept {
+  // Freeable memory cleanup - Also frees the virtual memory if configured to.
+  vMemMgrReset(this, _keepVirtualMemory);
+
+  // Permanent memory cleanup - Never frees the virtual memory.
+  PermanentNode* node = _permanent;
+  while (node) {
+    PermanentNode* prev = node->prev;
+    ASMJIT_FREE(node);
+    node = prev;
+  }
+}
+
+// ============================================================================
+// [asmjit::VMemMgr - Reset]
+// ============================================================================
+
+void VMemMgr::reset() noexcept {
+  vMemMgrReset(this, false);
+}
+
+// ============================================================================
+// [asmjit::VMemMgr - Alloc / Release]
+// ============================================================================
+
+void* VMemMgr::alloc(size_t size, uint32_t type) noexcept {
+  if (type == kVMemAllocPermanent)
+    return vMemMgrAllocPermanent(this, size);
+  else
+    return vMemMgrAllocFreeable(this, size);
+}
+
+Error VMemMgr::release(void* p) noexcept {
+  if (p == nullptr)
+    return kErrorOk;
+
+  AutoLock locked(_lock);
+  MemNode* node = vMemMgrFindNodeByPtr(this, static_cast<uint8_t*>(p));
+
+  if (node == nullptr)
+    return kErrorInvalidArgument;
+
+  size_t offset = (size_t)((uint8_t*)p - (uint8_t*)node->mem);
+  size_t bitpos = M_DIV(offset, node->density);
+  size_t i = (bitpos / kBitsPerEntity);
+
+  size_t* up = node->baUsed + i; // Current ubits address.
+  size_t* cp = node->baCont + i; // Current cbits address.
+  size_t ubits = *up;            // Current ubits[0] value.
+  size_t cbits = *cp;            // Current cbits[0] value.
+ size_t bit = (size_t)1 << (bitpos % kBitsPerEntity); + + size_t cont = 0; + bool stop; + + for (;;) { + stop = (cbits & bit) == 0; + ubits &= ~bit; + cbits &= ~bit; + + bit <<= 1; + cont++; + + if (stop || bit == 0) { + *up = ubits; + *cp = cbits; + if (stop) + break; + + ubits = *++up; + cbits = *++cp; + bit = 1; + } + } + + // If the freed block is fully allocated node then it's needed to + // update 'optimal' pointer in memory manager. + if (node->used == node->size) { + MemNode* cur = _optimal; + + do { + cur = cur->prev; + if (cur == node) { + _optimal = node; + break; + } + } while (cur); + } + + // Statistics. + cont *= node->density; + if (node->largestBlock < cont) + node->largestBlock = cont; + + node->used -= cont; + _usedBytes -= cont; + + // If page is empty, we can free it. + if (node->used == 0) { + // Free memory associated with node (this memory is not accessed + // anymore so it's safe). + vMemMgrReleaseVMem(this, node->mem, node->size); + ASMJIT_FREE(node->baUsed); + + node->baUsed = nullptr; + node->baCont = nullptr; + + // Statistics. + _allocatedBytes -= node->size; + + // Remove node. This function can return different node than + // passed into, but data is copied into previous node if needed. + ASMJIT_FREE(vMemMgrRemoveNode(this, node)); + ASMJIT_ASSERT(vMemMgrCheckTree(this)); + } + + return kErrorOk; +} + +Error VMemMgr::shrink(void* p, size_t used) noexcept { + if (p == nullptr) + return kErrorOk; + + if (used == 0) + return release(p); + + AutoLock locked(_lock); + + MemNode* node = vMemMgrFindNodeByPtr(this, (uint8_t*)p); + if (node == nullptr) + return kErrorInvalidArgument; + + size_t offset = (size_t)((uint8_t*)p - (uint8_t*)node->mem); + size_t bitpos = M_DIV(offset, node->density); + size_t i = (bitpos / kBitsPerEntity); + + size_t* up = node->baUsed + i; // Current ubits address. + size_t* cp = node->baCont + i; // Current cbits address. + size_t ubits = *up; // Current ubits[0] value. + size_t cbits = *cp; // Current cbits[0] value. + size_t bit = (size_t)1 << (bitpos % kBitsPerEntity); + + size_t cont = 0; + size_t usedBlocks = (used + node->density - 1) / node->density; + + bool stop; + + // Find the first block we can mark as free. + for (;;) { + stop = (cbits & bit) == 0; + if (stop) + return kErrorOk; + + if (++cont == usedBlocks) + break; + + bit <<= 1; + if (bit == 0) { + ubits = *++up; + cbits = *++cp; + bit = 1; + } + } + + // Free the tail blocks. + cont = ~(size_t)0; + goto _EnterFreeLoop; + + for (;;) { + stop = (cbits & bit) == 0; + ubits &= ~bit; + +_EnterFreeLoop: + cbits &= ~bit; + + bit <<= 1; + cont++; + + if (stop || bit == 0) { + *up = ubits; + *cp = cbits; + if (stop) + break; + + ubits = *++up; + cbits = *++cp; + bit = 1; + } + } + + // Statistics. 
+ cont *= node->density; + if (node->largestBlock < cont) + node->largestBlock = cont; + + node->used -= cont; + _usedBytes -= cont; + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::VMem - Test] +// ============================================================================ + +#if defined(ASMJIT_TEST) +static void VMemTest_fill(void* a, void* b, int i) noexcept { + int pattern = rand() % 256; + *(int *)a = i; + *(int *)b = i; + ::memset((char*)a + sizeof(int), pattern, i - sizeof(int)); + ::memset((char*)b + sizeof(int), pattern, i - sizeof(int)); +} + +static void VMemTest_verify(void* a, void* b) noexcept { + int ai = *(int*)a; + int bi = *(int*)b; + + EXPECT(ai == bi, + "The length of 'a' (%d) and 'b' (%d) should be same", ai, bi); + + EXPECT(::memcmp(a, b, ai) == 0, + "Pattern (%p) doesn't match", a); +} + +static void VMemTest_stats(VMemMgr& memmgr) noexcept { + INFO("Used : %u", static_cast(memmgr.getUsedBytes())); + INFO("Allocated: %u", static_cast(memmgr.getAllocatedBytes())); +} + +static void VMemTest_shuffle(void** a, void** b, size_t count) noexcept { + for (size_t i = 0; i < count; ++i) { + size_t si = (size_t)rand() % count; + + void* ta = a[i]; + void* tb = b[i]; + + a[i] = a[si]; + b[i] = b[si]; + + a[si] = ta; + b[si] = tb; + } +} + +UNIT(base_vmem) { + VMemMgr memmgr; + + // Should be predictible. + srand(100); + + int i; + int kCount = 200000; + + INFO("Memory alloc/free test - %d allocations.", static_cast(kCount)); + + void** a = (void**)ASMJIT_ALLOC(sizeof(void*) * kCount); + void** b = (void**)ASMJIT_ALLOC(sizeof(void*) * kCount); + + EXPECT(a != nullptr && b != nullptr, + "Couldn't allocate %u bytes on heap.", kCount * 2); + + INFO("Allocating virtual memory..."); + for (i = 0; i < kCount; i++) { + int r = (rand() % 1000) + 4; + + a[i] = memmgr.alloc(r); + EXPECT(a[i] != nullptr, + "Couldn't allocate %d bytes of virtual memory", r); + ::memset(a[i], 0, r); + } + VMemTest_stats(memmgr); + + INFO("Freeing virtual memory..."); + for (i = 0; i < kCount; i++) { + EXPECT(memmgr.release(a[i]) == kErrorOk, + "Failed to free %p.", b[i]); + } + VMemTest_stats(memmgr); + + INFO("Verified alloc/free test - %d allocations.", static_cast(kCount)); + for (i = 0; i < kCount; i++) { + int r = (rand() % 1000) + 4; + + a[i] = memmgr.alloc(r); + EXPECT(a[i] != nullptr, + "Couldn't allocate %d bytes of virtual memory.", r); + + b[i] = ASMJIT_ALLOC(r); + EXPECT(b[i] != nullptr, + "Couldn't allocate %d bytes on heap.", r); + + VMemTest_fill(a[i], b[i], r); + } + VMemTest_stats(memmgr); + + INFO("Shuffling..."); + VMemTest_shuffle(a, b, kCount); + + INFO("Verify and free..."); + for (i = 0; i < kCount / 2; i++) { + VMemTest_verify(a[i], b[i]); + EXPECT(memmgr.release(a[i]) == kErrorOk, + "Failed to free %p.", a[i]); + ASMJIT_FREE(b[i]); + } + VMemTest_stats(memmgr); + + INFO("Alloc again."); + for (i = 0; i < kCount / 2; i++) { + int r = (rand() % 1000) + 4; + + a[i] = memmgr.alloc(r); + EXPECT(a[i] != nullptr, + "Couldn't allocate %d bytes of virtual memory.", r); + + b[i] = ASMJIT_ALLOC(r); + EXPECT(b[i] != nullptr, + "Couldn't allocate %d bytes on heap."); + + VMemTest_fill(a[i], b[i], r); + } + VMemTest_stats(memmgr); + + INFO("Verify and free..."); + for (i = 0; i < kCount; i++) { + VMemTest_verify(a[i], b[i]); + EXPECT(memmgr.release(a[i]) == kErrorOk, + "Failed to free %p.", a[i]); + ASMJIT_FREE(b[i]); + } + VMemTest_stats(memmgr); + + ASMJIT_FREE(a); + ASMJIT_FREE(b); +} +#endif // ASMJIT_TEST + +} // asmjit 
namespace
diff --git a/DynamicHooks/thirdparty/AsmJit/base/vmem.h b/DynamicHooks/thirdparty/AsmJit/base/vmem.h
new file mode 100644
index 0000000..b36031e
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/vmem.h
@@ -0,0 +1,233 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_VMEM_H
+#define _ASMJIT_BASE_VMEM_H
+
+// [Dependencies]
+#include "../base/utils.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+//! \addtogroup asmjit_base
+//! \{
+
+// ============================================================================
+// [asmjit::VMemAllocType]
+// ============================================================================
+
+//! Type of virtual memory allocation, see `VMemMgr::alloc()`.
+ASMJIT_ENUM(VMemAllocType) {
+  //! Normal memory allocation, has to be freed by `VMemMgr::release()`.
+  kVMemAllocFreeable = 0,
+  //! Allocate permanent memory, can't be freed.
+  kVMemAllocPermanent = 1
+};
+
+// ============================================================================
+// [asmjit::VMemFlags]
+// ============================================================================
+
+//! Flags of a virtual memory allocation, see `VMemUtil::alloc()`.
+ASMJIT_ENUM(VMemFlags) {
+  //! Memory is writable.
+  kVMemFlagWritable = 0x00000001,
+  //! Memory is executable.
+  kVMemFlagExecutable = 0x00000002
+};
+
+// ============================================================================
+// [asmjit::VMemUtil]
+// ============================================================================
+
+//! Virtual memory utilities.
+//!
+//! Defines functions that allocate and free executable memory in a
+//! platform-independent manner. If both the processor and the host operating
+//! system support data-execution-prevention, the only way to run machine
+//! code is to place it in memory that is marked as executable. `VMemUtil` is
+//! a unified interface over the platform-dependent APIs.
+//!
+//! `VirtualAlloc()` is used on Windows and `mmap()` on POSIX. Their
+//! documentation provides a detailed overview of the platform-specific APIs.
struct VMemUtil {
+  //! Get the size/alignment of a single virtual memory page.
+  static ASMJIT_API size_t getPageSize() noexcept;
+
+  //! Get a recommended granularity for a single `alloc` call.
+  static ASMJIT_API size_t getPageGranularity() noexcept;
+
+  //! Allocate virtual memory.
+  //!
+  //! Pages are readable/writeable, but they are not guaranteed to be
+  //! executable unless `flags` contains `kVMemFlagExecutable`. Returns the
+  //! address of allocated memory, or `nullptr` on failure.
+  static ASMJIT_API void* alloc(size_t length, size_t* allocated, uint32_t flags) noexcept;
+  //! Free memory allocated by `alloc()`.
+  static ASMJIT_API Error release(void* addr, size_t length) noexcept;
+
+#if ASMJIT_OS_WINDOWS
+  //! Allocate virtual memory of `hProcess` (Windows only).
+  static ASMJIT_API void* allocProcessMemory(HANDLE hProcess, size_t length, size_t* allocated, uint32_t flags) noexcept;
+
+  //! Release virtual memory of `hProcess` (Windows only).
+  static ASMJIT_API Error releaseProcessMemory(HANDLE hProcess, void* addr, size_t length) noexcept;
+#endif // ASMJIT_OS_WINDOWS
+};
+
+// ============================================================================
+// [asmjit::VMemMgr]
+// ============================================================================
+
+//! Reference implementation of a memory manager that uses `VMemUtil` to
+//! allocate chunks of virtual memory and bit arrays to manage them.
+class VMemMgr {
+ public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+#if !ASMJIT_OS_WINDOWS
+  //! Create a `VMemMgr` instance.
+  ASMJIT_API VMemMgr() noexcept;
+#else
+  //! Create a `VMemMgr` instance.
+  //!
+  //! NOTE: When running on Windows it's possible to specify a `hProcess` to
+  //! be used for memory allocation. Using `hProcess` allows allocating
+  //! memory of a remote process.
+  ASMJIT_API VMemMgr(HANDLE hProcess = static_cast<HANDLE>(0)) noexcept;
+#endif // ASMJIT_OS_WINDOWS
+
+  //! Destroy the `VMemMgr` instance and free all blocks.
+  ASMJIT_API ~VMemMgr() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  //! Free all allocated memory.
+  ASMJIT_API void reset() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+#if ASMJIT_OS_WINDOWS
+  //! Get the handle of the process this memory manager is bound to.
+  ASMJIT_INLINE HANDLE getProcessHandle() const noexcept {
+    return _hProcess;
+  }
+#endif // ASMJIT_OS_WINDOWS
+
+  //! Get how many bytes are currently allocated.
+  ASMJIT_INLINE size_t getAllocatedBytes() const noexcept {
+    return _allocatedBytes;
+  }
+
+  //! Get how many bytes are currently used.
+  ASMJIT_INLINE size_t getUsedBytes() const noexcept {
+    return _usedBytes;
+  }
+
+  //! Get whether to keep allocated memory after the `VMemMgr` is destroyed.
+  //!
+  //! \sa \ref setKeepVirtualMemory.
+  ASMJIT_INLINE bool getKeepVirtualMemory() const noexcept {
+    return _keepVirtualMemory;
+  }
+
+  //! Set whether to keep allocated memory after the memory manager is
+  //! destroyed.
+  //!
+  //! This is useful when patching code of a remote process: you allocate the
+  //! process memory, store the generated assembly into it and patch the
+  //! method you want to redirect (into your code). This setting affects only
+  //! the VMemMgr destructor; after destruction all internal structures are
+  //! freed and only the process virtual memory remains.
+  //!
+  //! NOTE: Memory allocated with kVMemAllocPermanent is always kept.
+  //!
+  //! \sa \ref getKeepVirtualMemory.
+  ASMJIT_INLINE void setKeepVirtualMemory(bool keepVirtualMemory) noexcept {
+    _keepVirtualMemory = keepVirtualMemory;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Alloc / Release]
+  // --------------------------------------------------------------------------
+
+  //! Allocate `size` bytes of virtual memory.
+  //!
+  //! Note that if you are implementing your own virtual memory manager then
+  //! you can quietly ignore the type of allocation; it mainly tells AsmJit's
+  //! memory manager that the allocated memory will never be freed.
+  ASMJIT_API void* alloc(size_t size, uint32_t type = kVMemAllocFreeable) noexcept;
+
+  //! Free previously allocated memory at `p`.
+  ASMJIT_API Error release(void* p) noexcept;
+
+  //! Shrink memory previously allocated with `p` to `used` bytes.
+  ASMJIT_API Error shrink(void* p, size_t used) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+#if ASMJIT_OS_WINDOWS
+  //! Process passed to `VirtualAllocEx` and `VirtualFree`.
+  HANDLE _hProcess;
+#endif // ASMJIT_OS_WINDOWS
+
+  //! Lock to enable thread-safe functionality.
+  Lock _lock;
+
+  //! Default block size.
+  size_t _blockSize;
+  //! Default block density.
+  size_t _blockDensity;
+
+  //! Whether to keep virtual memory after destroy.
+  bool _keepVirtualMemory;
+
+  //! How many bytes are currently allocated.
+  size_t _allocatedBytes;
+  //! How many bytes are currently used.
+  size_t _usedBytes;
+
+  //! \internal
+  //! \{
+
+  struct RbNode;
+  struct MemNode;
+  struct PermanentNode;
+
+  // Memory nodes root.
+  MemNode* _root;
+  // Memory nodes list.
+  MemNode* _first;
+  MemNode* _last;
+  MemNode* _optimal;
+  // Permanent memory.
+  PermanentNode* _permanent;
+
+  //! \}
+};
+
+//! \}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_VMEM_H
diff --git a/DynamicHooks/thirdparty/AsmJit/base/zone.cpp b/DynamicHooks/thirdparty/AsmJit/base/zone.cpp
new file mode 100644
index 0000000..bef1a8a
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/zone.cpp
@@ -0,0 +1,193 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies]
+#include "../base/utils.h"
+#include "../base/zone.h"
+#include <stdarg.h>
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+//! Zero size block used by `Zone` that doesn't have any memory allocated.
+static const Zone::Block Zone_zeroBlock = {
+  nullptr, nullptr, nullptr, nullptr, { 0 }
+};
+
+// ============================================================================
+// [asmjit::Zone - Construction / Destruction]
+// ============================================================================
+
+Zone::Zone(size_t blockSize) noexcept {
+  _block = const_cast<Zone::Block*>(&Zone_zeroBlock);
+  _blockSize = blockSize;
+}
+
+Zone::~Zone() noexcept {
+  reset(true);
+}
+
+// ============================================================================
+// [asmjit::Zone - Reset]
+// ============================================================================
+
+void Zone::reset(bool releaseMemory) noexcept {
+  Block* cur = _block;
+
+  // The zero block is statically allocated and can't be altered.
+  if (cur == &Zone_zeroBlock)
+    return;
+
+  if (releaseMemory) {
+    // Since cur can be in the middle of the double-linked list, we have to
+    // traverse both directions, `prev` and `next`, separately.
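+    // For example (hypothetical state, not from a real run): with blocks
+    // A <-> B <-> C and _block == B after an earlier reset(false), the first
+    // loop below frees B and then A via `prev`, the second frees C via
+    // `next`.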
+ Block* next = cur->next; + do { + Block* prev = cur->prev; + ASMJIT_FREE(cur); + cur = prev; + } while (cur != nullptr); + + cur = next; + while (cur != nullptr) { + next = cur->next; + ASMJIT_FREE(cur); + cur = next; + } + + _block = const_cast(&Zone_zeroBlock); + } + else { + while (cur->prev != nullptr) + cur = cur->prev; + + cur->pos = cur->data; + _block = cur; + } +} + +// ============================================================================ +// [asmjit::Zone - Alloc] +// ============================================================================ + +void* Zone::_alloc(size_t size) noexcept { + Block* curBlock = _block; + size_t blockSize = Utils::iMax(_blockSize, size); + + // The `_alloc()` method can only be called if there is not enough space + // in the current block, see `alloc()` implementation for more details. + ASMJIT_ASSERT(curBlock == &Zone_zeroBlock || curBlock->getRemainingSize() < size); + + // If the `Zone` has been reset the current block doesn't have to be the + // last one. Check if there is a block that can be used instead of allocating + // a new one. If there is a `next` block it's completely unused, we don't have + // to check for remaining bytes. + Block* next = curBlock->next; + if (next != nullptr && next->getBlockSize() >= size) { + next->pos = next->data + size; + _block = next; + return static_cast(next->data); + } + + // Prevent arithmetic overflow. + if (blockSize > ~static_cast(0) - sizeof(Block)) + return nullptr; + + Block* newBlock = static_cast(ASMJIT_ALLOC(sizeof(Block) - sizeof(void*) + blockSize)); + if (newBlock == nullptr) + return nullptr; + + newBlock->pos = newBlock->data + size; + newBlock->end = newBlock->data + blockSize; + newBlock->prev = nullptr; + newBlock->next = nullptr; + + if (curBlock != &Zone_zeroBlock) { + newBlock->prev = curBlock; + curBlock->next = newBlock; + + // Does only happen if there is a next block, but the requested memory + // can't fit into it. In this case a new buffer is allocated and inserted + // between the current block and the next one. + if (next != nullptr) { + newBlock->next = next; + next->prev = newBlock; + } + } + + _block = newBlock; + return static_cast(newBlock->data); +} + +void* Zone::allocZeroed(size_t size) noexcept { + void* p = alloc(size); + if (p != nullptr) + ::memset(p, 0, size); + return p; +} + +void* Zone::dup(const void* data, size_t size) noexcept { + if (data == nullptr) + return nullptr; + + if (size == 0) + return nullptr; + + void* m = alloc(size); + if (m == nullptr) + return nullptr; + + ::memcpy(m, data, size); + return m; +} + +char* Zone::sdup(const char* str) noexcept { + if (str == nullptr) + return nullptr; + + size_t len = ::strlen(str); + if (len == 0) + return nullptr; + + // Include NULL terminator and limit string length. + if (++len > 256) + len = 256; + + char* m = static_cast(alloc(len)); + if (m == nullptr) + return nullptr; + + ::memcpy(m, str, len); + m[len - 1] = '\0'; + return m; +} + +char* Zone::sformat(const char* fmt, ...) 
noexcept {
+  if (fmt == nullptr)
+    return nullptr;
+
+  char buf[512];
+  size_t len;
+
+  va_list ap;
+  va_start(ap, fmt);
+
+  len = vsnprintf(buf, ASMJIT_ARRAY_SIZE(buf) - 1, fmt, ap);
+  buf[len++] = 0;
+
+  va_end(ap);
+  return static_cast<char*>(dup(buf, len));
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
diff --git a/DynamicHooks/thirdparty/AsmJit/base/zone.h b/DynamicHooks/thirdparty/AsmJit/base/zone.h
new file mode 100644
index 0000000..9bbb50c
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/base/zone.h
@@ -0,0 +1,220 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_ZONE_H
+#define _ASMJIT_BASE_ZONE_H
+
+// [Dependencies]
+#include "../base/globals.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+//! \addtogroup asmjit_base
+//! \{
+
+// ============================================================================
+// [asmjit::Zone]
+// ============================================================================
+
+//! Zone memory allocator.
+//!
+//! Zone is an incremental memory allocator that allocates memory by simply
+//! incrementing a pointer. It allocates blocks of memory by using the
+//! standard C library `malloc/free`, but divides these blocks into smaller
+//! segments requested by calling `Zone::alloc()` and friends.
+//!
+//! Zone memory allocators are designed to allocate data of short lifetime.
+//! The data used by `Assembler` and `Compiler` has a very short lifetime,
+//! thus it is allocated by `Zone`. The advantage is that `Zone` can free all
+//! of the data allocated at once by calling `reset()` or by the `Zone`
+//! destructor.
class Zone {
+ public:
+  //! \internal
+  //!
+  //! A single block of memory.
+  struct Block {
+    // ------------------------------------------------------------------------
+    // [Accessors]
+    // ------------------------------------------------------------------------
+
+    //! Get the size of the block.
+    ASMJIT_INLINE size_t getBlockSize() const noexcept {
+      return (size_t)(end - data);
+    }
+
+    //! Get count of remaining bytes in the block.
+    ASMJIT_INLINE size_t getRemainingSize() const noexcept {
+      return (size_t)(end - pos);
+    }
+
+    // ------------------------------------------------------------------------
+    // [Members]
+    // ------------------------------------------------------------------------
+
+    //! Current data pointer (pointer to the first available byte).
+    uint8_t* pos;
+    //! End data pointer (pointer to the first invalid byte).
+    uint8_t* end;
+
+    //! Link to the previous block.
+    Block* prev;
+    //! Link to the next block.
+    Block* next;
+
+    //! Data.
+    uint8_t data[sizeof(void*)];
+  };
+
+  enum {
+    //! Zone allocator overhead.
+    kZoneOverhead =
+      kMemAllocOverhead
+      + static_cast<int>(sizeof(Block) - sizeof(void*))
+  };
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new instance of `Zone` allocator.
+  //!
+  //! The `blockSize` parameter describes the default size of the block. If the
+  //! `size` parameter passed to `alloc()` is greater than the default size
+  //! `Zone` will allocate and use a larger block, but it will not change the
+  //! default `blockSize`.
+  //!
+  //! It's not required, but it's good practice to set `blockSize` to a
+  //! reasonable value that depends on the usage of `Zone`. Greater block sizes
+  //! are generally safer and perform better than unreasonably low values.
+  ASMJIT_API Zone(size_t blockSize) noexcept;
+
+  //! Destroy the `Zone` instance.
+  //!
+  //! This will destroy the `Zone` instance and release all blocks of memory
+  //! allocated by it. It performs implicit `reset(true)`.
+  ASMJIT_API ~Zone() noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  //! Reset the `Zone`, invalidating all blocks allocated.
+  //!
+  //! If `releaseMemory` is true all buffers will be released to the system.
+  ASMJIT_API void reset(bool releaseMemory = false) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get the default block size.
+  ASMJIT_INLINE size_t getBlockSize() const noexcept {
+    return _blockSize;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Alloc]
+  // --------------------------------------------------------------------------
+
+  //! Allocate `size` bytes of memory.
+  //!
+  //! The pointer returned is valid until the `Zone` instance is destroyed or
+  //! reset by calling `reset()`. If you plan to make an instance of C++ from
+  //! the given pointer use placement `new` and `delete` operators:
+  //!
+  //! ~~~
+  //! using namespace asmjit;
+  //!
+  //! class Object { ... };
+  //!
+  //! // Create Zone with default block size of approximately 65536 bytes.
+  //! Zone zone(65536 - Zone::kZoneOverhead);
+  //!
+  //! // Create your objects using zone object allocating, for example:
+  //! Object* obj = static_cast<Object*>( zone.alloc(sizeof(Object)) );
+  //!
+  //! if (obj == nullptr) {
+  //!   // Handle out of memory error.
+  //! }
+  //!
+  //! // Placement `new` and `delete` operators can be used to instantiate it.
+  //! new(obj) Object();
+  //!
+  //! // ... lifetime of your objects ...
+  //!
+  //! // To destroy the instance (if required).
+  //! obj->~Object();
+  //!
+  //! // Reset or destroy `Zone`.
+  //! zone.reset();
+  //! ~~~
+  ASMJIT_INLINE void* alloc(size_t size) noexcept {
+    Block* cur = _block;
+
+    uint8_t* ptr = cur->pos;
+    size_t remainingBytes = (size_t)(cur->end - ptr);
+
+    if (remainingBytes < size)
+      return _alloc(size);
+
+    cur->pos += size;
+    ASMJIT_ASSERT(cur->pos <= cur->end);
+
+    return (void*)ptr;
+  }
+
+  //! Allocate `size` bytes of zeroed memory.
+  //!
+  //! See \ref alloc() for more details.
+  ASMJIT_API void* allocZeroed(size_t size) noexcept;
+
+  //! Like `alloc()`, but the return pointer is casted to `T*`.
+  template<typename T>
+  ASMJIT_INLINE T* allocT(size_t size = sizeof(T)) noexcept {
+    return static_cast<T*>(alloc(size));
+  }
+
+  //! Like `allocZeroed()`, but the return pointer is casted to `T*`.
+  template<typename T>
+  ASMJIT_INLINE T* allocZeroedT(size_t size = sizeof(T)) noexcept {
+    return static_cast<T*>(allocZeroed(size));
+  }
+
+  //! \internal
+  ASMJIT_API void* _alloc(size_t size) noexcept;
+
+  //! Helper to duplicate data.
+  ASMJIT_API void* dup(const void* data, size_t size) noexcept;
+
+  //! Helper to duplicate string.
+  ASMJIT_API char* sdup(const char* str) noexcept;
+
+  //! Helper to duplicate a formatted string (internal buffer of 512 bytes).
+  ASMJIT_API char* sformat(const char* fmt, ...)
noexcept; + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! The current block. + Block* _block; + //! Default block size. + size_t _blockSize; +}; + +//! \} + +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" + +// [Guard] +#endif // _ASMJIT_BASE_ZONE_H diff --git a/DynamicHooks/thirdparty/AsmJit/build.h b/DynamicHooks/thirdparty/AsmJit/build.h new file mode 100644 index 0000000..00a0be7 --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/build.h @@ -0,0 +1,928 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BUILD_H +#define _ASMJIT_BUILD_H + +// ============================================================================ +// [asmjit::Build - Configuration] +// ============================================================================ + +// AsmJit is by default compiled only for a host processor for the purpose of +// JIT code generation. Both Assembler and Compiler code generators are compiled +// by default. Preprocessor macros can be used to change the default behavior. + +// External Config File +// -------------------- +// +// Define in case your configuration is generated in an external file to be +// included. + +#if defined(ASMJIT_CONFIG_FILE) +# include ASMJIT_CONFIG_FILE +#endif // ASMJIT_CONFIG_FILE + +// AsmJit Static Builds and Embedding +// ---------------------------------- +// +// These definitions can be used to enable static library build. Embed is used +// when AsmJit's source code is embedded directly in another project, implies +// static build as well. +// +// #define ASMJIT_EMBED // Asmjit is embedded (implies ASMJIT_STATIC). +// #define ASMJIT_STATIC // Define to enable static-library build. + +// AsmJit Build Modes +// ------------------ +// +// These definitions control the build mode and tracing support. The build mode +// should be auto-detected at compile time, but it's possible to override it in +// case that the auto-detection fails. +// +// Tracing is a feature that is never compiled by default and it's only used to +// debug AsmJit itself. +// +// #define ASMJIT_DEBUG // Define to enable debug-mode. +// #define ASMJIT_RELEASE // Define to enable release-mode. +// #define ASMJIT_TRACE // Define to enable tracing. + +// AsmJit Build Backends +// --------------------- +// +// These definitions control which backends to compile. If none of these is +// defined AsmJit will use host architecture by default (for JIT code generation). +// +// #define ASMJIT_BUILD_X86 // Define to enable x86 instruction set (32-bit). +// #define ASMJIT_BUILD_X64 // Define to enable x64 instruction set (64-bit). +// #define ASMJIT_BUILD_HOST // Define to enable host instruction set. + +// AsmJit Build Features +// --------------------- +// +// Flags can be defined to disable standard features. These are handy especially +// when building asmjit statically and some features are not needed or unwanted +// (like Compiler). +// +// AsmJit features are enabled by default. +// #define ASMJIT_DISABLE_COMPILER // Disable Compiler (completely). +// #define ASMJIT_DISABLE_LOGGER // Disable Logger (completely). +// #define ASMJIT_DISABLE_TEXT // Disable everything that contains text +// // representation (instructions, errors, ...). + +// Prevent compile-time errors caused by misconfiguration. 
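+// For example, defining ASMJIT_DISABLE_TEXT without ASMJIT_DISABLE_LOGGER
+// would leave the logger compiled but without the text tables it needs to
+// print, so the check below rejects that combination up front:
+//
+//   #define ASMJIT_DISABLE_LOGGER // must accompany ASMJIT_DISABLE_TEXT
+//   #define ASMJIT_DISABLE_TEXT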
+#if defined(ASMJIT_DISABLE_TEXT) && !defined(ASMJIT_DISABLE_LOGGER) +# error "[asmjit] ASMJIT_DISABLE_TEXT requires ASMJIT_DISABLE_LOGGER to be defined." +#endif // ASMJIT_DISABLE_TEXT && !ASMJIT_DISABLE_LOGGER + +// Detect ASMJIT_DEBUG and ASMJIT_RELEASE if not forced from outside. +#if !defined(ASMJIT_DEBUG) && !defined(ASMJIT_RELEASE) && !defined(NDEBUG) +# define ASMJIT_DEBUG +#else +# define ASMJIT_RELEASE +#endif + +// ASMJIT_EMBED implies ASMJIT_STATIC. +#if defined(ASMJIT_EMBED) && !defined(ASMJIT_STATIC) +# define ASMJIT_STATIC +#endif + +// ============================================================================ +// [asmjit::Build - VERSION] +// ============================================================================ + +// [@VERSION{@] +#define ASMJIT_VERSION_MAJOR 1 +#define ASMJIT_VERSION_MINOR 0 +#define ASMJIT_VERSION_PATCH 0 +#define ASMJIT_VERSION_STRING "1.0.0" +// [@VERSION}@] + +// ============================================================================ +// [asmjit::Build - WIN32] +// ============================================================================ + +// [@WIN32_CRT_NO_DEPRECATE{@] +#if defined(_MSC_VER) && defined(ASMJIT_EXPORTS) +# if !defined(_CRT_SECURE_NO_DEPRECATE) +# define _CRT_SECURE_NO_DEPRECATE +# endif +# if !defined(_CRT_SECURE_NO_WARNINGS) +# define _CRT_SECURE_NO_WARNINGS +# endif +#endif +// [@WIN32_CRT_NO_DEPRECATE}@] + +// [@WIN32_LEAN_AND_MEAN{@] +#if (defined(_WIN32) || defined(_WINDOWS)) && !defined(_WINDOWS_) +# if !defined(WIN32_LEAN_AND_MEAN) +# define WIN32_LEAN_AND_MEAN +# define ASMJIT_UNDEF_WIN32_LEAN_AND_MEAN +# endif +# if !defined(NOMINMAX) +# define NOMINMAX +# define ASMJIT_UNDEF_NOMINMAX +# endif +# include +# if defined(ASMJIT_UNDEF_NOMINMAX) +# undef NOMINMAX +# undef ASMJIT_UNDEF_NOMINMAX +# endif +# if defined(ASMJIT_UNDEF_WIN32_LEAN_AND_MEAN) +# undef WIN32_LEAN_AND_MEAN +# undef ASMJIT_UNDEF_WIN32_LEAN_AND_MEAN +# endif +#endif +// [@WIN32_LEAN_AND_MEAN}@] + +// ============================================================================ +// [asmjit::Build - OS] +// ============================================================================ + +// [@OS{@] +#if defined(_WIN32) || defined(_WINDOWS) +#define ASMJIT_OS_WINDOWS (1) +#else +#define ASMJIT_OS_WINDOWS (0) +#endif + +#if defined(__APPLE__) +# include +# define ASMJIT_OS_MAC (TARGET_OS_MAC) +# define ASMJIT_OS_IOS (TARGET_OS_IPHONE) +#else +# define ASMJIT_OS_MAC (0) +# define ASMJIT_OS_IOS (0) +#endif + +#if defined(__ANDROID__) +# define ASMJIT_OS_ANDROID (1) +#else +# define ASMJIT_OS_ANDROID (0) +#endif + +#if defined(__linux__) || defined(__ANDROID__) +# define ASMJIT_OS_LINUX (1) +#else +# define ASMJIT_OS_LINUX (0) +#endif + +#if defined(__DragonFly__) +# define ASMJIT_OS_DRAGONFLYBSD (1) +#else +# define ASMJIT_OS_DRAGONFLYBSD (0) +#endif + +#if defined(__FreeBSD__) +# define ASMJIT_OS_FREEBSD (1) +#else +# define ASMJIT_OS_FREEBSD (0) +#endif + +#if defined(__NetBSD__) +# define ASMJIT_OS_NETBSD (1) +#else +# define ASMJIT_OS_NETBSD (0) +#endif + +#if defined(__OpenBSD__) +# define ASMJIT_OS_OPENBSD (1) +#else +# define ASMJIT_OS_OPENBSD (0) +#endif + +#if defined(__QNXNTO__) +# define ASMJIT_OS_QNX (1) +#else +# define ASMJIT_OS_QNX (0) +#endif + +#if defined(__sun) +# define ASMJIT_OS_SOLARIS (1) +#else +# define ASMJIT_OS_SOLARIS (0) +#endif + +#if defined(__CYGWIN__) +# define ASMJIT_OS_CYGWIN (1) +#else +# define ASMJIT_OS_CYGWIN (0) +#endif + +#define ASMJIT_OS_BSD ( \ + ASMJIT_OS_FREEBSD || \ + ASMJIT_OS_DRAGONFLYBSD || \ + 
ASMJIT_OS_NETBSD || \ + ASMJIT_OS_OPENBSD || \ + ASMJIT_OS_MAC) +#define ASMJIT_OS_POSIX (!ASMJIT_OS_WINDOWS) +// [@OS}@] + +// ============================================================================ +// [asmjit::Build - ARCH] +// ============================================================================ + +// [@ARCH{@] +// \def ASMJIT_ARCH_ARM32 +// True if the target architecture is a 32-bit ARM. +// +// \def ASMJIT_ARCH_ARM64 +// True if the target architecture is a 64-bit ARM. +// +// \def ASMJIT_ARCH_X86 +// True if the target architecture is a 32-bit X86/IA32 +// +// \def ASMJIT_ARCH_X64 +// True if the target architecture is a 64-bit X64/AMD64 +// +// \def ASMJIT_ARCH_LE +// True if the target architecture is little endian. +// +// \def ASMJIT_ARCH_BE +// True if the target architecture is big endian. +// +// \def ASMJIT_ARCH_64BIT +// True if the target architecture is 64-bit. + +#if (defined(_M_X64 ) || defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_AMD64) || defined(__amd64 ) || defined(__amd64__ )) +# define ASMJIT_ARCH_X64 1 +#else +# define ASMJIT_ARCH_X64 0 +#endif + +#if (defined(_M_IX86 ) || defined(__X86__ ) || defined(__i386 ) || \ + defined(__IA32__) || defined(__I86__ ) || defined(__i386__) || \ + defined(__i486__) || defined(__i586__) || defined(__i686__)) +# define ASMJIT_ARCH_X86 (!ASMJIT_ARCH_X64) +#else +# define ASMJIT_ARCH_X86 0 +#endif + +#if defined(__aarch64__) +# define ASMJIT_ARCH_ARM64 1 +#else +# define ASMJIT_ARCH_ARM64 0 +#endif + +#if (defined(_M_ARM ) || defined(__arm ) || defined(__thumb__ ) || \ + defined(_M_ARMT ) || defined(__arm__ ) || defined(__thumb2__)) +# define ASMJIT_ARCH_ARM32 (!ASMJIT_ARCH_ARM64) +#else +# define ASMJIT_ARCH_ARM32 0 +#endif + +#define ASMJIT_ARCH_LE ( \ + ASMJIT_ARCH_X86 || \ + ASMJIT_ARCH_X64 || \ + ASMJIT_ARCH_ARM32 || \ + ASMJIT_ARCH_ARM64 ) +#define ASMJIT_ARCH_BE (!(ASMJIT_ARCH_LE)) +#define ASMJIT_ARCH_64BIT (ASMJIT_ARCH_X64 || ASMJIT_ARCH_ARM64) +// [@ARCH}@] + +// [@ARCH_UNALIGNED_RW{@] +// \def ASMJIT_ARCH_UNALIGNED_16 +// True if the target architecture allows unaligned 16-bit reads and writes. +// +// \def ASMJIT_ARCH_UNALIGNED_32 +// True if the target architecture allows unaligned 32-bit reads and writes. +// +// \def ASMJIT_ARCH_UNALIGNED_64 +// True if the target architecture allows unaligned 64-bit reads and writes. + +#define ASMJIT_ARCH_UNALIGNED_16 (ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64) +#define ASMJIT_ARCH_UNALIGNED_32 (ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64) +#define ASMJIT_ARCH_UNALIGNED_64 (ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64) +// [@ARCH_UNALIGNED_RW}@] + +// ============================================================================ +// [asmjit::Build - CC] +// ============================================================================ + +// [@CC{@] +// \def ASMJIT_CC_CLANG +// True if the detected C++ compiler is CLANG (contains normalized CLANG version). +// +// \def ASMJIT_CC_CODEGEAR +// True if the detected C++ compiler is CODEGEAR or BORLAND (version not normalized). +// +// \def ASMJIT_CC_GCC +// True if the detected C++ compiler is GCC (contains normalized GCC version). +// +// \def ASMJIT_CC_MSC +// True if the detected C++ compiler is MSC (contains normalized MSC version). +// +// \def ASMJIT_CC_MINGW +// Defined to 32 or 64 in case this is a MINGW, otherwise 0. 
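+// The normalized version values make range checks cheap. For example
+// (illustrative): GCC 4.8.2 yields
+//   ASMJIT_CC_GCC = 4 * 10000000 + 8 * 100000 + 2 = 40800002,
+// so a check like ASMJIT_CC_GCC_GE(4, 8, 0) is a single integer comparison.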
+ +#define ASMJIT_CC_CLANG 0 +#define ASMJIT_CC_CODEGEAR 0 +#define ASMJIT_CC_GCC 0 +#define ASMJIT_CC_MSC 0 + +#if defined(__CODEGEARC__) +# undef ASMJIT_CC_CODEGEAR +# define ASMJIT_CC_CODEGEAR (__CODEGEARC__) +#elif defined(__BORLANDC__) +# undef ASMJIT_CC_CODEGEAR +# define ASMJIT_CC_CODEGEAR (__BORLANDC__) +#elif defined(__clang__) && defined(__clang_minor__) +# undef ASMJIT_CC_CLANG +# define ASMJIT_CC_CLANG (__clang_major__ * 10000000 + __clang_minor__ * 100000 + __clang_patchlevel__) +#elif defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) +# undef ASMJIT_CC_GCC +# define ASMJIT_CC_GCC (__GNUC__ * 10000000 + __GNUC_MINOR__ * 100000 + __GNUC_PATCHLEVEL__) +#elif defined(_MSC_VER) && defined(_MSC_FULL_VER) +# undef ASMJIT_CC_MSC +# if _MSC_VER == _MSC_FULL_VER / 10000 +# define ASMJIT_CC_MSC (_MSC_VER * 100000 + (_MSC_FULL_VER % 10000)) +# else +# define ASMJIT_CC_MSC (_MSC_VER * 100000 + (_MSC_FULL_VER % 100000)) +# endif +#else +# error "[asmjit] Unable to detect the C/C++ compiler." +#endif + +#if ASMJIT_CC_GCC && defined(__GXX_EXPERIMENTAL_CXX0X__) +# define ASMJIT_CC_GCC_CXX0X 1 +#else +# define ASMJIT_CC_GCC_CXX0X 0 +#endif + +#if defined(__MINGW64__) +# define ASMJIT_CC_MINGW 64 +#elif defined(__MINGW32__) +# define ASMJIT_CC_MINGW 32 +#else +# define ASMJIT_CC_MINGW 0 +#endif + +#define ASMJIT_CC_CODEGEAR_EQ(x, y, z) (ASMJIT_CC_CODEGEAR == (x << 8) + y) +#define ASMJIT_CC_CODEGEAR_GE(x, y, z) (ASMJIT_CC_CODEGEAR >= (x << 8) + y) + +#define ASMJIT_CC_CLANG_EQ(x, y, z) (ASMJIT_CC_CLANG == x * 10000000 + y * 100000 + z) +#define ASMJIT_CC_CLANG_GE(x, y, z) (ASMJIT_CC_CLANG >= x * 10000000 + y * 100000 + z) + +#define ASMJIT_CC_GCC_EQ(x, y, z) (ASMJIT_CC_GCC == x * 10000000 + y * 100000 + z) +#define ASMJIT_CC_GCC_GE(x, y, z) (ASMJIT_CC_GCC >= x * 10000000 + y * 100000 + z) + +#define ASMJIT_CC_MSC_EQ(x, y, z) (ASMJIT_CC_MSC == x * 10000000 + y * 100000 + z) +#define ASMJIT_CC_MSC_GE(x, y, z) (ASMJIT_CC_MSC >= x * 10000000 + y * 100000 + z) +// [@CC}@] + +// [@CC_FEATURES{@] +// \def ASMJIT_CC_HAS_NATIVE_CHAR +// True if the C++ compiler treats char as a native type. +// +// \def ASMJIT_CC_HAS_NATIVE_WCHAR_T +// True if the C++ compiler treats wchar_t as a native type. +// +// \def ASMJIT_CC_HAS_NATIVE_CHAR16_T +// True if the C++ compiler treats char16_t as a native type. +// +// \def ASMJIT_CC_HAS_NATIVE_CHAR32_T +// True if the C++ compiler treats char32_t as a native type. +// +// \def ASMJIT_CC_HAS_OVERRIDE +// True if the C++ compiler supports override keyword. +// +// \def ASMJIT_CC_HAS_NOEXCEPT +// True if the C++ compiler supports noexcept keyword. 
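+// These feature macros are meant to be tested with a plain `#if`. A minimal
+// sketch (hypothetical user code, not part of AsmJit):
+//
+//   #if ASMJIT_CC_HAS_NOEXCEPT
+//   # define MY_NOTHROW noexcept
+//   #else
+//   # define MY_NOTHROW
+//   #endif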
+ +#if ASMJIT_CC_CLANG +# define ASMJIT_CC_HAS_ATTRIBUTE (1) +# define ASMJIT_CC_HAS_BUILTIN (1) +# define ASMJIT_CC_HAS_DECLSPEC (0) + +# define ASMJIT_CC_HAS_ALIGNAS (__has_extension(__cxx_alignas__)) +# define ASMJIT_CC_HAS_ALIGNOF (__has_extension(__cxx_alignof__)) +# define ASMJIT_CC_HAS_ASSUME (0) +# define ASMJIT_CC_HAS_ASSUME_ALIGNED (0) +# define ASMJIT_CC_HAS_ATTRIBUTE_ALIGNED (__has_attribute(__aligned__)) +# define ASMJIT_CC_HAS_ATTRIBUTE_ALWAYS_INLINE (__has_attribute(__always_inline__)) +# define ASMJIT_CC_HAS_ATTRIBUTE_NOINLINE (__has_attribute(__noinline__)) +# define ASMJIT_CC_HAS_ATTRIBUTE_NORETURN (__has_attribute(__noreturn__)) +# define ASMJIT_CC_HAS_BUILTIN_ASSUME (__has_builtin(__builtin_assume)) +# define ASMJIT_CC_HAS_BUILTIN_ASSUME_ALIGNED (__has_builtin(__builtin_assume_aligned)) +# define ASMJIT_CC_HAS_BUILTIN_EXPECT (__has_builtin(__builtin_expect)) +# define ASMJIT_CC_HAS_BUILTIN_UNREACHABLE (__has_builtin(__builtin_unreachable)) +# define ASMJIT_CC_HAS_CONSTEXPR (__has_extension(__cxx_constexpr__)) +# define ASMJIT_CC_HAS_DECLTYPE (__has_extension(__cxx_decltype__)) +# define ASMJIT_CC_HAS_DEFAULT_FUNCTION (__has_extension(__cxx_defaulted_functions__)) +# define ASMJIT_CC_HAS_DELETE_FUNCTION (__has_extension(__cxx_deleted_functions__)) +# define ASMJIT_CC_HAS_FINAL (__has_extension(__cxx_override_control__)) +# define ASMJIT_CC_HAS_INITIALIZER_LIST (__has_extension(__cxx_generalized_initializers__)) +# define ASMJIT_CC_HAS_LAMBDA (__has_extension(__cxx_lambdas__)) +# define ASMJIT_CC_HAS_NATIVE_CHAR (1) +# define ASMJIT_CC_HAS_NATIVE_CHAR16_T (__has_extension(__cxx_unicode_literals__)) +# define ASMJIT_CC_HAS_NATIVE_CHAR32_T (__has_extension(__cxx_unicode_literals__)) +# define ASMJIT_CC_HAS_NATIVE_WCHAR_T (1) +# define ASMJIT_CC_HAS_NOEXCEPT (__has_extension(__cxx_noexcept__)) +# define ASMJIT_CC_HAS_NULLPTR (__has_extension(__cxx_nullptr__)) +# define ASMJIT_CC_HAS_OVERRIDE (__has_extension(__cxx_override_control__)) +# define ASMJIT_CC_HAS_RVALUE (__has_extension(__cxx_rvalue_references__)) +# define ASMJIT_CC_HAS_STATIC_ASSERT (__has_extension(__cxx_static_assert__)) +#endif + +#if ASMJIT_CC_CODEGEAR +# define ASMJIT_CC_HAS_ATTRIBUTE (0) +# define ASMJIT_CC_HAS_BUILTIN (0) +# define ASMJIT_CC_HAS_DECLSPEC (1) + +# define ASMJIT_CC_HAS_ALIGNAS (0) +# define ASMJIT_CC_HAS_ALIGNOF (0) +# define ASMJIT_CC_HAS_ASSUME (0) +# define ASMJIT_CC_HAS_ASSUME_ALIGNED (0) +# define ASMJIT_CC_HAS_CONSTEXPR (0) +# define ASMJIT_CC_HAS_DECLSPEC_ALIGN (ASMJIT_CC_CODEGEAR >= 0x0610) +# define ASMJIT_CC_HAS_DECLSPEC_FORCEINLINE (0) +# define ASMJIT_CC_HAS_DECLSPEC_NOINLINE (0) +# define ASMJIT_CC_HAS_DECLSPEC_NORETURN (ASMJIT_CC_CODEGEAR >= 0x0610) +# define ASMJIT_CC_HAS_DECLTYPE (ASMJIT_CC_CODEGEAR >= 0x0610) +# define ASMJIT_CC_HAS_DEFAULT_FUNCTION (0) +# define ASMJIT_CC_HAS_DELETE_FUNCTION (0) +# define ASMJIT_CC_HAS_FINAL (0) +# define ASMJIT_CC_HAS_INITIALIZER_LIST (0) +# define ASMJIT_CC_HAS_LAMBDA (0) +# define ASMJIT_CC_HAS_NATIVE_CHAR (1) +# define ASMJIT_CC_HAS_NATIVE_CHAR16_T (0) +# define ASMJIT_CC_HAS_NATIVE_CHAR32_T (0) +# define ASMJIT_CC_HAS_NATIVE_WCHAR_T (1) +# define ASMJIT_CC_HAS_NOEXCEPT (0) +# define ASMJIT_CC_HAS_NULLPTR (0) +# define ASMJIT_CC_HAS_OVERRIDE (0) +# define ASMJIT_CC_HAS_RVALUE (ASMJIT_CC_CODEGEAR >= 0x0610) +# define ASMJIT_CC_HAS_STATIC_ASSERT (ASMJIT_CC_CODEGEAR >= 0x0610) +#endif + +#if ASMJIT_CC_GCC +# define ASMJIT_CC_HAS_ATTRIBUTE (1) +# define ASMJIT_CC_HAS_BUILTIN (1) +# define ASMJIT_CC_HAS_DECLSPEC (0) + +# define 
ASMJIT_CC_HAS_ALIGNAS (ASMJIT_CC_GCC_GE(4, 8, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_ALIGNOF (ASMJIT_CC_GCC_GE(4, 8, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_ASSUME (0) +# define ASMJIT_CC_HAS_ASSUME_ALIGNED (0) +# define ASMJIT_CC_HAS_ATTRIBUTE_ALIGNED (ASMJIT_CC_GCC_GE(2, 7, 0)) +# define ASMJIT_CC_HAS_ATTRIBUTE_ALWAYS_INLINE (ASMJIT_CC_GCC_GE(4, 4, 0) && !ASMJIT_CC_MINGW) +# define ASMJIT_CC_HAS_ATTRIBUTE_NOINLINE (ASMJIT_CC_GCC_GE(3, 4, 0) && !ASMJIT_CC_MINGW) +# define ASMJIT_CC_HAS_ATTRIBUTE_NORETURN (ASMJIT_CC_GCC_GE(2, 5, 0)) +# define ASMJIT_CC_HAS_BUILTIN_ASSUME (0) +# define ASMJIT_CC_HAS_BUILTIN_ASSUME_ALIGNED (ASMJIT_CC_GCC_GE(4, 7, 0)) +# define ASMJIT_CC_HAS_BUILTIN_EXPECT (1) +# define ASMJIT_CC_HAS_BUILTIN_UNREACHABLE (ASMJIT_CC_GCC_GE(4, 5, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_CONSTEXPR (ASMJIT_CC_GCC_GE(4, 6, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_DECLTYPE (ASMJIT_CC_GCC_GE(4, 3, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_DEFAULT_FUNCTION (ASMJIT_CC_GCC_GE(4, 4, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_DELETE_FUNCTION (ASMJIT_CC_GCC_GE(4, 4, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_FINAL (ASMJIT_CC_GCC_GE(4, 7, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_INITIALIZER_LIST (ASMJIT_CC_GCC_GE(4, 4, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_LAMBDA (ASMJIT_CC_GCC_GE(4, 5, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_NATIVE_CHAR (1) +# define ASMJIT_CC_HAS_NATIVE_CHAR16_T (ASMJIT_CC_GCC_GE(4, 5, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_NATIVE_CHAR32_T (ASMJIT_CC_GCC_GE(4, 5, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_NATIVE_WCHAR_T (1) +# define ASMJIT_CC_HAS_NOEXCEPT (ASMJIT_CC_GCC_GE(4, 6, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_NULLPTR (ASMJIT_CC_GCC_GE(4, 6, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_OVERRIDE (ASMJIT_CC_GCC_GE(4, 7, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_RVALUE (ASMJIT_CC_GCC_GE(4, 3, 0) && ASMJIT_CC_GCC_CXX0X) +# define ASMJIT_CC_HAS_STATIC_ASSERT (ASMJIT_CC_GCC_GE(4, 3, 0) && ASMJIT_CC_GCC_CXX0X) +#endif + +#if ASMJIT_CC_MSC +# define ASMJIT_CC_HAS_ATTRIBUTE (0) +# define ASMJIT_CC_HAS_BUILTIN (0) +# define ASMJIT_CC_HAS_DECLSPEC (1) + +# define ASMJIT_CC_HAS_ALIGNAS (ASMJIT_CC_MSC_GE(19, 0, 0)) +# define ASMJIT_CC_HAS_ALIGNOF (ASMJIT_CC_MSC_GE(19, 0, 0)) +# define ASMJIT_CC_HAS_ASSUME (1) +# define ASMJIT_CC_HAS_ASSUME_ALIGNED (0) +# define ASMJIT_CC_HAS_CONSTEXPR (ASMJIT_CC_MSC_GE(19, 0, 0)) +# define ASMJIT_CC_HAS_DECLSPEC_ALIGN (1) +# define ASMJIT_CC_HAS_DECLSPEC_FORCEINLINE (1) +# define ASMJIT_CC_HAS_DECLSPEC_NOINLINE (1) +# define ASMJIT_CC_HAS_DECLSPEC_NORETURN (1) +# define ASMJIT_CC_HAS_DECLTYPE (ASMJIT_CC_MSC_GE(16, 0, 0)) +# define ASMJIT_CC_HAS_DEFAULT_FUNCTION (ASMJIT_CC_MSC_GE(18, 0, 0)) +# define ASMJIT_CC_HAS_DELETE_FUNCTION (ASMJIT_CC_MSC_GE(18, 0, 0)) +# define ASMJIT_CC_HAS_FINAL (ASMJIT_CC_MSC_GE(14, 0, 0)) +# define ASMJIT_CC_HAS_INITIALIZER_LIST (ASMJIT_CC_MSC_GE(18, 0, 0)) +# define ASMJIT_CC_HAS_LAMBDA (ASMJIT_CC_MSC_GE(16, 0, 0)) +# define ASMJIT_CC_HAS_NATIVE_CHAR (1) +# define ASMJIT_CC_HAS_NATIVE_CHAR16_T (ASMJIT_CC_MSC_GE(19, 0, 0)) +# define ASMJIT_CC_HAS_NATIVE_CHAR32_T (ASMJIT_CC_MSC_GE(19, 0, 0)) +# if defined(_NATIVE_WCHAR_T_DEFINED) +# define ASMJIT_CC_HAS_NATIVE_WCHAR_T (1) +# else +# define ASMJIT_CC_HAS_NATIVE_WCHAR_T (0) +# endif +# define ASMJIT_CC_HAS_NOEXCEPT (ASMJIT_CC_MSC_GE(19, 0, 0)) +# define ASMJIT_CC_HAS_NULLPTR (ASMJIT_CC_MSC_GE(16, 0, 0)) +# define 
ASMJIT_CC_HAS_OVERRIDE (ASMJIT_CC_MSC_GE(14, 0, 0)) +# define ASMJIT_CC_HAS_RVALUE (ASMJIT_CC_MSC_GE(16, 0, 0)) +# define ASMJIT_CC_HAS_STATIC_ASSERT (ASMJIT_CC_MSC_GE(16, 0, 0)) +#endif + +#if !ASMJIT_CC_HAS_ATTRIBUTE +# define ASMJIT_CC_HAS_ATTRIBUTE_ALIGNED (0) +# define ASMJIT_CC_HAS_ATTRIBUTE_ALWAYS_INLINE (0) +# define ASMJIT_CC_HAS_ATTRIBUTE_NOINLINE (0) +# define ASMJIT_CC_HAS_ATTRIBUTE_NORETURN (0) +#endif + +#if !ASMJIT_CC_HAS_BUILTIN +# define ASMJIT_CC_HAS_BUILTIN_ASSUME (0) +# define ASMJIT_CC_HAS_BUILTIN_ASSUME_ALIGNED (0) +# define ASMJIT_CC_HAS_BUILTIN_EXPECT (0) +# define ASMJIT_CC_HAS_BUILTIN_UNREACHABLE (0) +#endif + +#if !ASMJIT_CC_HAS_DECLSPEC +# define ASMJIT_CC_HAS_DECLSPEC_ALIGN (0) +# define ASMJIT_CC_HAS_DECLSPEC_FORCEINLINE (0) +# define ASMJIT_CC_HAS_DECLSPEC_NOINLINE (0) +# define ASMJIT_CC_HAS_DECLSPEC_NORETURN (0) +#endif +// [@CC_FEATURES}@] + +// [@CC_API{@] +// \def ASMJIT_API +// The decorated function is asmjit API and should be exported. +#if !defined(ASMJIT_API) +# if defined(ASMJIT_STATIC) +# define ASMJIT_API +# elif ASMJIT_OS_WINDOWS +# if (ASMJIT_CC_GCC || ASMJIT_CC_CLANG) && !ASMJIT_CC_MINGW +# if defined(ASMJIT_EXPORTS) +# define ASMJIT_API __attribute__((__dllexport__)) +# else +# define ASMJIT_API __attribute__((__dllimport__)) +# endif +# else +# if defined(ASMJIT_EXPORTS) +# define ASMJIT_API __declspec(dllexport) +# else +# define ASMJIT_API __declspec(dllimport) +# endif +# endif +# else +# if ASMJIT_CC_CLANG || ASMJIT_CC_GCC_GE(4, 0, 0) +# define ASMJIT_API __attribute__((__visibility__("default"))) +# endif +# endif +#endif +// [@CC_API}@] + +// [@CC_VARAPI{@] +// \def ASMJIT_VARAPI +// The decorated variable is part of asmjit API and is exported. +#if !defined(ASMJIT_VARAPI) +# define ASMJIT_VARAPI extern ASMJIT_API +#endif +// [@CC_VARAPI}@] + +// [@CC_VIRTAPI{@] +// \def ASMJIT_VIRTAPI +// The decorated class has a virtual table and is part of asmjit API. +// +// This is basically a workaround. When using MSVC and marking class as DLL +// export everything gets exported, which is unwanted in most projects. MSVC +// automatically exports typeinfo and vtable if at least one symbol of the +// class is exported. However, GCC has some strange behavior that even if +// one or more symbol is exported it doesn't export typeinfo unless the +// class itself is decorated with "visibility(default)" (i.e. asmjit_API). +#if (ASMJIT_CC_GCC || ASMJIT_CC_CLANG) && !ASMJIT_OS_WINDOWS +# define ASMJIT_VIRTAPI ASMJIT_API +#else +# define ASMJIT_VIRTAPI +#endif +// [@CC_VIRTAPI}@] + +// [@CC_INLINE{@] +// \def ASMJIT_INLINE +// Always inline the decorated function. +#if ASMJIT_CC_HAS_ATTRIBUTE_ALWAYS_INLINE +# define ASMJIT_INLINE inline __attribute__((__always_inline__)) +#elif ASMJIT_CC_HAS_DECLSPEC_FORCEINLINE +# define ASMJIT_INLINE __forceinline +#else +# define ASMJIT_INLINE inline +#endif +// [@CC_INLINE}@] + +// [@CC_NOINLINE{@] +// \def ASMJIT_NOINLINE +// Never inline the decorated function. +#if ASMJIT_CC_HAS_ATTRIBUTE_NOINLINE +# define ASMJIT_NOINLINE __attribute__((__noinline__)) +#elif ASMJIT_CC_HAS_DECLSPEC_NOINLINE +# define ASMJIT_NOINLINE __declspec(noinline) +#else +# define ASMJIT_NOINLINE +#endif +// [@CC_NOINLINE}@] + +// [@CC_NORETURN{@] +// \def ASMJIT_NORETURN +// The decorated function never returns (exit, assertion failure, etc...). 
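+// A typical use (illustrative only) is an assertion handler that never
+// returns:
+//
+//   ASMJIT_NORETURN void assertionFailed(const char* file, int line);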
+#if ASMJIT_CC_HAS_ATTRIBUTE_NORETURN
+# define ASMJIT_NORETURN __attribute__((__noreturn__))
+#elif ASMJIT_CC_HAS_DECLSPEC_NORETURN
+# define ASMJIT_NORETURN __declspec(noreturn)
+#else
+# define ASMJIT_NORETURN
+#endif
+// [@CC_NORETURN}@]
+
+// [@CC_CDECL{@]
+// \def ASMJIT_CDECL
+// Standard C function calling convention decorator (__cdecl).
+#if ASMJIT_ARCH_X86
+# if ASMJIT_CC_HAS_ATTRIBUTE
+# define ASMJIT_CDECL __attribute__((__cdecl__))
+# else
+# define ASMJIT_CDECL __cdecl
+# endif
+#else
+# define ASMJIT_CDECL
+#endif
+// [@CC_CDECL}@]
+
+// [@CC_STDCALL{@]
+// \def ASMJIT_STDCALL
+// StdCall function calling convention decorator (__stdcall).
+#if ASMJIT_ARCH_X86
+# if ASMJIT_CC_HAS_ATTRIBUTE
+# define ASMJIT_STDCALL __attribute__((__stdcall__))
+# else
+# define ASMJIT_STDCALL __stdcall
+# endif
+#else
+# define ASMJIT_STDCALL
+#endif
+// [@CC_STDCALL}@]
+
+// [@CC_FASTCALL{@]
+// \def ASMJIT_FASTCALL
+// FastCall function calling convention decorator (__fastcall).
+#if ASMJIT_ARCH_X86
+# if ASMJIT_CC_HAS_ATTRIBUTE
+# define ASMJIT_FASTCALL __attribute__((__fastcall__))
+# else
+# define ASMJIT_FASTCALL __fastcall
+# endif
+#else
+# define ASMJIT_FASTCALL
+#endif
+// [@CC_FASTCALL}@]
+
+// [@CC_REGPARM{@]
+// \def ASMJIT_REGPARM(n)
+// A custom calling convention which passes n arguments in registers.
+#if ASMJIT_ARCH_X86 && (ASMJIT_CC_GCC || ASMJIT_CC_CLANG)
+# define ASMJIT_REGPARM(n) __attribute__((__regparm__(n)))
+#else
+# define ASMJIT_REGPARM(n)
+#endif
+// [@CC_REGPARM}@]
+
+// [@CC_NOEXCEPT{@]
+// \def ASMJIT_NOEXCEPT
+// The decorated function never throws an exception (noexcept).
+#if ASMJIT_CC_HAS_NOEXCEPT
+# define ASMJIT_NOEXCEPT noexcept
+#else
+# define ASMJIT_NOEXCEPT
+#endif
+// [@CC_NOEXCEPT}@]
+
+// [@CC_NOP{@]
+// \def ASMJIT_NOP
+// No operation.
+#if !defined(ASMJIT_NOP)
+# define ASMJIT_NOP ((void)0)
+#endif
+// [@CC_NOP}@]
+
+// [@CC_ASSUME{@]
+// \def ASMJIT_ASSUME(exp)
+// Assume that the expression exp is always true.
+#if ASMJIT_CC_HAS_ASSUME
+# define ASMJIT_ASSUME(exp) __assume(exp)
+#elif ASMJIT_CC_HAS_BUILTIN_ASSUME
+# define ASMJIT_ASSUME(exp) __builtin_assume(exp)
+#elif ASMJIT_CC_HAS_BUILTIN_UNREACHABLE
+# define ASMJIT_ASSUME(exp) do { if (!(exp)) __builtin_unreachable(); } while (0)
+#else
+# define ASMJIT_ASSUME(exp) ((void)0)
+#endif
+// [@CC_ASSUME}@]
+
+// [@CC_ASSUME_ALIGNED{@]
+// \def ASMJIT_ASSUME_ALIGNED(p, alignment)
+// Assume that the pointer 'p' is aligned to at least 'alignment' bytes.
+#if ASMJIT_CC_HAS_ASSUME_ALIGNED
+# define ASMJIT_ASSUME_ALIGNED(p, alignment) __assume_aligned(p, alignment)
+#elif ASMJIT_CC_HAS_BUILTIN_ASSUME_ALIGNED
+# define ASMJIT_ASSUME_ALIGNED(p, alignment) p = __builtin_assume_aligned(p, alignment)
+#else
+# define ASMJIT_ASSUME_ALIGNED(p, alignment) ((void)0)
+#endif
+// [@CC_ASSUME_ALIGNED}@]
+
+// [@CC_EXPECT{@]
+// \def ASMJIT_LIKELY(exp)
+// Expression exp is likely to be true.
+//
+// \def ASMJIT_UNLIKELY(exp)
+// Expression exp is likely to be false.
+#if ASMJIT_CC_HAS_BUILTIN_EXPECT
+# define ASMJIT_LIKELY(exp) __builtin_expect(!!(exp), 1)
+# define ASMJIT_UNLIKELY(exp) __builtin_expect(!!(exp), 0)
+#else
+# define ASMJIT_LIKELY(exp) exp
+# define ASMJIT_UNLIKELY(exp) exp
+#endif
+// [@CC_EXPECT}@]
+
+// [@CC_FALLTHROUGH{@]
+// \def ASMJIT_FALLTHROUGH
+// The code falls through annotation (switch / case).
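+// Illustrative use inside a switch (not taken from the AsmJit sources):
+//
+//   switch (op) {
+//     case kOpA:
+//       prepare();
+//       ASMJIT_FALLTHROUGH;
+//     case kOpB:
+//       emit();
+//       break;
+//   }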
+#if ASMJIT_CC_CLANG && __cplusplus >= 201103L
+# define ASMJIT_FALLTHROUGH [[clang::fallthrough]]
+#else
+# define ASMJIT_FALLTHROUGH (void)0
+#endif
+// [@CC_FALLTHROUGH}@]
+
+// [@CC_UNUSED{@]
+// \def ASMJIT_UNUSED(x)
+// Mark a variable x as unused.
+#define ASMJIT_UNUSED(x) (void)(x)
+// [@CC_UNUSED}@]
+
+// [@CC_OFFSET_OF{@]
+// \def ASMJIT_OFFSET_OF(x, y).
+// Get the offset of a member y of a struct x at compile-time.
+#define ASMJIT_OFFSET_OF(x, y) ((int)(intptr_t)((const char*)&((const x*)0x1)->y) - 1)
+// [@CC_OFFSET_OF}@]
+
+// [@CC_ARRAY_SIZE{@]
+// \def ASMJIT_ARRAY_SIZE(x)
+// Get the array size of x at compile-time.
+#define ASMJIT_ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+// [@CC_ARRAY_SIZE}@]
+
+// ============================================================================
+// [asmjit::Build - STDTYPES]
+// ============================================================================
+
+// [@STDTYPES{@]
+#if defined(__MINGW32__) || defined(__MINGW64__)
+# include <stdint.h>
+#endif
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+# include <limits.h>
+# if !defined(ASMJIT_SUPPRESS_STD_TYPES)
+# if (_MSC_VER < 1300)
+typedef signed char int8_t;
+typedef signed short int16_t;
+typedef signed int int32_t;
+typedef signed __int64 int64_t;
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned __int64 uint64_t;
+# else
+typedef __int8 int8_t;
+typedef __int16 int16_t;
+typedef __int32 int32_t;
+typedef __int64 int64_t;
+typedef unsigned __int8 uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+# endif
+# endif
+# define ASMJIT_INT64_C(x) (x##i64)
+# define ASMJIT_UINT64_C(x) (x##ui64)
+#else
+# include <stdint.h>
+# include <limits.h>
+# define ASMJIT_INT64_C(x) (x##ll)
+# define ASMJIT_UINT64_C(x) (x##ull)
+#endif
+// [@STDTYPES}@]
+
+// ============================================================================
+// [asmjit::Build - Dependencies]
+// ============================================================================
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <new>
+
+#if ASMJIT_OS_POSIX
+# include <pthread.h>
+#endif // ASMJIT_OS_POSIX
+
+// ============================================================================
+// [asmjit::Build - Additional]
+// ============================================================================
+
+// Build host architecture if no architecture is selected.
+#if !defined(ASMJIT_BUILD_HOST) && \
+    !defined(ASMJIT_BUILD_X86) && \
+    !defined(ASMJIT_BUILD_X64)
+# define ASMJIT_BUILD_HOST
+#endif
+
+// Autodetect host architecture if enabled.
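+// On a 64-bit x86 host, for example, the block below reduces to defining
+// ASMJIT_BUILD_X64 only (ASMJIT_ARCH_X86 is 0 there), so a plain
+// ASMJIT_BUILD_HOST build compiles just the backend the machine can run.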
+#if defined(ASMJIT_BUILD_HOST) +# if ASMJIT_ARCH_X86 && !defined(ASMJIT_BUILD_X86) +# define ASMJIT_BUILD_X86 +# endif // ASMJIT_ARCH_X86 && !ASMJIT_BUILD_X86 +# if ASMJIT_ARCH_X64 && !defined(ASMJIT_BUILD_X64) +# define ASMJIT_BUILD_X64 +# endif // ASMJIT_ARCH_X64 && !ASMJIT_BUILD_X64 +#endif // ASMJIT_BUILD_HOST + +#if defined(_MSC_VER) && _MSC_VER >= 1400 +# define ASMJIT_ENUM(name) enum name : uint32_t +#else +# define ASMJIT_ENUM(name) enum name +#endif + +#if ASMJIT_ARCH_LE +# define _ASMJIT_ARCH_INDEX(total, index) (index) +#else +# define _ASMJIT_ARCH_INDEX(total, index) ((total) - 1 - (index)) +#endif + +#if !defined(ASMJIT_ALLOC) && !defined(ASMJIT_REALLOC) && !defined(ASMJIT_FREE) +# define ASMJIT_ALLOC(size) ::malloc(size) +# define ASMJIT_REALLOC(ptr, size) ::realloc(ptr, size) +# define ASMJIT_FREE(ptr) ::free(ptr) +#else +# if !defined(ASMJIT_ALLOC) || !defined(ASMJIT_REALLOC) || !defined(ASMJIT_FREE) +# error "[asmjit] You must provide ASMJIT_ALLOC, ASMJIT_REALLOC and ASMJIT_FREE." +# endif +#endif // !ASMJIT_ALLOC && !ASMJIT_REALLOC && !ASMJIT_FREE + +#define ASMJIT_NO_COPY(...) \ +private: \ + ASMJIT_INLINE __VA_ARGS__(const __VA_ARGS__& other) ASMJIT_NOEXCEPT; \ + ASMJIT_INLINE __VA_ARGS__& operator=(const __VA_ARGS__& other) ASMJIT_NOEXCEPT; \ +public: + +// ============================================================================ +// [asmjit::Build - Relative Path] +// ============================================================================ + +namespace asmjit { +namespace DebugUtils { + +// Workaround that is used to convert an absolute path to a relative one at +// a C macro level, used by asserts and tracing. This workaround is needed +// as some build systems always convert the source code files to use absolute +// paths. Please note that if absolute paths are used this doesn't remove them +// from the compiled binary and can be still considered a security risk. +enum { + kSourceRelativePathOffset = int(sizeof(__FILE__) - sizeof("asmjit/build.h")) +}; + +// ASMJIT_TRACE is only used by sources and private headers. It's safe to make +// it unavailable outside of AsmJit. +#if defined(ASMJIT_EXPORTS) +static inline int disabledTrace(...) { return 0; } +# if defined(ASMJIT_TRACE) +# define ASMJIT_TSEC(section) section +# define ASMJIT_TLOG ::printf +# else +# define ASMJIT_TSEC(section) ASMJIT_NOP +# define ASMJIT_TLOG 0 && ::asmjit::DebugUtils::disabledTrace +# endif // ASMJIT_TRACE +#endif // ASMJIT_EXPORTS + +} // DebugUtils namespace +} // asmjit namespace + +// ============================================================================ +// [asmjit::Build - Test] +// ============================================================================ + +// Include a unit testing package if this is a `asmjit_test` build. +#if defined(ASMJIT_TEST) +# include "../test/broken.h" +#endif // ASMJIT_TEST + +// [Guard] +#endif // _ASMJIT_BUILD_H diff --git a/DynamicHooks/thirdparty/AsmJit/host.h b/DynamicHooks/thirdparty/AsmJit/host.h new file mode 100644 index 0000000..d99ca9d --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/host.h @@ -0,0 +1,53 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_HOST_H +#define _ASMJIT_HOST_H + +// [Dependencies] +#include "./base.h" + +// [X86 / X64] +#if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64 +#include "./x86.h" + +namespace asmjit { + +// Define `asmjit::host` namespace wrapping `asmjit::x86`. 
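+// Code written against `asmjit::host` (e.g. `host::GpReg`, `HostAssembler`)
+// stays source-compatible if another backend is ever selected; on x86/x64
+// it is a plain alias for the `asmjit::x86` types defined below.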
+namespace host { using namespace ::asmjit::x86; } + +// Define host assembler. +typedef X86Assembler HostAssembler; + +// Define host operands. +typedef X86GpReg GpReg; +typedef X86FpReg FpReg; +typedef X86MmReg MmReg; +typedef X86XmmReg XmmReg; +typedef X86YmmReg YmmReg; +typedef X86SegReg SegReg; +typedef X86Mem Mem; + +// Define host compiler and related. +#if !defined(ASMJIT_DISABLE_COMPILER) +typedef X86Compiler HostCompiler; +typedef X86CallNode HostCallNode; +typedef X86FuncDecl HostFuncDecl; +typedef X86FuncNode HostFuncNode; + +typedef X86GpVar GpVar; +typedef X86MmVar MmVar; +typedef X86XmmVar XmmVar; +typedef X86YmmVar YmmVar; +#endif // !ASMJIT_DISABLE_COMPILER + +} // asmjit namespace + +#endif // ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64 + +// [Guard] +#endif // _ASMJIT_HOST_H diff --git a/DynamicHooks/thirdparty/AsmJit/x86.h b/DynamicHooks/thirdparty/AsmJit/x86.h new file mode 100644 index 0000000..4d9e796 --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/x86.h @@ -0,0 +1,21 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_X86_H +#define _ASMJIT_X86_H + +// [Dependencies] +#include "./base.h" + +#include "./x86/x86assembler.h" +#include "./x86/x86compiler.h" +#include "./x86/x86compilerfunc.h" +#include "./x86/x86inst.h" +#include "./x86/x86operand.h" + +// [Guard] +#endif // _ASMJIT_X86_H diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86assembler.cpp b/DynamicHooks/thirdparty/AsmJit/x86/x86assembler.cpp new file mode 100644 index 0000000..105e8bb --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/x86/x86assembler.cpp @@ -0,0 +1,4304 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Export] +#define ASMJIT_EXPORTS + +// [Guard] +#include "../build.h" +#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64) + +// [Dependencies] +#include "../base/containers.h" +#include "../base/cpuinfo.h" +#include "../base/logger.h" +#include "../base/runtime.h" +#include "../base/utils.h" +#include "../base/vmem.h" +#include "../x86/x86assembler.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [Constants] +// ============================================================================ + +enum { kX86RexNoRexMask = kX86InstOptionRex | _kX86InstOptionNoRex }; + +//! \internal +//! +//! X86/X64 bytes used to encode important prefixes. +enum X86Byte { + //! 1-byte REX prefix mask. + kX86ByteRex = 0x40, + + //! 1-byte REX.W component. + kX86ByteRexW = 0x08, + + //! 2-byte VEX prefix: + //! - `[0]` - `0xC5`. + //! - `[1]` - `RvvvvLpp`. + kX86ByteVex2 = 0xC5, + + //! 3-byte VEX prefix. + //! - `[0]` - `0xC4`. + //! - `[1]` - `RXBmmmmm`. + //! - `[2]` - `WvvvvLpp`. + kX86ByteVex3 = 0xC4, + + //! 3-byte XOP prefix. + //! - `[0]` - `0x8F`. + //! - `[1]` - `RXBmmmmm`. + //! - `[2]` - `WvvvvLpp`. + kX86ByteXop3 = 0x8F, + + //! 4-byte EVEX prefix. + //! - `[0]` - `0x62`. + //! - `[1]` - Payload0 or `P[ 7: 0]` - `[R X B R' 0 0 m m]`. + //! - `[2]` - Payload1 or `P[15: 8]` - `[W v v v v 1 p p]`. + //! - `[3]` - Payload2 or `P[23:16]` - `[z L' L b V' a a a]`. + //! + //! Groups: + //! - `P[ 1: 0]` - EXT: VEX.mmmmm, only lowest 2 bits used. + //! - `P[ 3: 2]` - ___: Must be 0. + //! - `P[ 4]` - REG: EVEX.R'. + //! - `P[ 5]` - REG: EVEX.B. + //! - `P[ 6]` - REG: EVEX.X. + //! 
- `P[ 7]` - REG: EVEX.R.
+  //!   - `P[ 9: 8]` - EXT: VEX.pp.
+  //!   - `P[   10]` - ___: Must be 1.
+  //!   - `P[14:11]` - REG: 2nd SRC vector register (4 bits).
+  //!   - `P[   15]` - EXT: VEX.W.
+  //!   - `P[18:16]` - REG: K registers k0...k7 (Merging/Zeroing Vector Ops).
+  //!   - `P[   19]` - REG: 2nd SRC vector register (Hi bit).
+  //!   - `P[   20]` - EXT: Broadcast/Static-Rounding/SAE bit.
+  //!   - `P[22:21]` - EXT: Vector Length/Rounding Control.
+  //!   - `P[   23]` - EXT: Destination result behavior (Merging/Zeroing Vector Ops).
+  kX86ByteEvex = 0x62
+};
+
+// AsmJit specific (used to encode VVVV field in XOP/VEX).
+enum VexVVVV {
+  kVexVVVVShift = 12,
+  kVexVVVVMask = 0xF << kVexVVVVShift
+};
+
+//! \internal
+//!
+//! Instruction 2-byte/3-byte opcode prefix definition.
+struct X86OpCodeMM {
+  uint8_t len;
+  uint8_t data[3];
+};
+
+//! \internal
+//!
+//! Mandatory prefixes used to encode [66, F3, F2] and [9B].
+static const uint8_t x86OpCodePP[8] = {
+  0x00, 0x66, 0xF3, 0xF2, 0x00, 0x00, 0x00, 0x9B
+};
+
+//! \internal
+//!
+//! Instruction 2-byte/3-byte opcode prefix data.
+static const X86OpCodeMM x86OpCodeMM[] = {
+  { 0, { 0x00, 0x00, 0 } },
+  { 1, { 0x0F, 0x00, 0 } },
+  { 2, { 0x0F, 0x38, 0 } },
+  { 2, { 0x0F, 0x3A, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 2, { 0x0F, 0x01, 0 } }
+};
+
+static const uint8_t x86SegmentPrefix[8] = { 0x00, 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65 };
+static const uint8_t x86OpCodePushSeg[8] = { 0x00, 0x06, 0x0E, 0x16, 0x1E, 0xA0, 0xA8 };
+static const uint8_t x86OpCodePopSeg[8]  = { 0x00, 0x07, 0x00, 0x17, 0x1F, 0xA1, 0xA9 };
+
+// ============================================================================
+// [Utils]
+// ============================================================================
+
+static ASMJIT_INLINE uint32_t x86RexFromOpCodeAndOptions(uint32_t opCode, uint32_t options) {
+  uint32_t rex = (opCode >> (kX86InstOpCode_W_Shift - 3));
+  ASMJIT_ASSERT((rex & ~static_cast<uint32_t>(0x08)) == 0);
+
+  return rex + (options & kX86RexNoRexMask);
+}
+
+static ASMJIT_INLINE bool x86RexIsInvalid(uint32_t rex) {
+  return rex >= _kX86InstOptionNoRex;
+}
+
+//! Encode ModR/M.
+static ASMJIT_INLINE uint32_t x86EncodeMod(uint32_t m, uint32_t o, uint32_t rm) {
+  ASMJIT_ASSERT(m <= 3);
+  ASMJIT_ASSERT(o <= 7);
+  ASMJIT_ASSERT(rm <= 7);
+  return (m << 6) + (o << 3) + rm;
+}
+
+//! Encode SIB.
+static ASMJIT_INLINE uint32_t x86EncodeSib(uint32_t s, uint32_t i, uint32_t b) {
+  ASMJIT_ASSERT(s <= 3);
+  ASMJIT_ASSERT(i <= 7);
+  ASMJIT_ASSERT(b <= 7);
+  return (s << 6) + (i << 3) + b;
+}
+
+//! Get if the given pointers `a` and `b` can be encoded by using relative
+//! displacement, which fits into a signed 32-bit integer.
+static ASMJIT_INLINE bool x64IsRelative(Ptr a, Ptr b) {
+  SignedPtr diff = static_cast<SignedPtr>(a) - static_cast<SignedPtr>(b);
+  return Utils::isInt32(diff);
+}
+
+//! Cast `reg` to `X86Reg` and get the register index.
+static ASMJIT_INLINE uint32_t x86OpReg(const Operand* reg) {
+  return static_cast<const X86Reg*>(reg)->getRegIndex();
+}
+
+//! Cast `mem` to `X86Mem` and return it.
+static ASMJIT_INLINE const X86Mem* x86OpMem(const Operand* mem) {
+  return static_cast<const X86Mem*>(mem);
+}
+
+//! Combine `regIndex` and `vvvvIndex` into single value (used by AVX and AVX-512).
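+//!
+//! For example, packing destination register index 1 with vvvv index 2
+//! yields `1 + (2 << 12) == 0x2001`; the emitter extracts the vvvv field
+//! again when it builds the VEX/XOP prefix.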
+static ASMJIT_INLINE uint32_t x86RegAndVvvv(uint32_t regIndex, uint32_t vvvvIndex) { + return regIndex + (vvvvIndex << kVexVVVVShift); +} + +//! Get `O` field of `opCode`. +static ASMJIT_INLINE uint32_t x86ExtractO(uint32_t opCode) { + return (opCode >> kX86InstOpCode_O_Shift) & 0x07; +} + +static ASMJIT_INLINE bool x86IsGpq(const Operand* op) { return op->isRegType(kX86RegTypeGpq); } +static ASMJIT_INLINE bool x86IsGpq(const X86Reg* reg) { return reg->isGpq(); } + +static ASMJIT_INLINE bool x86IsXmm(const Operand* op) { return op->isRegType(kX86RegTypeXmm); } +static ASMJIT_INLINE bool x86IsXmm(const X86Reg* reg) { return reg->isXmm(); } + +static ASMJIT_INLINE bool x86IsYmm(const Operand* op) { return op->isRegType(kX86RegTypeYmm); } +static ASMJIT_INLINE bool x86IsYmm(const X86Reg* reg) { return reg->isYmm(); } + +static ASMJIT_INLINE bool x86IsZmm(const Operand* op) { return op->isRegType(kX86RegTypeZmm); } +static ASMJIT_INLINE bool x86IsZmm(const X86Reg* reg) { return reg->isZmm(); } + +// ============================================================================ +// [Macros] +// ============================================================================ + +#define ENC_OPS(op0, op1, op2) \ + ((Operand::kType##op0) + ((Operand::kType##op1) << 3) + ((Operand::kType##op2) << 6)) + +#define ADD_66H_P(exp) \ + do { \ + opCode |= (static_cast(exp) << kX86InstOpCode_PP_Shift); \ + } while (0) + +#define ADD_66H_P_BY_SIZE(sz) \ + do { \ + opCode |= (static_cast(sz) & 0x02) << (kX86InstOpCode_PP_Shift - 1); \ + } while (0) + +#define ADD_REX_W(exp) \ + do { \ + if (Arch == kArchX64) \ + opCode |= static_cast(exp) << kX86InstOpCode_W_Shift; \ + } while (0) + +#define ADD_REX_W_BY_SIZE(sz) \ + do { \ + if (Arch == kArchX64 && (sz) == 8) \ + opCode |= kX86InstOpCode_W; \ + } while (0) + +#define ADD_VEX_W(exp) \ + do { \ + opCode |= static_cast(exp) << kX86InstOpCode_W_Shift; \ + } while (0) + +#define ADD_VEX_L(exp) \ + do { \ + opCode |= static_cast(exp) << kX86InstOpCode_L_Shift; \ + } while (0) + +#define EMIT_BYTE(_Val_) \ + do { \ + cursor[0] = static_cast((_Val_) & 0xFF); \ + cursor += 1; \ + } while (0) + +#define EMIT_WORD(_Val_) \ + do { \ + Utils::writeU16uLE(cursor, static_cast(_Val_)); \ + cursor += 2; \ + } while (0) + +#define EMIT_DWORD(_Val_) \ + do { \ + Utils::writeU32uLE(cursor, static_cast(_Val_)); \ + cursor += 4; \ + } while (0) + +#define EMIT_QWORD(_Val_) \ + do { \ + Utils::writeU64uLE(cursor, static_cast(_Val_)); \ + cursor += 8; \ + } while (0) + +#define EMIT_PP(_Val_) \ + do { \ + uint32_t ppIndex = ((_Val_) >> kX86InstOpCode_PP_Shift) & (kX86InstOpCode_PP_Mask >> kX86InstOpCode_PP_Shift); \ + uint8_t ppCode = x86OpCodePP[ppIndex]; \ + \ + if (!ppIndex) \ + break; \ + \ + cursor[0] = ppCode; \ + cursor++; \ + } while (0) + +#define EMIT_MM(_Val_) \ + do { \ + uint32_t mmIndex = ((_Val_) >> kX86InstOpCode_MM_Shift) & (kX86InstOpCode_MM_Mask >> kX86InstOpCode_MM_Shift); \ + const X86OpCodeMM& mmCode = x86OpCodeMM[mmIndex]; \ + \ + if (!mmIndex) \ + break; \ + \ + cursor[0] = mmCode.data[0]; \ + cursor[1] = mmCode.data[1]; \ + cursor += mmCode.len; \ + } while (0) + +// ============================================================================ +// [asmjit::X86Assembler - Construction / Destruction] +// ============================================================================ + +X86Assembler::X86Assembler(Runtime* runtime, uint32_t arch) + : Assembler(runtime), + zax(NoInit), + zcx(NoInit), + zdx(NoInit), + zbx(NoInit), + zsp(NoInit), + zbp(NoInit), + 
zsi(NoInit), + zdi(NoInit) { + + ASMJIT_ASSERT(arch == kArchX86 || arch == kArchX64); + _setArch(arch); +} + +X86Assembler::~X86Assembler() {} + +// ============================================================================ +// [asmjit::X86Assembler - Arch] +// ============================================================================ + +Error X86Assembler::_setArch(uint32_t arch) { +#if defined(ASMJIT_BUILD_X86) + if (arch == kArchX86) { + _arch = arch; + _regSize = 4; + + _regCount.reset(); + _regCount._gp = 8; + _regCount._mm = 8; + _regCount._k = 8; + _regCount._xyz = 8; + ::memcpy(&zax, &x86RegData.gpd, sizeof(Operand) * 8); + + return kErrorOk; + } +#endif // ASMJIT_BUILD_X86 + +#if defined(ASMJIT_BUILD_X64) + if (arch == kArchX64) { + _arch = arch; + _regSize = 8; + + _regCount.reset(); + _regCount._gp = 16; + _regCount._mm = 8; + _regCount._k = 8; + _regCount._xyz = 16; + ::memcpy(&zax, &x86RegData.gpq, sizeof(Operand) * 8); + + return kErrorOk; + } +#endif // ASMJIT_BUILD_X64 + + return kErrorInvalidArgument; +} + +// ============================================================================ +// [asmjit::X86Assembler - Embed] +// ============================================================================ + +Error X86Assembler::embedLabel(const Label& op) { + ASMJIT_ASSERT(op.getId() != kInvalidValue); + uint32_t regSize = _regSize; + + if (getRemainingSpace() < regSize) + ASMJIT_PROPAGATE_ERROR(_grow(regSize)); + + uint8_t* cursor = getCursor(); + LabelData* label = getLabelData(op.getId()); + RelocData rd; + +#if !defined(ASMJIT_DISABLE_LOGGER) + if (_logger) + _logger->logFormat(Logger::kStyleData, regSize == 4 ? ".dd L%u\n" : ".dq L%u\n", op.getId()); +#endif // !ASMJIT_DISABLE_LOGGER + + rd.type = kRelocRelToAbs; + rd.size = regSize; + rd.from = static_cast(getOffset()); + rd.data = 0; + + if (label->offset != -1) { + // Bound label. + rd.data = static_cast(static_cast(label->offset)); + } + else { + // Non-bound label. Need to chain. + LabelLink* link = _newLabelLink(); + + link->prev = (LabelLink*)label->links; + link->offset = getOffset(); + link->displacement = 0; + link->relocId = _relocations.getLength(); + + label->links = link; + } + + if (_relocations.append(rd) != kErrorOk) + return setLastError(kErrorNoHeapMemory); + + // Emit dummy intptr_t (4 or 8 bytes; depends on the address size). 
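+  // For example, `.dd L1` on x86 stores four zero bytes here plus the
+  // kRelocRelToAbs record created above; _relocCode() later rewrites the
+  // placeholder with the absolute address of L1.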
+ EMIT_DWORD(0); + if (regSize == 8) + EMIT_DWORD(0); + + setCursor(cursor); + return kErrorOk; +} + +// ============================================================================ +// [asmjit::X86Assembler - Align] +// ============================================================================ + +Error X86Assembler::align(uint32_t alignMode, uint32_t offset) noexcept { +#if !defined(ASMJIT_DISABLE_LOGGER) + if (_logger) + _logger->logFormat(Logger::kStyleDirective, + "%s.align %u\n", _logger->getIndentation(), static_cast(offset)); +#endif // !ASMJIT_DISABLE_LOGGER + + if (alignMode > kAlignZero) + return setLastError(kErrorInvalidArgument); + + if (offset <= 1) + return kErrorOk; + + if (!Utils::isPowerOf2(offset) || offset > 64) + return setLastError(kErrorInvalidArgument); + + uint32_t i = static_cast(Utils::alignDiff(getOffset(), offset)); + if (i == 0) + return kErrorOk; + + if (getRemainingSpace() < i) + ASMJIT_PROPAGATE_ERROR(_grow(i)); + + uint8_t* cursor = getCursor(); + uint8_t pattern = 0x00; + + switch (alignMode) { + case kAlignCode: { + if (hasAsmOption(kOptionOptimizedAlign)) { + // Intel 64 and IA-32 Architectures Software Developer's Manual - Volume 2B (NOP). + enum { kMaxNopSize = 9 }; + + static const uint8_t nopData[kMaxNopSize][kMaxNopSize] = { + { 0x90 }, + { 0x66, 0x90 }, + { 0x0F, 0x1F, 0x00 }, + { 0x0F, 0x1F, 0x40, 0x00 }, + { 0x0F, 0x1F, 0x44, 0x00, 0x00 }, + { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 }, + { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 }, + { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 } + }; + + do { + uint32_t n = Utils::iMin(i, kMaxNopSize); + const uint8_t* p = nopData[n - 1]; + + i -= n; + do { + EMIT_BYTE(*p++); + } while (--n); + } while (i); + } + + pattern = 0x90; + break; + } + + case kAlignData: { + pattern = 0xCC; + break; + } + + case kAlignZero: { + // Already set to zero. + break; + } + } + + while (i) { + EMIT_BYTE(pattern); + i--; + } + + setCursor(cursor); + return kErrorOk; +} + +// ============================================================================ +// [asmjit::X86Assembler - Reloc] +// ============================================================================ + +size_t X86Assembler::_relocCode(void* _dst, Ptr baseAddress) const noexcept { + uint32_t arch = getArch(); + uint8_t* dst = static_cast(_dst); + +#if !defined(ASMJIT_DISABLE_LOGGER) + Logger* logger = getLogger(); +#endif // ASMJIT_DISABLE_LOGGER + + size_t minCodeSize = getOffset(); // Current offset is the minimum code size. + size_t maxCodeSize = getCodeSize(); // Includes all possible trampolines. + + // We will copy the exact size of the generated code. Extra code for trampolines + // is generated on-the-fly by the relocator (this code doesn't exist at the moment). + ::memcpy(dst, _buffer, minCodeSize); + + // Trampoline pointer. + uint8_t* tramp = dst + minCodeSize; + + // Relocate all recorded locations. + size_t relocCount = _relocations.getLength(); + const RelocData* rdList = _relocations.getData(); + + for (size_t i = 0; i < relocCount; i++) { + const RelocData& rd = rdList[i]; + + // Make sure that the `RelocData` is correct. + Ptr ptr = rd.data; + + size_t offset = static_cast(rd.from); + ASMJIT_ASSERT(offset + rd.size <= static_cast(maxCodeSize)); + + // Whether to use trampoline, can be only used if relocation type is + // kRelocAbsToRel on 64-bit. 
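+    // Worked example (values made up for illustration): with
+    // baseAddress = 0x140000000 and a kRelocAbsToRel record at from = 0x10,
+    // the stored dword becomes target - (0x140000000 + 0x10 + 4). When that
+    // difference overflows int32, the kRelocTrampoline case below points the
+    // rel32 at an 8-byte absolute slot appended after the code instead.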
+ bool useTrampoline = false; + + switch (rd.type) { + case kRelocAbsToAbs: + break; + + case kRelocRelToAbs: + ptr += baseAddress; + break; + + case kRelocAbsToRel: + ptr -= baseAddress + rd.from + 4; + break; + + case kRelocTrampoline: + ptr -= baseAddress + rd.from + 4; + if (!Utils::isInt32(static_cast(ptr))) { + ptr = (Ptr)tramp - (baseAddress + rd.from + 4); + useTrampoline = true; + } + break; + + default: + ASMJIT_NOT_REACHED(); + } + + switch (rd.size) { + case 4: + Utils::writeU32u(dst + offset, static_cast(static_cast(ptr))); + break; + + case 8: + Utils::writeI64u(dst + offset, static_cast(ptr)); + break; + + default: + ASMJIT_NOT_REACHED(); + } + + // Handle the trampoline case. + if (useTrampoline) { + // Bytes that replace [REX, OPCODE] bytes. + uint32_t byte0 = 0xFF; + uint32_t byte1 = dst[offset - 1]; + + // Call, patch to FF/2 (-> 0x15). + if (byte1 == 0xE8) + byte1 = x86EncodeMod(0, 2, 5); + // Jmp, patch to FF/4 (-> 0x25). + else if (byte1 == 0xE9) + byte1 = x86EncodeMod(0, 4, 5); + + // Patch `jmp/call` instruction. + ASMJIT_ASSERT(offset >= 2); + dst[offset - 2] = byte0; + dst[offset - 1] = byte1; + + // Absolute address. + Utils::writeU64u(tramp, static_cast(rd.data)); + + // Advance trampoline pointer. + tramp += 8; + +#if !defined(ASMJIT_DISABLE_LOGGER) + if (logger) + logger->logFormat(Logger::kStyleComment, "; Trampoline %llX\n", rd.data); +#endif // !ASMJIT_DISABLE_LOGGER + } + } + + if (arch == kArchX64) + return (size_t)(tramp - dst); + else + return (size_t)(minCodeSize); +} + +// ============================================================================ +// [asmjit::X86Assembler - Logging] +// ============================================================================ + +#if !defined(ASMJIT_DISABLE_LOGGER) +// Logging helpers. +static const char* AssemblerX86_getAddressSizeString(uint32_t size) noexcept { + switch (size) { + case 1 : return "byte ptr "; + case 2 : return "word ptr "; + case 4 : return "dword ptr "; + case 8 : return "qword ptr "; + case 10: return "tword ptr "; + case 16: return "dqword ptr "; + case 32: return "yword ptr "; + case 64: return "zword ptr "; + default: return ""; + } +} + +static const char X86Assembler_segName[] = + "\0\0\0\0" + "es:\0" + "cs:\0" + "ss:\0" + "ds:\0" + "fs:\0" + "gs:\0" + "\0\0\0\0"; + +static void X86Assembler_dumpRegister(StringBuilder& sb, uint32_t type, uint32_t index) { + // -- (Not-Encodable). 
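+  // AH/CH/DH/BH can never be encoded together with a REX prefix, while
+  // SPL/BPL/SIL/DIL require one; the "--" entries below mark those
+  // not-encodable combinations.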
+ static const char reg8l[] = "al\0\0" "cl\0\0" "dl\0\0" "bl\0\0" "spl\0" "bpl\0" "sil\0" "dil\0" ; + static const char reg8h[] = "ah\0\0" "ch\0\0" "dh\0\0" "bh\0\0" "--\0\0" "--\0\0" "--\0\0" "--\0\0"; + static const char reg16[] = "ax\0\0" "cx\0\0" "dx\0\0" "bx\0\0" "sp\0\0" "bp\0\0" "si\0\0" "di\0\0"; + + char suffix = '\0'; + + switch (type) { + case kX86RegTypeGpbLo: + if (index >= 8) { + sb._appendChar('r'); + suffix = 'b'; + goto _EmitID; + } + + sb._appendString(®8l[index * 4]); + return; + + case _kX86RegTypePatchedGpbHi: + if (index < 4) + goto _EmitNE; + + index -= 4; + ASMJIT_FALLTHROUGH; + + case kX86RegTypeGpbHi: + if (index >= 4) + goto _EmitNE; + + sb._appendString(®8h[index * 4]); + return; + +_EmitNE: + sb._appendString("--", 2); + return; + + case kX86RegTypeGpw: + if (index >= 8) { + sb._appendChar('r'); + suffix = 'w'; + goto _EmitID; + } + + sb._appendString(®16[index * 4]); + return; + + case kX86RegTypeGpd: + if (index >= 8) { + sb._appendChar('r'); + suffix = 'd'; + goto _EmitID; + } + + sb._appendChar('e'); + sb._appendString(®16[index * 4]); + return; + + case kX86RegTypeGpq: + sb._appendChar('r'); + if (index >= 8) + goto _EmitID; + + sb._appendString(®16[index * 4]); + return; + + case kX86RegTypeK: + sb._appendString("k", 1); + goto _EmitID; + + case kX86RegTypeFp: + sb._appendString("fp", 2); + goto _EmitID; + + case kX86RegTypeMm: + sb._appendString("mm", 2); + goto _EmitID; + + case kX86RegTypeXmm: + sb._appendString("xmm", 3); + goto _EmitID; + + case kX86RegTypeYmm: + sb._appendString("ymm", 3); + goto _EmitID; + + case kX86RegTypeZmm: + sb._appendString("zmm", 3); + goto _EmitID; + + case kX86RegTypeSeg: + if (index >= kX86SegCount) + goto _EmitNE; + + sb._appendString(&X86Assembler_segName[index * 4], 2); + return; + + default: + return; + } + +_EmitID: + sb._appendUInt32(index); + + if (suffix) + sb._appendChar(suffix); +} + +static void X86Assembler_dumpOperand(StringBuilder& sb, uint32_t arch, const Operand* op, uint32_t loggerOptions) { + if (op->isReg()) { + X86Assembler_dumpRegister(sb, + static_cast(op)->getRegType(), + static_cast(op)->getRegIndex()); + } + else if (op->isMem()) { + const X86Mem* m = static_cast(op); + + uint32_t type = kX86RegTypeGpd; + uint32_t seg = m->getSegment(); + bool isAbsolute = false; + + if (arch == kArchX86) { + if (!m->hasGpdBase()) + type = kX86RegTypeGpw; + } + else { + if (!m->hasGpdBase()) + type = kX86RegTypeGpq; + } + + sb._appendString(AssemblerX86_getAddressSizeString(op->getSize())); + + if (seg < kX86SegCount) + sb._appendString(&X86Assembler_segName[seg * 4]); + + sb._appendChar('['); + switch (m->getMemType()) { + case kMemTypeBaseIndex: + case kMemTypeStackIndex: + // [base + index << shift + displacement] + X86Assembler_dumpRegister(sb, type, m->getBase()); + break; + + case kMemTypeLabel: + // [label + index << shift + displacement] + sb.appendFormat("L%u", m->getBase()); + break; + + case kMemTypeAbsolute: + // [absolute] + isAbsolute = true; + sb.appendUInt(static_cast(m->getDisplacement()), 16); + break; + + case kMemTypeRip: + // [rip + displacement] + sb.appendString("rip", 3); + break; + + default: + sb.appendFormat("", m->getMemType()); + break; + } + + if (m->hasIndex()) { + switch (m->getVSib()) { + case kX86MemVSibXmm: type = kX86RegTypeXmm; break; + case kX86MemVSibYmm: type = kX86RegTypeYmm; break; + } + + sb._appendChar('+'); + X86Assembler_dumpRegister(sb, type, m->getIndex()); + + if (m->getShift()) { + sb._appendChar('*'); + sb._appendChar("1248"[m->getShift() & 3]); + } + } + + if 
(m->getDisplacement() && !isAbsolute) { + uint32_t base = 10; + int32_t dispOffset = m->getDisplacement(); + + char prefix = '+'; + if (dispOffset < 0) { + dispOffset = -dispOffset; + prefix = '-'; + } + + sb._appendChar(prefix); + if ((loggerOptions & Logger::kOptionHexDisplacement) != 0 && dispOffset > 9) { + sb._appendString("0x", 2); + base = 16; + } + sb.appendUInt(static_cast(dispOffset), base); + } + + sb._appendChar(']'); + } + else if (op->isImm()) { + const Imm* i = static_cast(op); + int64_t val = i->getInt64(); + + if ((loggerOptions & Logger::kOptionHexImmediate) != 0 && static_cast(val) > 9) + sb.appendUInt(static_cast(val), 16); + else + sb.appendInt(val, 10); + } + else if (op->isLabel()) { + sb.appendFormat("L%u", op->getId()); + } + else { + sb._appendString("None", 4); + } +} + +static bool X86Assembler_dumpInstruction(StringBuilder& sb, + uint32_t arch, + uint32_t code, + uint32_t options, + const Operand* o0, + const Operand* o1, + const Operand* o2, + const Operand* o3, + uint32_t loggerOptions) { + + if (!sb.reserve(sb.getLength() + 128)) + return false; + + // Rex, lock and short prefix. + if (options & kX86InstOptionRex) + sb._appendString("rex ", 4); + + if (options & kX86InstOptionLock) + sb._appendString("lock ", 5); + + if (options & kInstOptionShortForm) + sb._appendString("short ", 6); + + // Dump instruction name. + sb._appendString(X86Util::getInstNameById(code)); + + // Dump operands. + if (!o0->isNone()) { + sb._appendChar(' '); + X86Assembler_dumpOperand(sb, arch, o0, loggerOptions); + } + + if (!o1->isNone()) { + sb._appendString(", ", 2); + X86Assembler_dumpOperand(sb, arch, o1, loggerOptions); + } + + if (!o2->isNone()) { + sb._appendString(", ", 2); + X86Assembler_dumpOperand(sb, arch, o2, loggerOptions); + } + + if (!o3->isNone()) { + sb._appendString(", ", 2); + X86Assembler_dumpOperand(sb, arch, o3, loggerOptions); + } + + return true; +} +#endif // !ASMJIT_DISABLE_LOGGER + +// ============================================================================ +// [asmjit::X86Assembler - Emit] +// ============================================================================ + +#define HI_REG(_Index_) ((_kX86RegTypePatchedGpbHi << 8) | _Index_) +//! \internal +static const Operand::VRegOp x86PatchedHiRegs[4] = { + // ----------------+---+--------------+--------------+------------+ + // Operand | S | Reg. Code | OperandId | Unused | + // ----------------+---+--------------+--------------+------------+ + { Operand::kTypeReg, 1 , { HI_REG(4) }, kInvalidValue, {{ 0, 0 }} }, + { Operand::kTypeReg, 1 , { HI_REG(5) }, kInvalidValue, {{ 0, 0 }} }, + { Operand::kTypeReg, 1 , { HI_REG(6) }, kInvalidValue, {{ 0, 0 }} }, + { Operand::kTypeReg, 1 , { HI_REG(7) }, kInvalidValue, {{ 0, 0 }} } +}; +#undef HI_REG + +template +static ASMJIT_INLINE Error X86Assembler_emit(Assembler* self_, uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2, const Operand* o3) { + X86Assembler* self = static_cast(self_); + uint32_t options = self->getInstOptionsAndReset(); + + // Invalid instruction. + if (code >= _kX86InstIdCount) { + self->_comment = nullptr; + return self->setLastError(kErrorUnknownInst); + } + + // -------------------------------------------------------------------------- + // [Grow] + // -------------------------------------------------------------------------- + + // Grow request happens rarely. 
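+  // The 16-byte headroom checked below is enough for any single
+  // instruction; the architectural length limit on x86/x64 is 15 bytes.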
+ uint8_t* cursor = self->getCursor(); + if (ASMJIT_UNLIKELY((size_t)(self->_end - cursor) < 16)) { + ASMJIT_PROPAGATE_ERROR(self->_grow(16)); + cursor = self->getCursor(); + } + + // -------------------------------------------------------------------------- + // [Prepare] + // -------------------------------------------------------------------------- + + uint32_t encoded = o0->getOp() + (o1->getOp() << 3) + (o2->getOp() << 6); + + // Instruction opcode. + uint32_t opCode; + // ModR/M opcode or register code. + uint32_t opReg; + + // ModR/M, both rmReg and rmMem should refer to the same variable since they + // are never used together - either `rmReg` or `rmMem`. + union { + // ModR/M - register code. + uintptr_t rmReg; + // ModR/M - Memory operand. + const X86Mem* rmMem; + }; + + // Immediate value. + int64_t imVal; + // Immediate length. + uint32_t imLen = 0; + + // Memory operand base register index. + uint32_t mBase; + // Memory operand index register index. + uint32_t mIndex; + + // Label. + LabelData* label; + // Displacement offset + int32_t dispOffset; + // Displacement size. + uint32_t dispSize = 0; + // Displacement relocation id. + intptr_t relocId; + + bool assertIllegal = false; + + const X86InstInfo& info = _x86InstInfo[code]; + const X86InstExtendedInfo& extendedInfo = info.getExtendedInfo(); + + opCode = info.getPrimaryOpCode(); + opReg = x86ExtractO(opCode); + + if (Arch == kArchX86) { + // Check if one or more register operand is one of AH, BH, CH, or DH and + // patch them to ensure that the binary code with correct byte-index (4-7) + // is generated. + if (o0->isRegType(kX86RegTypeGpbHi)) + o0 = (const Operand*)(&x86PatchedHiRegs[x86OpReg(o0)]); + + if (o1->isRegType(kX86RegTypeGpbHi)) + o1 = (const Operand*)(&x86PatchedHiRegs[x86OpReg(o1)]); + } + else { + // `W` field. + ASMJIT_ASSERT(static_cast(kX86InstOptionRex) == static_cast(kX86ByteRex)); + + // Check if one or more register operand is one of BPL, SPL, SIL, DIL and + // force a REX prefix to be emitted in such case. + if (X86Reg::isGpbReg(*o0)) { + uint32_t index = x86OpReg(o0); + if (static_cast(o0)->isGpbLo()) { + options |= (index >= 4) ? kX86InstOptionRex : 0; + } + else { + options |= _kX86InstOptionNoRex; + o0 = reinterpret_cast(&x86PatchedHiRegs[index]); + } + } + + if (X86Reg::isGpbReg(*o1)) { + uint32_t index = x86OpReg(o1); + if (static_cast(o1)->isGpbLo()) { + options |= (index >= 4) ? 
kX86InstOptionRex : 0; + } + else { + options |= _kX86InstOptionNoRex; + o1 = reinterpret_cast(&x86PatchedHiRegs[index]); + } + } + } + + // -------------------------------------------------------------------------- + // [Lock-Prefix] + // -------------------------------------------------------------------------- + + if (options & kX86InstOptionLock) { + if (!extendedInfo.isLockable()) + goto _IllegalInst; + EMIT_BYTE(0xF0); + } + + // -------------------------------------------------------------------------- + // [Group] + // -------------------------------------------------------------------------- + + switch (info.getEncoding()) { + // ------------------------------------------------------------------------ + // [None] + // ------------------------------------------------------------------------ + + case kX86InstEncodingNone: + goto _EmitDone; + + // ------------------------------------------------------------------------ + // [X86] + // ------------------------------------------------------------------------ + + case kX86InstEncodingX86Op_66H: + ADD_66H_P(true); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingX86Op: + goto _EmitX86Op; + + case kX86InstEncodingX86Rm_B: + opCode += o0->getSize() != 1; + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingX86Rm: + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + if (encoded == ENC_OPS(Reg, None, None)) { + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86RmReg: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opCode += o0->getSize() != 1; + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + opReg = x86OpReg(o1); + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opCode += o1->getSize() != 1; + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86RegRm: + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + if (encoded == ENC_OPS(Reg, Reg, None)) { + if (o0->getSize() == 1 || o0->getSize() != o1->getSize()) + goto _IllegalInst; + + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + if (o0->getSize() == 1) + goto _IllegalInst; + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86M: + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86Arith: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opCode += (o0->getSize() != 1) + 2; + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opCode += (o0->getSize() != 1) + 2; + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opCode += o1->getSize() != 1; + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + + // The remaining instructions use 0x80 opcode. + opCode = 0x80; + + if (encoded == ENC_OPS(Reg, Imm, None)) { + imVal = static_cast(o1)->getInt64(); + imLen = Utils::isInt8(imVal) ? 
static_cast(1) : Utils::iMin(o0->getSize(), 4); + rmReg = x86OpReg(o0); + + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + // Alternate Form - AL, AX, EAX, RAX. + if (rmReg == 0 && (o0->getSize() == 1 || imLen != 1)) { + opCode &= kX86InstOpCode_PP_66 | kX86InstOpCode_W; + opCode |= ((opReg << 3) | (0x04 + (o0->getSize() != 1))); + imLen = Utils::iMin(o0->getSize(), 4); + goto _EmitX86Op; + } + + opCode += o0->getSize() != 1 ? (imLen != 1 ? 1 : 3) : 0; + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Imm, None)) { + uint32_t memSize = o0->getSize(); + + if (memSize == 0) + goto _IllegalInst; + + imVal = static_cast(o1)->getInt64(); + imLen = Utils::isInt8(imVal) ? static_cast(1) : Utils::iMin(memSize, 4); + + opCode += memSize != 1 ? (imLen != 1 ? 1 : 3) : 0; + ADD_66H_P_BY_SIZE(memSize); + ADD_REX_W_BY_SIZE(memSize); + + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86BSwap: + if (encoded == ENC_OPS(Reg, None, None)) { + if (o0->getSize() < 4) + goto _IllegalInst; + + opReg = x86OpReg(o0); + ADD_REX_W_BY_SIZE(o0->getSize()); + goto _EmitX86OpWithOpReg; + } + break; + + case kX86InstEncodingX86BTest: + if (encoded == ENC_OPS(Reg, Reg, None)) { + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + + opReg = x86OpReg(o1); + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + + // The remaining instructions use the secondary opcode/r. + imVal = static_cast(o1)->getInt64(); + imLen = 1; + + opCode = extendedInfo.getSecondaryOpCode(); + opReg = x86ExtractO(opCode); + + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + if (encoded == ENC_OPS(Reg, Imm, None)) { + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Imm, None)) { + if (o0->getSize() == 0) + goto _IllegalInst; + + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86Call: + if (encoded == ENC_OPS(Reg, None, None)) { + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + + // The following instructions use the secondary opcode. + opCode = extendedInfo.getSecondaryOpCode(); + + if (encoded == ENC_OPS(Imm, None, None)) { + imVal = static_cast(o0)->getInt64(); + goto _EmitJmpOrCallAbs; + } + + if (encoded == ENC_OPS(Label, None, None)) { + label = self->getLabelData(static_cast(o0)->getId()); + if (label->offset != -1) { + // Bound label. + static const intptr_t kRel32Size = 5; + intptr_t offs = label->offset - (intptr_t)(cursor - self->_buffer); + + ASMJIT_ASSERT(offs <= 0); + EMIT_BYTE(opCode); + EMIT_DWORD(static_cast(offs - kRel32Size)); + } + else { + // Non-bound label. 
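+        // Forward reference: the rel32 is emitted as a placeholder and a
+        // LabelLink is chained (dispOffset = -4, dispSize = 4) so the
+        // displacement can be patched once the label is bound.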
+ EMIT_BYTE(opCode); + dispOffset = -4; + dispSize = 4; + relocId = -1; + goto _EmitDisplacement; + } + goto _EmitDone; + } + break; + + case kX86InstEncodingX86Enter: + if (encoded == ENC_OPS(Imm, Imm, None)) { + EMIT_BYTE(0xC8); + EMIT_WORD(static_cast(o1)->getUInt16()); + EMIT_BYTE(static_cast(o0)->getUInt8()); + goto _EmitDone; + } + break; + + case kX86InstEncodingX86Imul: + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + if (encoded == ENC_OPS(Reg, None, None)) { + opCode &= kX86InstOpCode_PP_66 | kX86InstOpCode_W; + opCode |= 0xF6 + (o0->getSize() != 1); + + opReg = 5; + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + opCode &= kX86InstOpCode_PP_66 | kX86InstOpCode_W; + opCode |= 0xF6 + (o0->getSize() != 1); + + opReg = 5; + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + + // The following instructions use 0x0FAF opcode. + opCode &= kX86InstOpCode_PP_66 | kX86InstOpCode_W; + opCode |= kX86InstOpCode_MM_0F | 0xAF; + + if (encoded == ENC_OPS(Reg, Reg, None)) { + ASMJIT_ASSERT(o0->getSize() != 1); + + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + ASMJIT_ASSERT(o0->getSize() != 1); + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + + goto _EmitX86M; + } + + // The following instructions use 0x69/0x6B opcode. + opCode &= kX86InstOpCode_PP_66 | kX86InstOpCode_W; + opCode |= 0x6B; + + if (encoded == ENC_OPS(Reg, Imm, None)) { + ASMJIT_ASSERT(o0->getSize() != 1); + + imVal = static_cast(o1)->getInt64(); + imLen = 1; + + if (!Utils::isInt8(imVal)) { + opCode -= 2; + imLen = o0->getSize() == 2 ? 2 : 4; + } + + opReg = x86OpReg(o0); + rmReg = opReg; + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + ASMJIT_ASSERT(o0->getSize() != 1); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (!Utils::isInt8(imVal)) { + opCode -= 2; + imLen = o0->getSize() == 2 ? 2 : 4; + } + + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + ASMJIT_ASSERT(o0->getSize() != 1); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (!Utils::isInt8(imVal)) { + opCode -= 2; + imLen = o0->getSize() == 2 ? 2 : 4; + } + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86IncDec: + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + if (encoded == ENC_OPS(Reg, None, None)) { + rmReg = x86OpReg(o0); + + // INC r16|r32 is not encodable in 64-bit mode. 
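+      // On 32-bit x86, `inc eax` can use the one-byte short form 0x40 + r;
+      // in 64-bit mode the 0x40-0x4F bytes are reclaimed as REX prefixes,
+      // so x64 always takes the two-byte FF /0 (or FE /0) path instead.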
+ if (Arch == kArchX86 && (o0->getSize() == 2 || o0->getSize() == 4)) { + opCode &= kX86InstOpCode_PP_66 | kX86InstOpCode_W; + opCode |= extendedInfo.getSecondaryOpCode() + (static_cast(rmReg) & 0x07); + goto _EmitX86Op; + } + else { + opCode += o0->getSize() != 1; + goto _EmitX86R; + } + } + + if (encoded == ENC_OPS(Mem, None, None)) { + opCode += o0->getSize() != 1; + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86Int: + if (encoded == ENC_OPS(Imm, None, None)) { + imVal = static_cast(o0)->getInt64(); + uint8_t imm8 = static_cast(imVal & 0xFF); + + if (imm8 == 0x03) { + EMIT_BYTE(opCode); + } + else { + EMIT_BYTE(opCode + 1); + EMIT_BYTE(imm8); + } + goto _EmitDone; + } + break; + + case kX86InstEncodingX86Jcc: + if (encoded == ENC_OPS(Label, None, None)) { + label = self->getLabelData(static_cast(o0)->getId()); + + if (self->hasAsmOption(Assembler::kOptionPredictedJumps)) { + if (options & kInstOptionTaken) + EMIT_BYTE(0x3E); + if (options & kInstOptionNotTaken) + EMIT_BYTE(0x2E); + } + + if (label->offset != -1) { + // Bound label. + static const intptr_t kRel8Size = 2; + static const intptr_t kRel32Size = 6; + + intptr_t offs = label->offset - (intptr_t)(cursor - self->_buffer); + ASMJIT_ASSERT(offs <= 0); + + if ((options & kInstOptionLongForm) == 0 && Utils::isInt8(offs - kRel8Size)) { + EMIT_BYTE(opCode); + EMIT_BYTE(offs - kRel8Size); + + options |= kInstOptionShortForm; + goto _EmitDone; + } + else { + EMIT_BYTE(0x0F); + EMIT_BYTE(opCode + 0x10); + EMIT_DWORD(static_cast(offs - kRel32Size)); + + options &= ~kInstOptionShortForm; + goto _EmitDone; + } + } + else { + // Non-bound label. + if (options & kInstOptionShortForm) { + EMIT_BYTE(opCode); + dispOffset = -1; + dispSize = 1; + relocId = -1; + goto _EmitDisplacement; + } + else { + EMIT_BYTE(0x0F); + EMIT_BYTE(opCode + 0x10); + dispOffset = -4; + dispSize = 4; + relocId = -1; + goto _EmitDisplacement; + } + } + } + break; + + case kX86InstEncodingX86Jecxz: + if (encoded == ENC_OPS(Reg, Label, None)) { + if (x86OpReg(o0) != kX86RegIndexCx) + goto _IllegalInst; + + if ((Arch == kArchX86 && o0->getSize() == 2) || + (Arch == kArchX64 && o0->getSize() == 4)) { + EMIT_BYTE(0x67); + } + + EMIT_BYTE(0xE3); + label = self->getLabelData(static_cast(o1)->getId()); + + if (label->offset != -1) { + // Bound label. + intptr_t offs = label->offset - (intptr_t)(cursor - self->_buffer) - 1; + if (!Utils::isInt8(offs)) + goto _IllegalInst; + + EMIT_BYTE(offs); + goto _EmitDone; + } + else { + // Non-bound label. + dispOffset = -1; + dispSize = 1; + relocId = -1; + goto _EmitDisplacement; + } + } + break; + + case kX86InstEncodingX86Jmp: + if (encoded == ENC_OPS(Reg, None, None)) { + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + + // The following instructions use the secondary opcode (0xE9). + opCode = 0xE9; + + if (encoded == ENC_OPS(Imm, None, None)) { + imVal = static_cast(o0)->getInt64(); + goto _EmitJmpOrCallAbs; + } + + if (encoded == ENC_OPS(Label, None, None)) { + label = self->getLabelData(static_cast(o0)->getId()); + if (label->offset != -1) { + // Bound label. 
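+        // Backward jump: the 2-byte `EB rel8` form is used when the
+        // displacement fits into int8 (e.g. a jump to 10 bytes before this
+        // instruction encodes as EB F4); otherwise the 5-byte `E9 rel32`
+        // form is emitted.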
+ const intptr_t kRel8Size = 2; + const intptr_t kRel32Size = 5; + + intptr_t offs = label->offset - (intptr_t)(cursor - self->_buffer); + + if ((options & kInstOptionLongForm) == 0 && Utils::isInt8(offs - kRel8Size)) { + options |= kInstOptionShortForm; + + EMIT_BYTE(0xEB); + EMIT_BYTE(offs - kRel8Size); + goto _EmitDone; + } + else { + options &= ~kInstOptionShortForm; + + EMIT_BYTE(0xE9); + EMIT_DWORD(static_cast(offs - kRel32Size)); + goto _EmitDone; + } + } + else { + // Non-bound label. + if ((options & kInstOptionShortForm) != 0) { + EMIT_BYTE(0xEB); + dispOffset = -1; + dispSize = 1; + relocId = -1; + goto _EmitDisplacement; + } + else { + EMIT_BYTE(0xE9); + dispOffset = -4; + dispSize = 4; + relocId = -1; + goto _EmitDisplacement; + } + } + } + break; + + case kX86InstEncodingX86Lea: + if (encoded == ENC_OPS(Reg, Mem, None)) { + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86Mov: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + + // Asmjit uses segment registers indexed from 1 to 6, leaving zero as + // "no segment register used". We have to fix this (decrement the index + // of the register) when emitting MOV instructions which move to/from + // a segment register. The segment register is always `opReg`, because + // the MOV instruction uses RM or MR encoding. + + // Sreg <- Reg + if (static_cast(o0)->isSeg()) { + ASMJIT_ASSERT(static_cast(o1)->isGpw() || + static_cast(o1)->isGpd() || + static_cast(o1)->isGpq() ); + + // `opReg` is the segment register. + opReg--; + opCode = 0x8E; + + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + goto _EmitX86R; + } + // Reg <- Sreg + else if (static_cast(o1)->isSeg()) { + ASMJIT_ASSERT(static_cast(o0)->isGpw() || + static_cast(o0)->isGpd() || + static_cast(o0)->isGpq() ); + + // `opReg` is the segment register. 
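+        // e.g. `mov ax, ds` encodes as 66 8C D8: opcode 0x8C with DS
+        // (internal index 4, hardware encoding 3 after the decrement) in
+        // the reg field of ModR/M.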
+ opReg = static_cast(rmReg) - 1; + rmReg = x86OpReg(o0); + opCode = 0x8C; + + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + goto _EmitX86R; + } + // Reg <- Reg + else { + ASMJIT_ASSERT(static_cast(o0)->isGpb() || + static_cast(o0)->isGpw() || + static_cast(o0)->isGpd() || + static_cast(o0)->isGpq() ); + + opCode = 0x8A + (o0->getSize() != 1); + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + goto _EmitX86R; + } + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + + // Sreg <- Mem + if (static_cast(o0)->isRegType(kX86RegTypeSeg)) { + opCode = 0x8E; + opReg--; + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + goto _EmitX86M; + } + // Reg <- Mem + else { + ASMJIT_ASSERT(static_cast(o0)->isGpb() || + static_cast(o0)->isGpw() || + static_cast(o0)->isGpd() || + static_cast(o0)->isGpq() ); + opCode = 0x8A + (o0->getSize() != 1); + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + goto _EmitX86M; + } + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + + // X86Mem <- Sreg + if (static_cast(o1)->isSeg()) { + opCode = 0x8C; + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + goto _EmitX86M; + } + // X86Mem <- Reg + else { + ASMJIT_ASSERT(static_cast(o1)->isGpb() || + static_cast(o1)->isGpw() || + static_cast(o1)->isGpd() || + static_cast(o1)->isGpq() ); + opCode = 0x88 + (o1->getSize() != 1); + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + goto _EmitX86M; + } + } + + if (encoded == ENC_OPS(Reg, Imm, None)) { + // 64-bit immediate in 64-bit mode is allowed. + imVal = static_cast(o1)->getInt64(); + imLen = o0->getSize(); + + opReg = 0; + rmReg = x86OpReg(o0); + + // Optimize instruction size by using 32-bit immediate if possible. 
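+      // e.g. `mov rax, 1` shrinks to the 7-byte 48 C7 C0 01 00 00 00
+      // (C7 /0 with a sign-extended imm32) instead of the 10-byte B8+r
+      // form carrying a full imm64.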
+ if (Arch == kArchX64 && imLen == 8 && Utils::isInt32(imVal)) { + opCode = 0xC7; + ADD_REX_W(1); + imLen = 4; + goto _EmitX86R; + } + else { + opCode = 0xB0 + (static_cast(o0->getSize() != 1) << 3); + opReg = rmReg; + + ADD_66H_P_BY_SIZE(imLen); + ADD_REX_W_BY_SIZE(imLen); + goto _EmitX86OpWithOpReg; + } + } + + if (encoded == ENC_OPS(Mem, Imm, None)) { + uint32_t memSize = o0->getSize(); + + if (memSize == 0) + goto _IllegalInst; + + imVal = static_cast(o1)->getInt64(); + imLen = Utils::iMin(memSize, 4); + + opCode = 0xC6 + (memSize != 1); + opReg = 0; + ADD_66H_P_BY_SIZE(memSize); + ADD_REX_W_BY_SIZE(memSize); + + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86MovsxMovzx: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opCode += o1->getSize() != 1; + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opCode += o1->getSize() != 1; + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86Movsxd: + if (encoded == ENC_OPS(Reg, Reg, None)) { + ADD_REX_W(true); + + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + ADD_REX_W(true); + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86MovPtr: + if (encoded == ENC_OPS(Reg, Imm, None)) { + if (x86OpReg(o0) != 0) + goto _IllegalInst; + + opCode += o0->getSize() != 1; + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + imVal = static_cast(o1)->getInt64(); + imLen = self->_regSize; + goto _EmitX86Op; + } + + // The following instruction uses the secondary opcode. + opCode = extendedInfo.getSecondaryOpCode(); + + if (encoded == ENC_OPS(Imm, Reg, None)) { + if (x86OpReg(o1) != 0) + goto _IllegalInst; + + opCode += o1->getSize() != 1; + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + + imVal = static_cast(o0)->getInt64(); + imLen = self->_regSize; + goto _EmitX86Op; + } + break; + + case kX86InstEncodingX86Push: + if (encoded == ENC_OPS(Reg, None, None)) { + if (o0->isRegType(kX86RegTypeSeg)) { + uint32_t segment = x86OpReg(o0); + if (segment >= kX86SegCount) + goto _IllegalInst; + + if (segment >= kX86SegFs) + EMIT_BYTE(0x0F); + + EMIT_BYTE(x86OpCodePushSeg[segment]); + goto _EmitDone; + } + else { + goto _GroupPop_Gp; + } + } + + if (encoded == ENC_OPS(Imm, None, None)) { + imVal = static_cast(o0)->getInt64(); + imLen = Utils::isInt8(imVal) ? 1 : 4; + + EMIT_BYTE(imLen == 1 ? 0x6A : 0x68); + goto _EmitImm; + } + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingX86Pop: + if (encoded == ENC_OPS(Reg, None, None)) { + if (o0->isRegType(kX86RegTypeSeg)) { + uint32_t segment = x86OpReg(o0); + if (segment == kX86SegCs || segment >= kX86SegCount) + goto _IllegalInst; + + if (segment >= kX86SegFs) + EMIT_BYTE(0x0F); + + EMIT_BYTE(x86OpCodePopSeg[segment]); + goto _EmitDone; + } + else { +_GroupPop_Gp: + // We allow 2 byte, 4 byte, and 8 byte register sizes, althought PUSH + // and POP only allow 2 bytes or native size. On 64-bit we simply + // PUSH/POP 64-bit register even if 32-bit register was given. 
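+          // e.g. on x64 a push of EAX is emitted as the single byte 50
+          // (push rax); 64-bit mode has no 32-bit push/pop of a GP register.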
+ if (o0->getSize() < 2) + goto _IllegalInst; + + opCode = extendedInfo.getSecondaryOpCode(); + opReg = x86OpReg(o0); + + ADD_66H_P_BY_SIZE(o0->getSize()); + goto _EmitX86OpWithOpReg; + } + } + + if (encoded == ENC_OPS(Mem, None, None)) { + if (o0->getSize() != 2 && o0->getSize() != self->getRegSize()) + goto _IllegalInst; + + ADD_66H_P_BY_SIZE(o0->getSize()); + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86Rep: + // Emit REP 0xF2 or 0xF3 prefix first. + EMIT_BYTE(0xF2 + opReg); + goto _EmitX86Op; + + case kX86InstEncodingX86Ret: + if (encoded == ENC_OPS(None, None, None)) { + EMIT_BYTE(0xC3); + goto _EmitDone; + } + + if (encoded == ENC_OPS(Imm, None, None)) { + imVal = static_cast(o0)->getInt64(); + if (imVal == 0) { + EMIT_BYTE(0xC3); + goto _EmitDone; + } + else { + EMIT_BYTE(0xC2); + imLen = 2; + goto _EmitImm; + } + } + break; + + case kX86InstEncodingX86Rot: + opCode += o0->getSize() != 1; + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + if (encoded == ENC_OPS(Reg, Reg, None)) { + ASMJIT_ASSERT(static_cast(o1)->isRegCode(kX86RegTypeGpbLo, kX86RegIndexCx)); + opCode += 2; + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + ASMJIT_ASSERT(static_cast(o1)->isRegCode(kX86RegTypeGpbLo, kX86RegIndexCx)); + opCode += 2; + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + + if (encoded == ENC_OPS(Reg, Imm, None)) { + imVal = static_cast(o1)->getInt64() & 0xFF; + imLen = imVal != 1; + if (imLen) + opCode -= 0x10; + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Imm, None)) { + if (o0->getSize() == 0) + goto _IllegalInst; + + imVal = static_cast(o1)->getInt64() & 0xFF; + imLen = imVal != 1; + if (imLen) + opCode -= 0x10; + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86Set: + if (encoded == ENC_OPS(Reg, None, None)) { + ASMJIT_ASSERT(o0->getSize() == 1); + + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + ASMJIT_ASSERT(o0->getSize() <= 1); + + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86ShldShrd: + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + ASMJIT_ASSERT(o0->getSize() == o1->getSize()); + + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + opReg = x86OpReg(o1); + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, Imm)) { + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + + // The following instructions use opCode + 1. 
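+      // i.e. the CL-count forms; for SHLD that is 0F A5 (shld r/m, r, cl)
+      // next to the immediate form 0F A4 (shld r/m, r, imm8).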
+ opCode++; + + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + ASMJIT_ASSERT(static_cast(o2)->isRegCode(kX86RegTypeGpbLo, kX86RegIndexCx)); + ASMJIT_ASSERT(o0->getSize() == o1->getSize()); + + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + opReg = x86OpReg(o1); + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, Reg)) { + ASMJIT_ASSERT(static_cast(o2)->isRegCode(kX86RegTypeGpbLo, kX86RegIndexCx)); + + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86Test: + if (encoded == ENC_OPS(Reg, Reg, None)) { + ASMJIT_ASSERT(o0->getSize() == o1->getSize()); + + opCode += o0->getSize() != 1; + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + opReg = x86OpReg(o1); + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opCode += o1->getSize() != 1; + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + + // The following instructions use the secondary opcode. + opCode = extendedInfo.getSecondaryOpCode() + (o0->getSize() != 1); + opReg = x86ExtractO(opCode); + + if (encoded == ENC_OPS(Reg, Imm, None)) { + imVal = static_cast(o1)->getInt64(); + imLen = Utils::iMin(o0->getSize(), 4); + + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + // Alternate Form - AL, AX, EAX, RAX. + if (x86OpReg(o0) == 0) { + opCode &= kX86InstOpCode_PP_66 | kX86InstOpCode_W; + opCode |= 0xA8 + (o0->getSize() != 1); + goto _EmitX86Op; + } + + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Imm, None)) { + if (o0->getSize() == 0) + goto _IllegalInst; + + imVal = static_cast(o1)->getInt64(); + imLen = Utils::iMin(o0->getSize(), 4); + + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86Xchg: + if (encoded == ENC_OPS(Reg, Mem, None)) { + opCode += o0->getSize() != 1; + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingX86Xadd: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86OpReg(o1); + rmReg = x86OpReg(o0); + + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + // Special opcode for 'xchg ?ax, reg'. + if (code == kX86InstIdXchg && o0->getSize() > 1 && (opReg == 0 || rmReg == 0)) { + opCode &= kX86InstOpCode_PP_66 | kX86InstOpCode_W; + opCode |= 0x90; + // One of `xchg a, b` or `xchg b, a` is AX/EAX/RAX. 
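+        // The accumulator's index is 0, so opReg + rmReg is the index of
+        // the other register; e.g. `xchg eax, ecx` collapses to the single
+        // byte 91 (0x90 + 1).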
+ opReg += rmReg; + goto _EmitX86OpWithOpReg; + } + + opCode += o0->getSize() != 1; + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opCode += o1->getSize() != 1; + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86Crc: + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + if (encoded == ENC_OPS(Reg, Reg, None)) { + if (!Utils::inInterval(static_cast(o0)->getRegType(), kX86RegTypeGpd, kX86RegTypeGpq)) + goto _IllegalInst; + + opCode += o0->getSize() != 1; + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + if (!Utils::inInterval(static_cast(o0)->getRegType(), kX86RegTypeGpd, kX86RegTypeGpq)) + goto _IllegalInst; + + opCode += o0->getSize() != 1; + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86Prefetch: + if (encoded == ENC_OPS(Mem, Imm, None)) { + opReg = static_cast(o1)->getUInt32() & 0x3; + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingX86Fence: + if (Arch == kArchX64 && (opCode & kX86InstOpCode_W_Mask)) { + EMIT_BYTE(kX86ByteRex | kX86ByteRexW); + } + + EMIT_BYTE(0x0F); + EMIT_BYTE(opCode); + EMIT_BYTE(0xC0 | (opReg << 3)); + goto _EmitDone; + + // ------------------------------------------------------------------------ + // [Fpu] + // ------------------------------------------------------------------------ + + case kX86InstEncodingFpuOp: + goto _EmitFpuOp; + + case kX86InstEncodingFpuArith: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + + // We switch to the alternative opcode if the first operand is zero. + if (opReg == 0) { +_EmitFpArith_Reg: + opCode = 0xD800 + ((opCode >> 8) & 0xFF) + static_cast(rmReg); + goto _EmitFpuOp; + } + else if (rmReg == 0) { + rmReg = opReg; + opCode = 0xDC00 + ((opCode >> 0) & 0xFF) + static_cast(rmReg); + goto _EmitFpuOp; + } + else { + goto _IllegalInst; + } + } + + if (encoded == ENC_OPS(Mem, None, None)) { + // 0xD8/0xDC, depends on the size of the memory operand; opReg has been + // set already. +_EmitFpArith_Mem: + opCode = (o0->getSize() == 4) ? 
0xD8 : 0xDC; + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingFpuCom: + if (encoded == ENC_OPS(None, None, None)) { + rmReg = 1; + goto _EmitFpArith_Reg; + } + + if (encoded == ENC_OPS(Reg, None, None)) { + rmReg = x86OpReg(o0); + goto _EmitFpArith_Reg; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + goto _EmitFpArith_Mem; + } + break; + + case kX86InstEncodingFpuFldFst: + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = x86OpMem(o0); + + if (o0->getSize() == 4 && info.hasFlag(kX86InstFlagMem4)) { + goto _EmitX86M; + } + + if (o0->getSize() == 8 && info.hasFlag(kX86InstFlagMem8)) { + opCode += 4; + goto _EmitX86M; + } + + if (o0->getSize() == 10 && info.hasFlag(kX86InstFlagMem10)) { + opCode = extendedInfo.getSecondaryOpCode(); + opReg = x86ExtractO(opCode); + goto _EmitX86M; + } + } + + if (encoded == ENC_OPS(Reg, None, None)) { + if (code == kX86InstIdFld) { + opCode = 0xD9C0 + x86OpReg(o0); + goto _EmitFpuOp; + } + + if (code == kX86InstIdFst) { + opCode = 0xDDD0 + x86OpReg(o0); + goto _EmitFpuOp; + } + + if (code == kX86InstIdFstp) { + opCode = 0xDDD8 + x86OpReg(o0); + goto _EmitFpuOp; + } + } + break; + + + case kX86InstEncodingFpuM: + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = x86OpMem(o0); + + if (o0->getSize() == 2 && info.hasFlag(kX86InstFlagMem2)) { + opCode += 4; + goto _EmitX86M; + } + + if (o0->getSize() == 4 && info.hasFlag(kX86InstFlagMem4)) { + goto _EmitX86M; + } + + if (o0->getSize() == 8 && info.hasFlag(kX86InstFlagMem8)) { + opCode = extendedInfo.getSecondaryOpCode(); + opReg = x86ExtractO(opCode); + goto _EmitX86M; + } + } + break; + + case kX86InstEncodingFpuRDef: + if (encoded == ENC_OPS(None, None, None)) { + opCode += 1; + goto _EmitFpuOp; + } + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingFpuR: + if (encoded == ENC_OPS(Reg, None, None)) { + opCode += x86OpReg(o0); + goto _EmitFpuOp; + } + break; + + case kX86InstEncodingFpuStsw: + if (encoded == ENC_OPS(Reg, None, None)) { + if (x86OpReg(o0) != 0) + goto _IllegalInst; + + opCode = extendedInfo.getSecondaryOpCode(); + goto _EmitFpuOp; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + // ------------------------------------------------------------------------ + // [Ext] + // ------------------------------------------------------------------------ + + case kX86InstEncodingSimdPextrw: + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + ADD_66H_P(x86IsXmm(static_cast(o1))); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, Imm)) { + // Secondary opcode of 'pextrw' instruction (SSE4.1). 
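+      // The reg, reg form above is the 0F C5 /r ib encoding (66-prefixed
+      // for XMM); only the SSE4.1 encoding (66 0F 3A 15 /r ib) can store
+      // the selected word directly to memory.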
+ opCode = extendedInfo.getSecondaryOpCode(); + ADD_66H_P(x86IsXmm(static_cast(o1))); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingSimdExtract: + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + ADD_66H_P(x86IsXmm(static_cast(o1))); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + opReg = x86OpReg(o1); + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, Imm)) { + ADD_66H_P(x86IsXmm(static_cast(o1))); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingSimdMov: + case kX86InstEncodingSimdMovNoRexW: + ASMJIT_ASSERT(extendedInfo._opFlags[0] != 0); + ASMJIT_ASSERT(extendedInfo._opFlags[1] != 0); + + // Check parameters GPD|GPQ|MMX|XMM <- GPD|GPQ|MMX|XMM|Mem|Imm. + ASMJIT_ASSERT(!((o0->isMem() && (extendedInfo._opFlags[0] & kX86InstOpMem) == 0) || + (o0->isRegType(kX86RegTypeMm ) && (extendedInfo._opFlags[0] & kX86InstOpMm ) == 0) || + (o0->isRegType(kX86RegTypeXmm) && (extendedInfo._opFlags[0] & kX86InstOpXmm) == 0) || + (o0->isRegType(kX86RegTypeGpd) && (extendedInfo._opFlags[0] & kX86InstOpGd ) == 0) || + (o0->isRegType(kX86RegTypeGpq) && (extendedInfo._opFlags[0] & kX86InstOpGq ) == 0) || + (o1->isMem() && (extendedInfo._opFlags[1] & kX86InstOpMem) == 0) || + (o1->isRegType(kX86RegTypeMm ) && (extendedInfo._opFlags[1] & kX86InstOpMm ) == 0) || + (o1->isRegType(kX86RegTypeXmm) && (extendedInfo._opFlags[1] & kX86InstOpXmm) == 0) || + (o1->isRegType(kX86RegTypeGpd) && (extendedInfo._opFlags[1] & kX86InstOpGd ) == 0) || + (o1->isRegType(kX86RegTypeGpq) && (extendedInfo._opFlags[1] & kX86InstOpGq ) == 0) )); + + // GP|MMX|XMM <- GP|MMX|XMM + if (encoded == ENC_OPS(Reg, Reg, None)) { + ADD_REX_W(x86IsGpq(static_cast(o0)) && (info.getEncoding() != kX86InstEncodingSimdMovNoRexW)); + ADD_REX_W(x86IsGpq(static_cast(o1)) && (info.getEncoding() != kX86InstEncodingSimdMovNoRexW)); + + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + // GP|MMX|XMM <- Mem + if (encoded == ENC_OPS(Reg, Mem, None)) { + ADD_REX_W(x86IsGpq(static_cast(o0)) && (info.getEncoding() != kX86InstEncodingSimdMovNoRexW)); + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + + // The following instruction uses opCode[1]. + opCode = extendedInfo.getSecondaryOpCode(); + + // X86Mem <- GP|MMX|XMM + if (encoded == ENC_OPS(Mem, Reg, None)) { + ADD_REX_W(x86IsGpq(static_cast(o1)) && (info.getEncoding() != kX86InstEncodingSimdMovNoRexW)); + + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingSimdMovBe: + if (encoded == ENC_OPS(Reg, Mem, None)) { + if (o0->getSize() == 1) + goto _IllegalInst; + + ADD_66H_P_BY_SIZE(o0->getSize()); + ADD_REX_W_BY_SIZE(o0->getSize()); + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + + // The following instruction uses the secondary opcode. 
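+ // For `movbe` the primary opcode (0F 38 F0) is the load form handled above;
+ // the store form `movbe mem, reg` uses the secondary opcode (0F 38 F1).
+ // Byte-sized operands were rejected because the instruction is defined only
+ // for 16/32/64-bit data.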
+ opCode = extendedInfo.getSecondaryOpCode(); + + if (encoded == ENC_OPS(Mem, Reg, None)) { + if (o1->getSize() == 1) + goto _IllegalInst; + + ADD_66H_P_BY_SIZE(o1->getSize()); + ADD_REX_W_BY_SIZE(o1->getSize()); + + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingSimdMovD: +_EmitMmMovD: + opReg = x86OpReg(o0); + ADD_66H_P(x86IsXmm(static_cast(o0))); + + // MMX/XMM <- Gp + if (encoded == ENC_OPS(Reg, Reg, None) && static_cast(o1)->isGp()) { + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + // MMX/XMM <- Mem + if (encoded == ENC_OPS(Reg, Mem, None)) { + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + + // The following instructions use the secondary opcode. + opCode = extendedInfo.getSecondaryOpCode(); + opReg = x86OpReg(o1); + ADD_66H_P(x86IsXmm(static_cast(o1))); + + // GP <- MMX/XMM + if (encoded == ENC_OPS(Reg, Reg, None) && static_cast(o0)->isGp()) { + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + + // Mem <- MMX/XMM + if (encoded == ENC_OPS(Mem, Reg, None)) { + rmMem = x86OpMem(o0); + goto _EmitX86M; + } + break; + + case kX86InstEncodingSimdMovQ: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + + // MMX <- MMX + if (static_cast(o0)->isMm() && static_cast(o1)->isMm()) { + opCode = kX86InstOpCode_PP_00 | kX86InstOpCode_MM_0F | 0x6F; + goto _EmitX86R; + } + + // XMM <- XMM + if (static_cast(o0)->isXmm() && static_cast(o1)->isXmm()) { + opCode = kX86InstOpCode_PP_F3 | kX86InstOpCode_MM_0F | 0x7E; + goto _EmitX86R; + } + + // MMX <- XMM (MOVDQ2Q) + if (static_cast(o0)->isMm() && static_cast(o1)->isXmm()) { + opCode = kX86InstOpCode_PP_F2 | kX86InstOpCode_MM_0F | 0xD6; + goto _EmitX86R; + } + + // XMM <- MMX (MOVQ2DQ) + if (static_cast(o0)->isXmm() && static_cast(o1)->isMm()) { + opCode = kX86InstOpCode_PP_F3 | kX86InstOpCode_MM_0F | 0xD6; + goto _EmitX86R; + } + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + + // MMX <- Mem + if (static_cast(o0)->isMm()) { + opCode = kX86InstOpCode_PP_00 | kX86InstOpCode_MM_0F | 0x6F; + goto _EmitX86M; + } + + // XMM <- Mem + if (static_cast(o0)->isXmm()) { + opCode = kX86InstOpCode_PP_F3 | kX86InstOpCode_MM_0F | 0x7E; + goto _EmitX86M; + } + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + + // Mem <- MMX + if (static_cast(o1)->isMm()) { + opCode = kX86InstOpCode_PP_00 | kX86InstOpCode_MM_0F | 0x7F; + goto _EmitX86M; + } + + // Mem <- XMM + if (static_cast(o1)->isXmm()) { + opCode = kX86InstOpCode_PP_66 | kX86InstOpCode_MM_0F | 0xD6; + goto _EmitX86M; + } + } + + if (Arch == kArchX64) { + // MOVQ in other case is simply a MOVD instruction promoted to 64-bit. 
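+ // That is, on X64 `movq rax, xmm0` is just the MOVD opcode (66 0F 7E) with
+ // REX.W set, e.g. 66 48 0F 7E C0. Setting the W bit here and jumping back
+ // to _EmitMmMovD reuses the whole MOVD path.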
+ opCode |= kX86InstOpCode_W; + goto _EmitMmMovD; + } + break; + + case kX86InstEncodingSimdRm_PQ: + ADD_66H_P(o0->isRegType(kX86RegTypeXmm) || o1->isRegType(kX86RegTypeXmm)); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingSimdRm_Q: + ADD_REX_W(o0->isRegType(kX86RegTypeGpq) || o1->isRegType(kX86RegTypeGpq) || (o1->isMem() && o1->getSize() == 8)); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingSimdRm: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + break; + + case kX86InstEncodingSimdRm_P: + if (encoded == ENC_OPS(Reg, Reg, None)) { + ADD_66H_P(x86IsXmm(static_cast(o0)) | x86IsXmm(static_cast(o1))); + + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + ADD_66H_P(x86IsXmm(static_cast(o0))); + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + break; + + case kX86InstEncodingSimdRmRi: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + + // The following instruction uses the secondary opcode. + opCode = extendedInfo.getSecondaryOpCode(); + opReg = x86ExtractO(opCode); + + if (encoded == ENC_OPS(Reg, Imm, None)) { + imVal = static_cast(o1)->getInt64(); + imLen = 1; + + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + break; + + case kX86InstEncodingSimdRmRi_P: + if (encoded == ENC_OPS(Reg, Reg, None)) { + ADD_66H_P(x86IsXmm(static_cast(o0)) | x86IsXmm(static_cast(o1))); + + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + ADD_66H_P(x86IsXmm(static_cast(o0))); + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + + // The following instruction uses the secondary opcode. + opCode = extendedInfo.getSecondaryOpCode(); + opReg = x86ExtractO(opCode); + + if (encoded == ENC_OPS(Reg, Imm, None)) { + ADD_66H_P(x86IsXmm(static_cast(o0))); + + imVal = static_cast(o1)->getInt64(); + imLen = 1; + + rmReg = x86OpReg(o0); + goto _EmitX86R; + } + break; + + case kX86InstEncodingSimdRmi: + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + break; + + case kX86InstEncodingSimdRmi_P: + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + ADD_66H_P(x86IsXmm(static_cast(o0)) | x86IsXmm(static_cast(o1))); + + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + ADD_66H_P(x86IsXmm(static_cast(o0))); + + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + break; + + // ------------------------------------------------------------------------ + // [Group - Extrq / Insertq (SSE4a)] + // ------------------------------------------------------------------------ + + case kX86InstEncodingSimdExtrq: + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + + if (encoded == ENC_OPS(Reg, Reg, None)) + goto _EmitX86R; + + // The following instruction uses the secondary opcode. 
+ opCode = extendedInfo.getSecondaryOpCode(); + + if (encoded == ENC_OPS(Reg, Imm, Imm)) { + imVal = (static_cast(o1)->getUInt32() ) + + (static_cast(o2)->getUInt32() << 8) ; + imLen = 2; + + rmReg = x86ExtractO(opCode); + goto _EmitX86R; + } + break; + + case kX86InstEncodingSimdInsertq: + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + + if (encoded == ENC_OPS(Reg, Reg, None)) + goto _EmitX86R; + + // The following instruction uses the secondary opcode. + opCode = extendedInfo.getSecondaryOpCode(); + + if (encoded == ENC_OPS(Reg, Reg, Imm) && o3->isImm()) { + imVal = (static_cast(o2)->getUInt32() ) + + (static_cast(o3)->getUInt32() << 8) ; + imLen = 2; + goto _EmitX86R; + } + break; + + // ------------------------------------------------------------------------ + // [Group - 3dNow] + // ------------------------------------------------------------------------ + + case kX86InstEncodingSimd3dNow: + // Every 3dNow instruction starts with 0x0F0F and the actual opcode is + // stored as 8-bit immediate. + imVal = opCode & 0xFF; + imLen = 1; + + opCode = kX86InstOpCode_MM_0F | 0x0F; + opReg = x86OpReg(o0); + + if (encoded == ENC_OPS(Reg, Reg, None)) { + rmReg = x86OpReg(o1); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + rmMem = x86OpMem(o1); + goto _EmitX86M; + } + break; + + // ------------------------------------------------------------------------ + // [Avx] + // ------------------------------------------------------------------------ + + case kX86InstEncodingAvxOp: + goto _EmitAvxOp; + + case kX86InstEncodingAvxM: + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = x86OpMem(o0); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxMr_OptL: + ADD_VEX_L(x86IsYmm(o0) | x86IsYmm(o1)); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingAvxMr: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86OpReg(o1); + rmReg = x86OpReg(o0); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxMri_OptL: + ADD_VEX_L(x86IsYmm(o0) | x86IsYmm(static_cast(o1))); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingAvxMri: + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opReg = x86OpReg(o1); + rmReg = x86OpReg(o0); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Mem, Reg, Imm)) { + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxRm_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(o1)); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingAvxRm: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxRmi: +CaseAvxRmi: + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxRmi_OptW: + ADD_REX_W(x86IsGpq(static_cast(o0)) | x86IsGpq(o1)); + goto CaseAvxRmi; + + case kX86InstEncodingAvxRmi_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(o1)); + goto CaseAvxRmi; + + case kX86InstEncodingAvxRvm: +_EmitAvxRvm: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { +_EmitAvxRvm_Reg: + opReg = 
x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmReg = x86OpReg(o2); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxRvm_OptW: + ADD_REX_W(x86IsGpq(static_cast(o0)) | x86IsGpq(static_cast(o1))); + goto _EmitAvxRvm; + + case kX86InstEncodingAvxRvm_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(static_cast(o1))); + goto _EmitAvxRvm; + + case kX86InstEncodingAvxRvmr_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(static_cast(o1))); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingAvxRvmr: + if (!o3->isReg()) + goto _IllegalInst; + + imVal = x86OpReg(o3) << 4; + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmReg = x86OpReg(o2); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxRvmi_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(static_cast(o1))); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingAvxRvmi: + if (!o3->isImm()) + goto _IllegalInst; + + imVal = static_cast(o3)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmReg = x86OpReg(o2); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxRmv: +CaseAvxRmv: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o2)); + rmReg = x86OpReg(o1); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o2)); + rmMem = x86OpMem(o1); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxRmv_OptW: + ADD_REX_W(x86IsGpq(static_cast(o0)) | x86IsGpq(static_cast(o2))); + goto CaseAvxRmv; + + case kX86InstEncodingAvxRmvi: + if (!o3->isImm()) + goto _IllegalInst; + + imVal = static_cast(o3)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o2)); + rmReg = x86OpReg(o1); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o2)); + rmMem = x86OpMem(o1); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxMovDQ: + if (encoded == ENC_OPS(Reg, Reg, None)) { + if (static_cast(o0)->isGp()) { + opCode = extendedInfo.getSecondaryOpCode(); + opReg = x86OpReg(o1); + rmReg = x86OpReg(o0); + goto _EmitAvxR; + } + + if (static_cast(o1)->isGp()) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitAvxR; + } + } + + goto _AvxRmMr_AfterRegRegCheck; + + case kX86InstEncodingAvxRmMr_OptL: + ADD_VEX_L(x86IsYmm(o0) | x86IsYmm(o1)); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingAvxRmMr: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitAvxR; + } + +_AvxRmMr_AfterRegRegCheck: + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitAvxM; + } + + // The following instruction uses the secondary opcode. 
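+ // Only the VEX.L bit is kept from the original opcode below; masking with
+ // kX86InstOpCode_L_Mask before merging the secondary opcode preserves a
+ // YMM (L=1) form selected by the _OptL variant.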
+ opCode &= kX86InstOpCode_L_Mask; + opCode |= extendedInfo.getSecondaryOpCode(); + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxRvmRmi_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(o1)); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingAvxRvmRmi: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmReg = x86OpReg(o2); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + goto _EmitAvxM; + } + + // The following instructions use the secondary opcode. + opCode &= kX86InstOpCode_L_Mask; + opCode |= extendedInfo.getSecondaryOpCode(); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxRvmMr: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmReg = x86OpReg(o2); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + goto _EmitAvxM; + } + + // The following instructions use the secondary opcode. + opCode = extendedInfo.getSecondaryOpCode(); + + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86OpReg(o1); + rmReg = x86OpReg(o0); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxRvmMvr_OptL: + ADD_VEX_L(x86IsYmm(o0) | x86IsYmm(o1)); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingAvxRvmMvr: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmReg = x86OpReg(o2); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + goto _EmitAvxM; + } + + // The following instruction uses the secondary opcode. + opCode &= kX86InstOpCode_L_Mask; + opCode |= extendedInfo.getSecondaryOpCode(); + + if (encoded == ENC_OPS(Mem, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o2), x86OpReg(o1)); + rmMem = x86OpMem(o0); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxRvmVmi_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(o1)); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingAvxRvmVmi: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmReg = x86OpReg(o2); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + goto _EmitAvxM; + } + + // The following instruction uses the secondary opcode. 
+ opCode &= kX86InstOpCode_L_Mask; + opCode |= extendedInfo.getSecondaryOpCode(); + opReg = x86ExtractO(opCode); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opReg = x86RegAndVvvv(opReg, x86OpReg(o0)); + rmReg = x86OpReg(o1); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + opReg = x86RegAndVvvv(opReg, x86OpReg(o0)); + rmMem = x86OpMem(o1); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxVm: +CaseAvxVm: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86RegAndVvvv(opReg, x86OpReg(o0)); + rmReg = x86OpReg(o1); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = x86RegAndVvvv(opReg, x86OpReg(o0)); + rmMem = x86OpMem(o1); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxVm_OptW: + ADD_REX_W(x86IsGpq(static_cast(o0)) | x86IsGpq(o1)); + goto CaseAvxVm; + + case kX86InstEncodingAvxVmi_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(o1)); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingAvxVmi: + imVal = static_cast(o3)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opReg = x86RegAndVvvv(opReg, x86OpReg(o0)); + rmReg = x86OpReg(o1); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + opReg = x86RegAndVvvv(opReg, x86OpReg(o0)); + rmMem = x86OpMem(o1); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxRvrmRvmr_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(static_cast(o1))); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingAvxRvrmRvmr: + if (encoded == ENC_OPS(Reg, Reg, Reg) && o3->isReg()) { + imVal = x86OpReg(o3) << 4; + imLen = 1; + + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmReg = x86OpReg(o2); + + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Reg) && o3->isMem()) { + imVal = x86OpReg(o2) << 4; + imLen = 1; + + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o3); + + ADD_VEX_W(true); + goto _EmitAvxM; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem) && o3->isReg()) { + imVal = x86OpReg(o3) << 4; + imLen = 1; + + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxMovSsSd: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + goto _EmitAvxRvm_Reg; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitAvxM; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opCode = extendedInfo.getSecondaryOpCode(); + opReg = x86OpReg(o1); + rmMem = x86OpMem(o0); + goto _EmitAvxM; + } + break; + + case kX86InstEncodingAvxGatherEx: + if (encoded == ENC_OPS(Reg, Mem, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o2)); + rmMem = x86OpMem(o1); + + uint32_t vSib = rmMem->getVSib(); + if (vSib == kX86MemVSibGpz) + goto _IllegalInst; + + ADD_VEX_L(vSib == kX86MemVSibYmm); + goto _EmitAvxV; + } + break; + + case kX86InstEncodingAvxGather: + if (encoded == ENC_OPS(Reg, Mem, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o2)); + rmMem = x86OpMem(o1); + + uint32_t vSib = rmMem->getVSib(); + if (vSib == kX86MemVSibGpz) + goto _IllegalInst; + + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(static_cast(o2))); + goto _EmitAvxV; + } + break; + + // ------------------------------------------------------------------------ + // [FMA4] + // ------------------------------------------------------------------------ + + case kX86InstEncodingFma4_OptL: + // It's fine to just check the first operand, second is just for sanity. 
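+ // FMA4 instructions take four operands; the fourth register doesn't fit
+ // into the ModRM/VEX fields, so it travels in the high nibble of a trailing
+ // imm8 (`imVal = x86OpReg(...) << 4` below), and VEX.W selects which of the
+ // two source operands may come from memory.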
+ ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(static_cast(o1))); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingFma4: + if (encoded == ENC_OPS(Reg, Reg, Reg) && o3->isReg()) { + imVal = x86OpReg(o3) << 4; + imLen = 1; + + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmReg = x86OpReg(o2); + + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Reg) && o3->isMem()) { + imVal = x86OpReg(o2) << 4; + imLen = 1; + + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o3); + + ADD_VEX_W(true); + goto _EmitAvxM; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem) && o3->isReg()) { + imVal = x86OpReg(o3) << 4; + imLen = 1; + + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + + goto _EmitAvxM; + } + break; + + // ------------------------------------------------------------------------ + // [XOP] + // ------------------------------------------------------------------------ + + case kX86InstEncodingXopRm_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(o1)); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingXopRm: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitXopR; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitXopM; + } + break; + + case kX86InstEncodingXopRvmRmv: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o2)); + rmReg = x86OpReg(o1); + + goto _EmitXopR; + } + + if (encoded == ENC_OPS(Reg, Mem, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o2)); + rmMem = x86OpMem(o1); + + goto _EmitXopM; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + + ADD_VEX_W(true); + goto _EmitXopM; + } + + break; + + case kX86InstEncodingXopRvmRmi: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o2)); + rmReg = x86OpReg(o1); + goto _EmitXopR; + } + + if (encoded == ENC_OPS(Reg, Mem, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o2)); + rmMem = x86OpMem(o1); + + goto _EmitXopM; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + + ADD_VEX_W(true); + goto _EmitXopM; + } + + // The following instructions use the secondary opcode. 
+ opCode = extendedInfo.getSecondaryOpCode(); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opReg = x86OpReg(o0); + rmReg = x86OpReg(o1); + goto _EmitXopR; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + opReg = x86OpReg(o0); + rmMem = x86OpMem(o1); + goto _EmitXopM; + } + break; + + case kX86InstEncodingXopRvmr_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(static_cast(o1))); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingXopRvmr: + if (!o3->isReg()) + goto _IllegalInst; + + imVal = x86OpReg(o3) << 4; + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmReg = x86OpReg(o2); + goto _EmitXopR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + goto _EmitXopM; + } + break; + + case kX86InstEncodingXopRvmi_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(static_cast(o1))); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingXopRvmi: + if (!o3->isImm()) + goto _IllegalInst; + + imVal = static_cast(o3)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmReg = x86OpReg(o2); + goto _EmitXopR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + goto _EmitXopM; + } + break; + + case kX86InstEncodingXopRvrmRvmr_OptL: + ADD_VEX_L(x86IsYmm(static_cast(o0)) | x86IsYmm(static_cast(o1))); + ASMJIT_FALLTHROUGH; + + case kX86InstEncodingXopRvrmRvmr: + if (encoded == ENC_OPS(Reg, Reg, Reg) && o3->isReg()) { + imVal = x86OpReg(o3) << 4; + imLen = 1; + + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmReg = x86OpReg(o2); + + goto _EmitXopR; + } + + if (encoded == ENC_OPS(Reg, Reg, Reg) && o3->isMem()) { + imVal = x86OpReg(o2) << 4; + imLen = 1; + + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o3); + + ADD_VEX_W(true); + goto _EmitXopM; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem) && o3->isReg()) { + imVal = x86OpReg(o3) << 4; + imLen = 1; + + opReg = x86RegAndVvvv(x86OpReg(o0), x86OpReg(o1)); + rmMem = x86OpMem(o2); + + goto _EmitXopM; + } + break; + + case kX86InstEncodingXopVm_OptW: + ADD_REX_W(x86IsGpq(static_cast(o0)) | x86IsGpq(o1)); + + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = x86RegAndVvvv(opReg, x86OpReg(o0)); + rmReg = x86OpReg(o1); + goto _EmitXopR; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = x86RegAndVvvv(opReg, x86OpReg(o0)); + rmMem = x86OpMem(o1); + goto _EmitXopM; + } + break; + } + + // -------------------------------------------------------------------------- + // [Illegal] + // -------------------------------------------------------------------------- + +_IllegalInst: + self->setLastError(kErrorIllegalInst); +#if defined(ASMJIT_DEBUG) + assertIllegal = true; +#endif // ASMJIT_DEBUG + goto _EmitDone; + +_IllegalAddr: + self->setLastError(kErrorIllegalAddresing); +#if defined(ASMJIT_DEBUG) + assertIllegal = true; +#endif // ASMJIT_DEBUG + goto _EmitDone; + +_IllegalDisp: + self->setLastError(kErrorIllegalDisplacement); +#if defined(ASMJIT_DEBUG) + assertIllegal = true; +#endif // ASMJIT_DEBUG + goto _EmitDone; + + // -------------------------------------------------------------------------- + // [Emit - X86] + // -------------------------------------------------------------------------- + +_EmitX86Op: + // Mandatory instruction prefix. 
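+ // AsmJit packs the entire encoding into the 32-bit `opCode` value: the low
+ // byte is the instruction byte itself and dedicated bit-fields hold the
+ // mandatory prefix (PP: none/66/F2/F3), the 0F/0F38/0F3A escape (MM) and
+ // the W/L bits. EMIT_PP and EMIT_MM below just shift those fields out and
+ // write the corresponding prefix and escape bytes.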
+ EMIT_PP(opCode); + + // Rex prefix (64-bit only). + if (Arch == kArchX64) { + uint32_t rex = x86RexFromOpCodeAndOptions(opCode, options); + + if (rex & ~static_cast(_kX86InstOptionNoRex)) { + rex |= kX86ByteRex; + EMIT_BYTE(rex); + + if (x86RexIsInvalid(rex)) + goto _IllegalInst; + } + } + + // Instruction opcodes. + EMIT_MM(opCode); + EMIT_BYTE(opCode); + + if (imLen != 0) + goto _EmitImm; + else + goto _EmitDone; + +_EmitX86OpWithOpReg: + // Mandatory instruction prefix. + EMIT_PP(opCode); + + // Rex prefix (64-bit only). + if (Arch == kArchX64) { + uint32_t rex = x86RexFromOpCodeAndOptions(opCode, options); + + rex += (opReg >> 3); // Rex.B (0x01). + + if (rex & ~static_cast(_kX86InstOptionNoRex)) { + rex |= kX86ByteRex; + opReg &= 0x07; + EMIT_BYTE(rex); + + if (x86RexIsInvalid(rex)) + goto _IllegalInst; + } + } + + // Instruction opcodes. + opCode += opReg; + EMIT_MM(opCode); + EMIT_BYTE(opCode); + + if (imLen != 0) + goto _EmitImm; + else + goto _EmitDone; + +_EmitX86R: + // Mandatory instruction prefix. + EMIT_PP(opCode); + + // Rex prefix (64-bit only). + if (Arch == kArchX64) { + uint32_t rex = x86RexFromOpCodeAndOptions(opCode, options); + + rex += static_cast(opReg & 0x08) >> 1; // Rex.R (0x04). + rex += static_cast(rmReg) >> 3; // Rex.B (0x01). + + if (rex & ~static_cast(_kX86InstOptionNoRex)) { + rex |= kX86ByteRex; + opReg &= 0x07; + rmReg &= 0x07; + EMIT_BYTE(rex); + + if (x86RexIsInvalid(rex)) + goto _IllegalInst; + } + } + + // Instruction opcodes. + EMIT_MM(opCode); + EMIT_BYTE(opCode); + + // ModR. + EMIT_BYTE(x86EncodeMod(3, opReg, static_cast(rmReg))); + + if (imLen != 0) + goto _EmitImm; + else + goto _EmitDone; + +_EmitX86M: + ASMJIT_ASSERT(rmMem != nullptr); + ASMJIT_ASSERT(rmMem->getOp() == Operand::kTypeMem); + + mBase = rmMem->getBase(); + mIndex = rmMem->getIndex(); + + // Size override prefix. + if (rmMem->hasBaseOrIndex() && rmMem->getMemType() != kMemTypeLabel) { + if (Arch == kArchX86) { + if (!rmMem->hasGpdBase()) + EMIT_BYTE(0x67); + } + else { + if (rmMem->hasGpdBase()) + EMIT_BYTE(0x67); + } + } + + // Segment override prefix. + if (rmMem->hasSegment()) { + EMIT_BYTE(x86SegmentPrefix[rmMem->getSegment()]); + } + + // Mandatory instruction prefix. + EMIT_PP(opCode); + + // Rex prefix (64-bit only). + if (Arch == kArchX64) { + uint32_t rex = x86RexFromOpCodeAndOptions(opCode, options); + + rex += static_cast(opReg & 8) >> 1; // Rex.R (0x04). + rex += static_cast(mIndex - 8 < 8) << 1; // Rex.X (0x02). + rex += static_cast(mBase - 8 < 8); // Rex.B (0x01). + + if (rex & ~static_cast(_kX86InstOptionNoRex)) { + rex |= kX86ByteRex; + opReg &= 0x07; + EMIT_BYTE(rex); + + if (x86RexIsInvalid(rex)) + goto _IllegalInst; + } + + mBase &= 0x07; + } + + // Instruction opcodes. + EMIT_MM(opCode); + EMIT_BYTE(opCode); + // ... Fall through ... + + // -------------------------------------------------------------------------- + // [Emit - SIB] + // -------------------------------------------------------------------------- + +_EmitSib: + dispOffset = rmMem->getDisplacement(); + if (rmMem->isBaseIndexType()) { + if (mIndex >= kInvalidReg) { + if (mBase == kX86RegIndexSp) { + if (dispOffset == 0) { + // [Esp/Rsp/R12]. + EMIT_BYTE(x86EncodeMod(0, opReg, 4)); + EMIT_BYTE(x86EncodeSib(0, 4, 4)); + } + else if (Utils::isInt8(dispOffset)) { + // [Esp/Rsp/R12 + Disp8]. + EMIT_BYTE(x86EncodeMod(1, opReg, 4)); + EMIT_BYTE(x86EncodeSib(0, 4, 4)); + EMIT_BYTE(static_cast(dispOffset)); + } + else { + // [Esp/Rsp/R12 + Disp32]. 
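+ // ESP/RSP/R12 as a base always requires a SIB byte, because the rm=100
+ // slot in ModRM is reserved as the SIB escape. Illustrative encoding:
+ //   mov eax, [esp+0x100] -> 8B 84 24 00 01 00 00
+ // (ModRM 0x84 = mod=10 reg=eax rm=100, SIB 0x24 = no index, base=esp).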
+ EMIT_BYTE(x86EncodeMod(2, opReg, 4)); + EMIT_BYTE(x86EncodeSib(0, 4, 4)); + EMIT_DWORD(static_cast(dispOffset)); + } + } + else if (mBase != kX86RegIndexBp && dispOffset == 0) { + // [Base]. + EMIT_BYTE(x86EncodeMod(0, opReg, mBase)); + } + else if (Utils::isInt8(dispOffset)) { + // [Base + Disp8]. + EMIT_BYTE(x86EncodeMod(1, opReg, mBase)); + EMIT_BYTE(static_cast(dispOffset)); + } + else { + // [Base + Disp32]. + EMIT_BYTE(x86EncodeMod(2, opReg, mBase)); + EMIT_DWORD(static_cast(dispOffset)); + } + } + else { + uint32_t shift = rmMem->getShift(); + + // Esp/Rsp/R12 register can't be used as an index. + mIndex &= 0x07; + ASMJIT_ASSERT(mIndex != kX86RegIndexSp); + + if (mBase != kX86RegIndexBp && dispOffset == 0) { + // [Base + Index * Scale]. + EMIT_BYTE(x86EncodeMod(0, opReg, 4)); + EMIT_BYTE(x86EncodeSib(shift, mIndex, mBase)); + } + else if (Utils::isInt8(dispOffset)) { + // [Base + Index * Scale + Disp8]. + EMIT_BYTE(x86EncodeMod(1, opReg, 4)); + EMIT_BYTE(x86EncodeSib(shift, mIndex, mBase)); + EMIT_BYTE(static_cast(dispOffset)); + } + else { + // [Base + Index * Scale + Disp32]. + EMIT_BYTE(x86EncodeMod(2, opReg, 4)); + EMIT_BYTE(x86EncodeSib(shift, mIndex, mBase)); + EMIT_DWORD(static_cast(dispOffset)); + } + } + } + else if (Arch == kArchX86) { + if (mIndex >= kInvalidReg) { + // [Disp32]. + EMIT_BYTE(x86EncodeMod(0, opReg, 5)); + } + else { + // [Index * Scale + Disp32]. + uint32_t shift = rmMem->getShift(); + ASMJIT_ASSERT(mIndex != kX86RegIndexSp); + + EMIT_BYTE(x86EncodeMod(0, opReg, 4)); + EMIT_BYTE(x86EncodeSib(shift, mIndex, 5)); + } + + if (rmMem->getMemType() == kMemTypeAbsolute) { + // [Disp32]. + EMIT_DWORD(static_cast(dispOffset)); + } + else if (rmMem->getMemType() == kMemTypeLabel) { + // Relative->Absolute [x86 mode]. + label = self->getLabelData(rmMem->_vmem.base); + relocId = self->_relocations.getLength(); + + RelocData rd; + rd.type = kRelocRelToAbs; + rd.size = 4; + rd.from = static_cast((uintptr_t)(cursor - self->_buffer)); + rd.data = static_cast(dispOffset); + + if (self->_relocations.append(rd) != kErrorOk) + return self->setLastError(kErrorNoHeapMemory); + + if (label->offset != -1) { + // Bound label. + self->_relocations[relocId].data += static_cast(label->offset); + EMIT_DWORD(0); + } + else { + // Non-bound label. + dispOffset = -4 - imLen; + dispSize = 4; + goto _EmitDisplacement; + } + } + else { + // RIP->Absolute [x86 mode]. + relocId = self->_relocations.getLength(); + + RelocData rd; + rd.type = kRelocRelToAbs; + rd.size = 4; + rd.from = static_cast((uintptr_t)(cursor - self->_buffer)); + rd.data = rd.from + static_cast(dispOffset); + + if (self->_relocations.append(rd) != kErrorOk) + return self->setLastError(kErrorNoHeapMemory); + + EMIT_DWORD(0); + } + } + else /* if (Arch === kArchX64) */ { + if (rmMem->getMemType() == kMemTypeAbsolute) { + EMIT_BYTE(x86EncodeMod(0, opReg, 4)); + if (mIndex >= kInvalidReg) { + // [Disp32]. + EMIT_BYTE(x86EncodeSib(0, 4, 5)); + } + else { + // [Disp32 + Index * Scale]. + mIndex &= 0x07; + ASMJIT_ASSERT(mIndex != kX86RegIndexSp); + + uint32_t shift = rmMem->getShift(); + EMIT_BYTE(x86EncodeSib(shift, mIndex, 5)); + } + EMIT_DWORD(static_cast(dispOffset)); + } + else if (rmMem->getMemType() == kMemTypeLabel) { + // [RIP + Disp32]. + label = self->getLabelData(rmMem->_vmem.base); + + // Indexing is invalid. + if (mIndex < kInvalidReg) + goto _IllegalDisp; + + EMIT_BYTE(x86EncodeMod(0, opReg, 5)); + dispOffset -= (4 + imLen); + + if (label->offset != -1) { + // Bound label. 
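+ // The label is already bound, so the RIP-relative displacement can be
+ // computed immediately: the target offset minus the end of this
+ // instruction (the 4 bytes of disp32 and `imLen` were subtracted above).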
+ dispOffset += label->offset - static_cast((intptr_t)(cursor - self->_buffer)); + EMIT_DWORD(static_cast(dispOffset)); + } + else { + // Non-bound label. + dispSize = 4; + relocId = -1; + goto _EmitDisplacement; + } + } + else { + // [RIP + Disp32]. + + // Indexing is invalid. + if (mIndex < kInvalidReg) + goto _IllegalDisp; + + EMIT_BYTE(x86EncodeMod(0, opReg, 5)); + EMIT_DWORD(static_cast(dispOffset)); + } + } + + if (imLen == 0) + goto _EmitDone; + + // -------------------------------------------------------------------------- + // [Emit - Imm] + // -------------------------------------------------------------------------- + +_EmitImm: + switch (imLen) { + case 1: EMIT_BYTE (imVal & 0x000000FF); break; + case 2: EMIT_WORD (imVal & 0x0000FFFF); break; + case 4: EMIT_DWORD(imVal & 0xFFFFFFFF); break; + case 8: EMIT_QWORD(imVal ); break; + + default: + ASMJIT_NOT_REACHED(); + } + goto _EmitDone; + + // -------------------------------------------------------------------------- + // [Emit - Fpu] + // -------------------------------------------------------------------------- + +_EmitFpuOp: + // Mandatory instruction prefix. + EMIT_PP(opCode); + + // Instruction opcodes. + EMIT_BYTE(opCode >> 8); + EMIT_BYTE(opCode); + goto _EmitDone; + + // -------------------------------------------------------------------------- + // [Emit - Avx] + // -------------------------------------------------------------------------- + +#define EMIT_AVX_M \ + ASMJIT_ASSERT(rmMem != nullptr); \ + ASMJIT_ASSERT(rmMem->getOp() == Operand::kTypeMem); \ + \ + if (rmMem->hasSegment()) { \ + EMIT_BYTE(x86SegmentPrefix[rmMem->getSegment()]); \ + } \ + \ + mBase = rmMem->getBase(); \ + mIndex = rmMem->getIndex(); \ + \ + { \ + uint32_t vex_XvvvvLpp; \ + uint32_t vex_rxbmmmmm; \ + \ + vex_XvvvvLpp = (opCode >> (kX86InstOpCode_W_Shift - 7)) & 0x80; \ + vex_XvvvvLpp += (opCode >> (kX86InstOpCode_L_Shift - 2)) & 0x04; \ + vex_XvvvvLpp += (opCode >> (kX86InstOpCode_PP_Shift )) & 0x03; \ + vex_XvvvvLpp += (opReg >> (kVexVVVVShift - 3)); \ + \ + vex_rxbmmmmm = (opCode >> kX86InstOpCode_MM_Shift) & 0x0F; \ + vex_rxbmmmmm |= static_cast(mBase - 8 < 8) << 5; \ + vex_rxbmmmmm |= static_cast(mIndex - 8 < 8) << 6; \ + \ + if ((vex_rxbmmmmm != 0x01) || (vex_XvvvvLpp >= 0x80) || ((options & kX86InstOptionVex3) != 0)) { \ + vex_rxbmmmmm |= static_cast(opReg << 4) & 0x80; \ + vex_rxbmmmmm ^= 0xE0; \ + vex_XvvvvLpp ^= 0x78; \ + \ + EMIT_BYTE(kX86ByteVex3); \ + EMIT_BYTE(vex_rxbmmmmm); \ + EMIT_BYTE(vex_XvvvvLpp); \ + EMIT_BYTE(opCode); \ + } \ + else { \ + vex_XvvvvLpp |= static_cast(opReg << 4) & 0x80; \ + vex_XvvvvLpp ^= 0xF8; \ + \ + EMIT_BYTE(kX86ByteVex2); \ + EMIT_BYTE(vex_XvvvvLpp); \ + EMIT_BYTE(opCode); \ + } \ + } \ + \ + mBase &= 0x07; \ + opReg &= 0x07; + +_EmitAvxOp: + { + uint32_t vex_XvvvvLpp; + + vex_XvvvvLpp = (opCode >> (kX86InstOpCode_L_Shift - 2)) & 0x04; + vex_XvvvvLpp |= (opCode >> (kX86InstOpCode_PP_Shift)); + vex_XvvvvLpp |= 0xF8; + + // Encode 3-byte VEX prefix only if specified in options. 
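+ // A 2-byte VEX prefix (C5 xx) is sufficient here, since an operand-less
+ // instruction can't set REX.X/B or require W=1; the 3-byte form (C4 xx xx)
+ // is emitted only when forced via the vex3() option. For example, with no
+ // options `vzeroupper` encodes as C5 F8 77.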
+ if ((options & kX86InstOptionVex3) != 0) { + uint32_t vex_rxbmmmmm = (opCode >> kX86InstOpCode_MM_Shift) | 0xE0; + + EMIT_BYTE(kX86ByteVex3); + EMIT_BYTE(vex_rxbmmmmm); + EMIT_BYTE(vex_XvvvvLpp); + EMIT_BYTE(opCode); + } + else { + EMIT_BYTE(kX86ByteVex2); + EMIT_BYTE(vex_XvvvvLpp); + EMIT_BYTE(opCode); + } + } + goto _EmitDone; + +_EmitAvxR: + { + uint32_t vex_XvvvvLpp; + uint32_t vex_rxbmmmmm; + + vex_XvvvvLpp = (opCode >> (kX86InstOpCode_W_Shift - 7)) & 0x80; + vex_XvvvvLpp += (opCode >> (kX86InstOpCode_L_Shift - 2)) & 0x04; + vex_XvvvvLpp += (opCode >> (kX86InstOpCode_PP_Shift )) & 0x03; + vex_XvvvvLpp += (opReg >> (kVexVVVVShift - 3)); + + vex_rxbmmmmm = (opCode >> kX86InstOpCode_MM_Shift) & 0x0F; + vex_rxbmmmmm |= (rmReg << 2) & 0x20; + + if (vex_rxbmmmmm != 0x01 || vex_XvvvvLpp >= 0x80 || (options & kX86InstOptionVex3) != 0) { + vex_rxbmmmmm |= static_cast(opReg & 0x08) << 4; + vex_rxbmmmmm ^= 0xE0; + vex_XvvvvLpp ^= 0x78; + + EMIT_BYTE(kX86ByteVex3); + EMIT_BYTE(vex_rxbmmmmm); + EMIT_BYTE(vex_XvvvvLpp); + EMIT_BYTE(opCode); + + rmReg &= 0x07; + } + else { + vex_XvvvvLpp += static_cast(opReg & 0x08) << 4; + vex_XvvvvLpp ^= 0xF8; + + EMIT_BYTE(kX86ByteVex2); + EMIT_BYTE(vex_XvvvvLpp); + EMIT_BYTE(opCode); + } + } + + EMIT_BYTE(x86EncodeMod(3, opReg & 0x07, static_cast(rmReg))); + + if (imLen == 0) + goto _EmitDone; + + EMIT_BYTE(imVal & 0xFF); + goto _EmitDone; + +_EmitAvxM: + EMIT_AVX_M + goto _EmitSib; + +_EmitAvxV: + EMIT_AVX_M + + if (mIndex >= kInvalidReg) + goto _IllegalInst; + + if (Arch == kArchX64) + mIndex &= 0x07; + + dispOffset = rmMem->getDisplacement(); + if (rmMem->isBaseIndexType()) { + uint32_t shift = rmMem->getShift(); + + if (mBase != kX86RegIndexBp && dispOffset == 0) { + // [Base + Index * Scale]. + EMIT_BYTE(x86EncodeMod(0, opReg, 4)); + EMIT_BYTE(x86EncodeSib(shift, mIndex, mBase)); + } + else if (Utils::isInt8(dispOffset)) { + // [Base + Index * Scale + Disp8]. + EMIT_BYTE(x86EncodeMod(1, opReg, 4)); + EMIT_BYTE(x86EncodeSib(shift, mIndex, mBase)); + EMIT_BYTE(static_cast(dispOffset)); + } + else { + // [Base + Index * Scale + Disp32]. + EMIT_BYTE(x86EncodeMod(2, opReg, 4)); + EMIT_BYTE(x86EncodeSib(shift, mIndex, mBase)); + EMIT_DWORD(static_cast(dispOffset)); + } + } + else { + // [Index * Scale + Disp32]. + uint32_t shift = rmMem->getShift(); + + EMIT_BYTE(x86EncodeMod(0, opReg, 4)); + EMIT_BYTE(x86EncodeSib(shift, mIndex, 5)); + + if (rmMem->getMemType() == kMemTypeLabel) { + if (Arch == kArchX64) + goto _IllegalAddr; + + // Relative->Absolute [x86 mode]. + label = self->getLabelData(rmMem->_vmem.base); + relocId = self->_relocations.getLength(); + + { + RelocData rd; + rd.type = kRelocRelToAbs; + rd.size = 4; + rd.from = static_cast((uintptr_t)(cursor - self->_buffer)); + rd.data = static_cast(dispOffset); + + if (self->_relocations.append(rd) != kErrorOk) + return self->setLastError(kErrorNoHeapMemory); + } + + if (label->offset != -1) { + // Bound label. + self->_relocations[relocId].data += static_cast(label->offset); + EMIT_DWORD(0); + } + else { + // Non-bound label. + dispOffset = -4 - imLen; + dispSize = 4; + goto _EmitDisplacement; + } + } + else { + // [Disp32]. 
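+ // With no base register, mod=00 and base=101 in the SIB byte (emitted
+ // above) select the [index * scale + disp32] form, so a 32-bit
+ // displacement always follows, even when it is zero.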
+ EMIT_DWORD(static_cast(dispOffset)); + } + } + goto _EmitDone; + + // -------------------------------------------------------------------------- + // [Xop] + // -------------------------------------------------------------------------- + +#define EMIT_XOP_M \ + ASMJIT_ASSERT(rmMem != nullptr); \ + ASMJIT_ASSERT(rmMem->getOp() == Operand::kTypeMem); \ + \ + if (rmMem->hasSegment()) { \ + EMIT_BYTE(x86SegmentPrefix[rmMem->getSegment()]); \ + } \ + \ + mBase = rmMem->getBase(); \ + mIndex = rmMem->getIndex(); \ + \ + { \ + uint32_t vex_XvvvvLpp; \ + uint32_t vex_rxbmmmmm; \ + \ + vex_XvvvvLpp = (opCode >> (kX86InstOpCode_W_Shift - 7)) & 0x80; \ + vex_XvvvvLpp += (opCode >> (kX86InstOpCode_L_Shift - 2)) & 0x04; \ + vex_XvvvvLpp += (opCode >> (kX86InstOpCode_PP_Shift )) & 0x03; \ + vex_XvvvvLpp += (opReg >> (kVexVVVVShift - 3)); \ + \ + vex_rxbmmmmm = (opCode >> kX86InstOpCode_MM_Shift) & 0x0F; \ + vex_rxbmmmmm += static_cast(mBase - 8 < 8) << 5; \ + vex_rxbmmmmm += static_cast(mIndex - 8 < 8) << 6; \ + \ + vex_rxbmmmmm |= static_cast(opReg << 4) & 0x80; \ + vex_rxbmmmmm ^= 0xE0; \ + vex_XvvvvLpp ^= 0x78; \ + \ + EMIT_BYTE(kX86ByteXop3); \ + EMIT_BYTE(vex_rxbmmmmm); \ + EMIT_BYTE(vex_XvvvvLpp); \ + EMIT_BYTE(opCode); \ + } \ + \ + mBase &= 0x07; \ + opReg &= 0x07; + +_EmitXopR: + { + uint32_t xop_XvvvvLpp; + uint32_t xop_rxbmmmmm; + + xop_XvvvvLpp = (opCode >> (kX86InstOpCode_W_Shift - 7)) & 0x80; + xop_XvvvvLpp += (opCode >> (kX86InstOpCode_L_Shift - 2)) & 0x04; + xop_XvvvvLpp += (opCode >> (kX86InstOpCode_PP_Shift )) & 0x03; + xop_XvvvvLpp += (opReg >> (kVexVVVVShift - 3)); + + xop_rxbmmmmm = (opCode >> kX86InstOpCode_MM_Shift) & 0x0F; + xop_rxbmmmmm |= (rmReg << 2) & 0x20; + + xop_rxbmmmmm |= static_cast(opReg & 0x08) << 4; + xop_rxbmmmmm ^= 0xE0; + xop_XvvvvLpp ^= 0x78; + + EMIT_BYTE(kX86ByteXop3); + EMIT_BYTE(xop_rxbmmmmm); + EMIT_BYTE(xop_XvvvvLpp); + EMIT_BYTE(opCode); + + rmReg &= 0x07; + } + + EMIT_BYTE(x86EncodeMod(3, opReg & 0x07, static_cast(rmReg))); + + if (imLen == 0) + goto _EmitDone; + + EMIT_BYTE(imVal & 0xFF); + goto _EmitDone; + +_EmitXopM: + EMIT_XOP_M + goto _EmitSib; + + // -------------------------------------------------------------------------- + // [Emit - Jump/Call to an Immediate] + // -------------------------------------------------------------------------- + + // 64-bit mode requires a trampoline if a relative displacement doesn't fit + // into a 32-bit address. Old version of AsmJit used to emit jump to a section + // which contained another jump followed by an address (it worked well for + // both `jmp` and `call`), but it required to reserve 14-bytes for a possible + // trampoline. + // + // Instead of using 5-byte `jmp/call` and reserving 14 bytes required by the + // trampoline, it's better to use 6-byte `jmp/call` (prefixing it with REX + // prefix) and to patch the `jmp/call` instruction to read the address from + // a memory in case the trampoline is needed. +_EmitJmpOrCallAbs: + { + RelocData rd; + rd.type = kRelocAbsToRel; + rd.size = 4; + rd.from = (intptr_t)(cursor - self->_buffer) + 1; + rd.data = static_cast(imVal); + + uint32_t trampolineSize = 0; + + if (Arch == kArchX64) { + Ptr baseAddress = self->getRuntime()->getBaseAddress(); + + // If the base address of the output is known, it's possible to determine + // the need for a trampoline here. This saves possible REX prefix in + // 64-bit mode and prevents reserving space needed for an absolute address. 
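+ //
+ // In other words, a plain `E8/E9 rel32` is emitted when the target is known
+ // to be reachable, and otherwise the extra REX byte makes the instruction
+ // patchable into `FF /2|/4 [RIP+disp32]`, which reads the destination from
+ // an 8-byte trampoline slot appended after the generated code.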
+ if (baseAddress == kNoBaseAddress || !x64IsRelative(rd.data, baseAddress + rd.from + 4)) { + // Emit REX prefix so the instruction can be patched later on. The REX + // prefix does nothing if not patched after, but allows to patch the + // instruction in case where the trampoline is needed. + rd.type = kRelocTrampoline; + rd.from++; + + EMIT_BYTE(kX86ByteRex); + trampolineSize = 8; + } + } + + // Both `jmp` and `call` instructions have a single-byte opcode and are + // followed by a 32-bit displacement. + EMIT_BYTE(opCode); + EMIT_DWORD(0); + + if (self->_relocations.append(rd) != kErrorOk) + return self->setLastError(kErrorNoHeapMemory); + + // Reserve space for a possible trampoline. + self->_trampolinesSize += trampolineSize; + } + goto _EmitDone; + + // -------------------------------------------------------------------------- + // [Emit - Displacement] + // -------------------------------------------------------------------------- + +_EmitDisplacement: + { + ASMJIT_ASSERT(label->offset == -1); + ASMJIT_ASSERT(dispSize == 1 || dispSize == 4); + + // Chain with label. + LabelLink* link = self->_newLabelLink(); + link->prev = label->links; + link->offset = (intptr_t)(cursor - self->_buffer); + link->displacement = dispOffset; + link->relocId = relocId; + label->links = link; + + // Emit label size as dummy data. + if (dispSize == 1) + EMIT_BYTE(0x01); + else // if (dispSize == 4) + EMIT_DWORD(0x04040404); + + if (imLen != 0) + goto _EmitImm; + } + + // -------------------------------------------------------------------------- + // [Logging] + // -------------------------------------------------------------------------- + +_EmitDone: +#if !defined(ASMJIT_DISABLE_LOGGER) + if (self->_logger || assertIllegal) { + StringBuilderTmp<512> sb; + uint32_t loggerOptions = 0; + + if (self->_logger) { + sb.appendString(self->_logger->getIndentation()); + loggerOptions = self->_logger->getOptions(); + } + + X86Assembler_dumpInstruction(sb, Arch, code, options, o0, o1, o2, o3, loggerOptions); + + if ((loggerOptions & Logger::kOptionBinaryForm) != 0) + LogUtil::formatLine(sb, self->_cursor, (intptr_t)(cursor - self->_cursor), dispSize, imLen, self->_comment); + else + LogUtil::formatLine(sb, nullptr, kInvalidIndex, 0, 0, self->_comment); + +# if defined(ASMJIT_DEBUG) + if (self->_logger) +# endif // ASMJIT_DEBUG + self->_logger->logString(Logger::kStyleDefault, sb.getData(), sb.getLength()); + +# if defined(ASMJIT_DEBUG) + // This shouldn't happen. 
+ if (assertIllegal)
+ DebugUtils::assertionFailed(__FILE__, __LINE__, sb.getData());
+# endif // ASMJIT_DEBUG
+ }
+#else
+ ASMJIT_ASSERT(!assertIllegal);
+#endif // !ASMJIT_DISABLE_LOGGER
+
+ self->_comment = nullptr;
+ self->setCursor(cursor);
+
+ return kErrorOk;
+}
+
+Error X86Assembler::_emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) {
+#if defined(ASMJIT_BUILD_X86) && !defined(ASMJIT_BUILD_X64)
+ ASMJIT_ASSERT(_arch == kArchX86);
+ return X86Assembler_emit<kArchX86>(this, code, &o0, &o1, &o2, &o3);
+#elif !defined(ASMJIT_BUILD_X86) && defined(ASMJIT_BUILD_X64)
+ ASMJIT_ASSERT(_arch == kArchX64);
+ return X86Assembler_emit<kArchX64>(this, code, &o0, &o1, &o2, &o3);
+#else
+ if (_arch == kArchX86)
+ return X86Assembler_emit<kArchX86>(this, code, &o0, &o1, &o2, &o3);
+ else
+ return X86Assembler_emit<kArchX64>(this, code, &o0, &o1, &o2, &o3);
+#endif
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64
diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86assembler.h b/DynamicHooks/thirdparty/AsmJit/x86/x86assembler.h
new file mode 100644
index 0000000..59021e8
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/x86/x86assembler.h
@@ -0,0 +1,6717 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_X86_X86ASSEMBLER_H
+#define _ASMJIT_X86_X86ASSEMBLER_H
+
+// [Dependencies]
+#include "../base/assembler.h"
+#include "../x86/x86inst.h"
+#include "../x86/x86operand.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+//! \addtogroup asmjit_x86
+//! \{
+
+// ============================================================================
+// [asmjit::X86Assembler]
+// ============================================================================
+
+// \internal
+#define ASMJIT_X86_EMIT_OPTIONS(T) \
+ /*! Force short form of jmp/jcc instruction. */ \
+ ASMJIT_INLINE T& short_() noexcept { \
+ _instOptions |= kInstOptionShortForm; \
+ return *this; \
+ } \
+ \
+ /*! Force long form of jmp/jcc instruction. */ \
+ ASMJIT_INLINE T& long_() noexcept { \
+ _instOptions |= kInstOptionLongForm; \
+ return *this; \
+ } \
+ \
+ /*! Condition is likely to be taken (has only benefit on P4). */ \
+ ASMJIT_INLINE T& taken() noexcept { \
+ _instOptions |= kInstOptionTaken; \
+ return *this; \
+ } \
+ \
+ /*! Condition is unlikely to be taken (has only benefit on P4). */ \
+ ASMJIT_INLINE T& notTaken() noexcept { \
+ _instOptions |= kInstOptionNotTaken; \
+ return *this; \
+ } \
+ \
+ /*! Use LOCK prefix. */ \
+ ASMJIT_INLINE T& lock() noexcept { \
+ _instOptions |= kX86InstOptionLock; \
+ return *this; \
+ } \
+ \
+ /*! Force REX prefix (X64). */ \
+ ASMJIT_INLINE T& rex() noexcept { \
+ _instOptions |= kX86InstOptionRex; \
+ return *this; \
+ } \
+ \
+ /*! Force 3-byte VEX prefix (AVX+). */ \
+ ASMJIT_INLINE T& vex3() noexcept { \
+ _instOptions |= kX86InstOptionVex3; \
+ return *this; \
+ } \
+ \
+ /*! Force 4-byte EVEX prefix (AVX512+). */ \
+ ASMJIT_INLINE T& evex() noexcept { \
+ _instOptions |= kX86InstOptionEvex; \
+ return *this; \
+ } \
+ \
+ /*! Use zeroing instead of merging (AVX512+). */ \
+ ASMJIT_INLINE T& z() noexcept { \
+ _instOptions |= kX86InstOptionEvexZero; \
+ return *this; \
+ } \
+ \
+ /*! Broadcast one element to all other elements (AVX512+). */ \
+ ASMJIT_INLINE T& _1ToN() noexcept { \
+ _instOptions |= kX86InstOptionEvexOneN; \
+ return *this; \
+ } \
+ \
+ /*! 
Suppress all exceptions (AVX512+). */ \
+ ASMJIT_INLINE T& sae() noexcept { \
+ _instOptions |= kX86InstOptionEvexSae; \
+ return *this; \
+ } \
+ \
+ /*! Static rounding mode `round-to-nearest` (even) and `SAE` (AVX512+). */ \
+ ASMJIT_INLINE T& rn_sae() noexcept { \
+ _instOptions |= kX86InstOptionEvexRnSae; \
+ return *this; \
+ } \
+ \
+ /*! Static rounding mode `round-down` (toward -inf) and `SAE` (AVX512+). */ \
+ ASMJIT_INLINE T& rd_sae() noexcept { \
+ _instOptions |= kX86InstOptionEvexRdSae; \
+ return *this; \
+ } \
+ \
+ /*! Static rounding mode `round-up` (toward +inf) and `SAE` (AVX512+). */ \
+ ASMJIT_INLINE T& ru_sae() noexcept { \
+ _instOptions |= kX86InstOptionEvexRuSae; \
+ return *this; \
+ } \
+ \
+ /*! Static rounding mode `round-toward-zero` (truncate) and `SAE` (AVX512+). */ \
+ ASMJIT_INLINE T& rz_sae() noexcept { \
+ _instOptions |= kX86InstOptionEvexRzSae; \
+ return *this; \
+ }
+
+//! X86/X64 assembler.
+//!
+//! Assembler is the main class in AsmJit that can encode instructions and
+//! their operands to a binary stream runnable by the CPU. It creates an
+//! internal buffer where the encoded instructions are stored, and it contains
+//! intrinsics that can be used to emit the code in a convenient way. Code
+//! generation is in general safe, because the intrinsics use method
+//! overloading, so the emitted code can be checked by a C++ compiler. It's
+//! nearly impossible to create an invalid instruction, for example
+//! `mov [eax], [eax]`, because such an overload doesn't exist.
+//!
+//! Each call to an assembler intrinsic function emits an instruction directly
+//! to the binary stream. There are also runtime checks that prevent invalid
+//! code from being emitted. The assembler will assert in debug mode and put
+//! the `Assembler` instance into an error state in production mode.
+//!
+//! Code Generation
+//! ---------------
+//!
+//! To generate code you only need to create an instance of `Assembler` and
+//! use its intrinsics. The following example shows how:
+//!
+//! ~~~
+//! // Use asmjit namespace.
+//! using namespace asmjit;
+//! using namespace asmjit::x86;
+//!
+//! // Create X86Assembler instance (attached to a runtime that owns the code).
+//! JitRuntime runtime;
+//! X86Assembler a(&runtime);
+//!
+//! // Prolog.
+//! a.push(ebp);
+//! a.mov(ebp, esp);
+//!
+//! // Mov 1024 to EAX; EAX is also the return value.
+//! a.mov(eax, 1024);
+//!
+//! // Epilog.
+//! a.mov(esp, ebp);
+//! a.pop(ebp);
+//!
+//! // Return.
+//! a.ret();
+//! ~~~
+//!
+//! You can see that the syntax is very close to the Intel one. The only
+//! difference is that you are calling functions that emit the binary code for
+//! you. All registers are in the `asmjit::x86` namespace, so it's very
+//! comfortable to use (see the `using namespace` directives above). Without
+//! importing `asmjit::x86`, registers would have to be written as `x86::eax`,
+//! `x86::esp`, and so on.
+//!
+//! It is also possible to use memory addresses and immediates. Use `ptr()`,
+//! `byte_ptr()`, `word_ptr()`, `dword_ptr()` and similar functions to build a
+//! memory address operand. In most cases `ptr()` is enough, because
+//! information about the operand size is needed only in rare cases, that is,
+//! for an instruction without any register operands, such as `inc [mem]`.
+//!
+//! For example, assuming `a` is an `X86Assembler` instance:
+//!
+//! ~~~
+//! a.mov(ptr(eax), 0); // mov ptr [eax], 0
+//! a.mov(ptr(eax), edx); // mov ptr [eax], edx
+//! ~~~
+//!
+//! But it's also possible to create the complex addresses offered by the x86
+//! architecture:
+//!
+//! ~~~
+//! // eax + ecx*x addresses
+//! 
a.mov(ptr(eax, ecx, 0), 0); // mov ptr [eax + ecx], 0
+//! a.mov(ptr(eax, ecx, 1), 0); // mov ptr [eax + ecx * 2], 0
+//! a.mov(ptr(eax, ecx, 2), 0); // mov ptr [eax + ecx * 4], 0
+//! a.mov(ptr(eax, ecx, 3), 0); // mov ptr [eax + ecx * 8], 0
+//! // eax + ecx*x + disp addresses
+//! a.mov(ptr(eax, ecx, 0, 4), 0); // mov ptr [eax + ecx + 4], 0
+//! a.mov(ptr(eax, ecx, 1, 8), 0); // mov ptr [eax + ecx * 2 + 8], 0
+//! a.mov(ptr(eax, ecx, 2, 12), 0); // mov ptr [eax + ecx * 4 + 12], 0
+//! a.mov(ptr(eax, ecx, 3, 16), 0); // mov ptr [eax + ecx * 8 + 16], 0
+//! ~~~
+//!
+//! All addresses shown use `x86::ptr()` to build a memory operand. Some
+//! assembler instructions taking a single operand need to know the size of
+//! the operand to avoid ambiguity. For example `a.inc(ptr(eax))` is ambiguous
+//! and would cause a runtime error. This problem can be fixed by using a
+//! memory operand with the size specified - `byte_ptr`, `word_ptr`,
+//! `dword_ptr`; see the code below:
+//!
+//! ~~~
+//! // [byte] address.
+//! a.inc(byte_ptr(eax)); // Inc byte ptr [eax].
+//! a.dec(byte_ptr(eax)); // Dec byte ptr [eax].
+//! // [word] address.
+//! a.inc(word_ptr(eax)); // Inc word ptr [eax].
+//! a.dec(word_ptr(eax)); // Dec word ptr [eax].
+//! // [dword] address.
+//! a.inc(dword_ptr(eax)); // Inc dword ptr [eax].
+//! a.dec(dword_ptr(eax)); // Dec dword ptr [eax].
+//! // [qword] address.
+//! a.inc(qword_ptr(rax)); // Inc qword ptr [rax].
+//! a.dec(qword_ptr(rax)); // Dec qword ptr [rax].
+//! ~~~
+//!
+//! Calling JIT Code
+//! ----------------
+//!
+//! After you are finished emitting instructions, you can make your function
+//! callable by using the `Assembler::make()` method. This method will use the
+//! memory manager to allocate virtual memory and relocate the generated code
+//! to it. The memory is allocated through the `Runtime` instance provided to
+//! the `X86Assembler` constructor.
+//!
+//! The size of the generated code can be retrieved through the `getCodeSize()`
+//! and `getOffset()` methods. The `getOffset()` method returns the current
+//! offset (that is mostly equal to the final code size, if called after the
+//! code generation) and `getCodeSize()` returns the final code size with
+//! possible trampolines. The `takeCode()` method can be used to take the
+//! internal buffer and reset the code generator, but the buffer returned has
+//! to be freed manually in that case.
+//!
+//! Machine code can be executed only in memory that is marked executable. This
+//! mark is usually not set for memory returned by a C/C++ `malloc()` function.
+//! The `VMemUtil::alloc()` function can be used to allocate memory where the
+//! code can be executed. Please note that `VMemUtil` is a low-level class that
+//! works at the memory page level. A high-level interface similar to
+//! malloc/free is provided by the `VMemMgr` class.
+//!
+//! The next example shows how to allocate memory where the code can be executed:
+//!
+//! ~~~
+//! using namespace asmjit;
+//!
+//! JitRuntime runtime;
+//! X86Assembler a(&runtime);
+//!
+//! ... Code generation ...
+//!
+//! // The function prototype.
+//! typedef void (*MyFunc)();
+//!
+//! // Make the function.
+//! MyFunc func = asmjit_cast<MyFunc>(a.make());
+//!
+//! // Call the function.
+//! func();
+//!
+//! // Release the function if not needed anymore.
+//! runtime.release(func);
+//! ~~~
+//!
+//! This was a very primitive example showing how the generated code can be
+//! executed by using the foundation classes AsmJit offers. In production
+//! 
+//! nobody is likely to generate a function that is called only once and freed
+//! immediately after the call; however, the concept of releasing code that is
+//! no longer needed should be clear.
+//!
+//! Labels
+//! ------
+//!
+//! While generating assembler code, you will usually need to create complex
+//! code with labels. Labels are fully supported and you can call `jmp` or `je`
+//! (and similar) instructions with a label that is already bound or not yet
+//! bound. Each label has to be bound to an offset. To bind a label to a
+//! specific offset, use the `Assembler::bind()` function.
+//!
+//! The next example contains complete code that creates a simple memory-copy
+//! function (working in DWORD entities):
+//!
+//! ~~~
+//! // Example: Usage of Label (32-bit code).
+//! //
+//! // Create a simple DWORD memory copy function:
+//! // ASMJIT_STDCALL void copy32(uint32_t* dst, const uint32_t* src, size_t count);
+//! using namespace asmjit;
+//!
+//! // Assembler instance.
+//! JitRuntime runtime;
+//! X86Assembler a(&runtime);
+//!
+//! // Constants.
+//! const int arg_offset = 8; // Arguments offset (STDCALL EBP).
+//! const int arg_size = 12;  // Arguments size.
+//!
+//! // Labels.
+//! Label L_Loop = a.newLabel();
+//!
+//! // Prolog.
+//! a.push(ebp);
+//! a.mov(ebp, esp);
+//! a.push(esi);
+//! a.push(edi);
+//!
+//! // Fetch arguments.
+//! a.mov(edi, dword_ptr(ebp, arg_offset + 0)); // Get dst.
+//! a.mov(esi, dword_ptr(ebp, arg_offset + 4)); // Get src.
+//! a.mov(ecx, dword_ptr(ebp, arg_offset + 8)); // Get count.
+//!
+//! // Bind L_Loop label to here.
+//! a.bind(L_Loop);
+//!
+//! // Copy 4 bytes.
+//! a.mov(eax, dword_ptr(esi));
+//! a.mov(dword_ptr(edi), eax);
+//!
+//! // Increment pointers.
+//! a.add(esi, 4);
+//! a.add(edi, 4);
+//!
+//! // Repeat the loop while (--ecx != 0).
+//! a.dec(ecx);
+//! a.jnz(L_Loop);
+//!
+//! // Epilog.
+//! a.pop(edi);
+//! a.pop(esi);
+//! a.mov(esp, ebp);
+//! a.pop(ebp);
+//!
+//! // Return: Pop the stack by `arg_size` as defined by the `STDCALL` convention.
+//! a.ret(arg_size);
+//! ~~~
+//!
+//! If you need more abstraction for generating assembler code and you want to
+//! hide the calling-convention differences between 32-bit and 64-bit operating
+//! systems, look at the `Compiler` class, which is designed for higher level
+//! code generation.
+//!
+//! Advanced Code Generation
+//! ------------------------
+//!
+//! This section describes some advanced code generation features of the
+//! `Assembler` class that are easily overlooked. The first thing that is very
+//! likely needed is generic register support. In the previous examples, named
+//! registers were used. AsmJit contains functions that can convert a register
+//! index into an operand and back.
+//!
+//! Let's define a function that can be used to generate some abstract code:
+//!
+//! ~~~
+//! // Simple function that generates a DWORD copy.
+//! void genDWordCopy(X86Assembler& a, const X86GpReg& dst, const X86GpReg& src, const X86GpReg& tmp) {
+//!   a.mov(tmp, dword_ptr(src));
+//!   a.mov(dword_ptr(dst), tmp);
+//! }
+//! ~~~
+//!
+//! This function can be called like `genDWordCopy(a, edi, esi, ebx)` or with
+//! any existing `X86GpReg` instances. This abstraction allows joining multiple
+//! code sections together without rewriting each one to use specific registers.
+//! You only need to take care of implicit registers that may be used by several
+//! instructions (like mul, imul, div, idiv, shifts, etc.).
+//!
+//! The next, more advanced, but often needed technique is building your own
+//! register allocator. The X86 architecture contains 8 general purpose
+//! registers, 8 MM registers and 8 XMM/YMM/ZMM registers. The X64 architecture
+//! extends the count of GP registers and XMM/YMM/ZMM registers to 16, and
+//! AVX-512 extends the XMM/YMM/ZMM SIMD registers to 32.
+//!
+//! To create a general purpose register operand from a register index use
+//! `gpb_lo()`, `gpb_hi()`, `gpw()`, `gpd()` or `gpq()`. To create registers of
+//! other types there are `fp()`, `mm()`, `k()`, `xmm()`, `ymm()` and `zmm()`
+//! functions available that return a new register operand.
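+//!
+//! For example, a minimal sketch (assuming `asmjit::x86` is imported as in the
+//! examples above, and that `index` is a valid GP register index for the
+//! current mode; `zeroGpd` is a hypothetical helper, not part of the library):
+//!
+//! ~~~
+//! // Zero the DWORD GP register with the given index (e.g. 2 maps to edx).
+//! void zeroGpd(X86Assembler& a, uint32_t index) {
+//!   X86GpReg r = gpd(index); // Build a register operand from the index.
+//!   a.xor_(r, r);            // xor r, r - the idiomatic way to zero it.
+//! }
+//! ~~~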
+//!
+//! \sa X86Compiler.
+class ASMJIT_VIRTAPI X86Assembler : public Assembler {
+ public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API X86Assembler(Runtime* runtime, uint32_t arch
+#if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
+    = kArchHost
+#endif // ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
+  );
+  ASMJIT_API virtual ~X86Assembler();
+
+  // --------------------------------------------------------------------------
+  // [Arch]
+  // --------------------------------------------------------------------------
+
+  //! \internal
+  //!
+  //! Set the assembler architecture to `kArchX86` or `kArchX64`.
+  ASMJIT_API Error _setArch(uint32_t arch);
+
+  //! Get the count of registers of the current architecture and mode.
+  ASMJIT_INLINE const X86RegCount& getRegCount() const { return _regCount; }
+
+  //! Get a DWORD or QWORD register depending on the current architecture.
+  ASMJIT_INLINE X86GpReg gpz(uint32_t index) const { return X86GpReg(zax, index); }
+
+  //! Create an `intptr_t` memory operand depending on the current architecture.
+  ASMJIT_INLINE X86Mem intptr_ptr(const X86GpReg& base, int32_t disp = 0) const {
+    return x86::ptr(base, disp, _regSize);
+  }
+  //! \overload
+  ASMJIT_INLINE X86Mem intptr_ptr(const X86GpReg& base, const X86GpReg& index, uint32_t shift = 0, int32_t disp = 0) const {
+    return x86::ptr(base, index, shift, disp, _regSize);
+  }
+  //! \overload
+  ASMJIT_INLINE X86Mem intptr_ptr(const Label& label, int32_t disp = 0) const {
+    return x86::ptr(label, disp, _regSize);
+  }
+  //! \overload
+  ASMJIT_INLINE X86Mem intptr_ptr(const Label& label, const X86GpReg& index, uint32_t shift, int32_t disp = 0) const {
+    return x86::ptr(label, index, shift, disp, _regSize);
+  }
+  //! \overload
+  ASMJIT_INLINE X86Mem intptr_ptr(const X86RipReg& rip, int32_t disp = 0) const {
+    return x86::ptr(rip, disp, _regSize);
+  }
+  //! \overload
+  ASMJIT_INLINE X86Mem intptr_ptr_abs(Ptr pAbs, int32_t disp = 0) const {
+    return x86::ptr_abs(pAbs, disp, _regSize);
+  }
+  //! \overload
+  ASMJIT_INLINE X86Mem intptr_ptr_abs(Ptr pAbs, const X86GpReg& index, uint32_t shift, int32_t disp = 0) const {
+    return x86::ptr_abs(pAbs, index, shift, disp, _regSize);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Embed]
+  // --------------------------------------------------------------------------
+
+  //! Add 8-bit integer data to the instruction stream.
+  ASMJIT_INLINE void db(uint8_t x) { embed(&x, 1); }
+  //! Add 16-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dw(uint16_t x) { embed(&x, 2); }
+  //! Add 32-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dd(uint32_t x) { embed(&x, 4); }
+  //! Add 64-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dq(uint64_t x) { embed(&x, 8); }
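+
+  // A short usage sketch (hypothetical `X86Assembler a`): the embed helpers
+  // above append raw data bytes - e.g. constants or padding - to the
+  // instruction stream at the current position:
+  //   a.dd(0x3F800000u); // 1.0f as raw IEEE-754 bits.
+  //   a.db(0x90);        // A single NOP opcode byte as padding.
+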
+  //! Add 8-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dint8(int8_t x) { embed(&x, sizeof(int8_t)); }
+  //! Add 8-bit integer data to the instruction stream.
+  ASMJIT_INLINE void duint8(uint8_t x) { embed(&x, sizeof(uint8_t)); }
+
+  //! Add 16-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dint16(int16_t x) { embed(&x, sizeof(int16_t)); }
+  //! Add 16-bit integer data to the instruction stream.
+  ASMJIT_INLINE void duint16(uint16_t x) { embed(&x, sizeof(uint16_t)); }
+
+  //! Add 32-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dint32(int32_t x) { embed(&x, sizeof(int32_t)); }
+  //! Add 32-bit integer data to the instruction stream.
+  ASMJIT_INLINE void duint32(uint32_t x) { embed(&x, sizeof(uint32_t)); }
+
+  //! Add 64-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dint64(int64_t x) { embed(&x, sizeof(int64_t)); }
+  //! Add 64-bit integer data to the instruction stream.
+  ASMJIT_INLINE void duint64(uint64_t x) { embed(&x, sizeof(uint64_t)); }
+
+  //! Add float data to the instruction stream.
+  ASMJIT_INLINE void dfloat(float x) { embed(&x, sizeof(float)); }
+  //! Add double data to the instruction stream.
+  ASMJIT_INLINE void ddouble(double x) { embed(&x, sizeof(double)); }
+
+  //! Add MMX data to the instruction stream.
+  ASMJIT_INLINE void dmm(const Vec64& x) { embed(&x, sizeof(Vec64)); }
+  //! Add XMM data to the instruction stream.
+  ASMJIT_INLINE void dxmm(const Vec128& x) { embed(&x, sizeof(Vec128)); }
+  //! Add YMM data to the instruction stream.
+  ASMJIT_INLINE void dymm(const Vec256& x) { embed(&x, sizeof(Vec256)); }
+
+  //! Add data in a given structure instance to the instruction stream.
+  template<typename T>
+  ASMJIT_INLINE void dstruct(const T& x) { embed(&x, static_cast<uint32_t>(sizeof(T))); }
+
+  //! Embed an absolute label pointer (4 or 8 bytes).
+  ASMJIT_API Error embedLabel(const Label& op);
+
+  // --------------------------------------------------------------------------
+  // [Align]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual Error align(uint32_t alignMode, uint32_t offset) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Reloc]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual size_t _relocCode(void* dst, Ptr baseAddress) const noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual Error _emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3);
+
+  // --------------------------------------------------------------------------
+  // [Options]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_X86_EMIT_OPTIONS(X86Assembler)
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Count of registers depending on the architecture selected.
+  X86RegCount _regCount;
+
+  //! EAX or RAX register depending on the architecture selected.
+  X86GpReg zax;
+  //! ECX or RCX register depending on the architecture selected.
+  X86GpReg zcx;
+  //! EDX or RDX register depending on the architecture selected.
+  X86GpReg zdx;
+  //! EBX or RBX register depending on the architecture selected.
+  X86GpReg zbx;
+  //! ESP or RSP register depending on the architecture selected.
+ X86GpReg zsp; + //! EBP or RBP register depending on the architecture selected. + X86GpReg zbp; + //! ESI or RSI register depending on the architecture selected. + X86GpReg zsi; + //! EDI or RDI register depending on the architecture selected. + X86GpReg zdi; + + // -------------------------------------------------------------------------- + // [Emit] + // -------------------------------------------------------------------------- + +#define INST_0x(inst, code) \ + ASMJIT_INLINE Error inst() { \ + return emit(code); \ + } + +#define INST_1x(inst, code, T0) \ + ASMJIT_INLINE Error inst(const T0& o0) { \ + return emit(code, o0); \ + } + +#define INST_1i(inst, code, T0) \ + ASMJIT_INLINE Error inst(const T0& o0) { return emit(code, o0); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(int o0) { return emit(code, Utils::asInt(o0)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(unsigned int o0) { return emit(code, Utils::asInt(o0)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(int64_t o0) { return emit(code, Utils::asInt(o0)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(uint64_t o0) { return emit(code, Utils::asInt(o0)); } + +#define INST_1cc(inst, code, _Translate_, T0) \ + ASMJIT_INLINE Error inst(uint32_t cc, const T0& o0) { \ + return emit(_Translate_(cc), o0); \ + } \ + \ + ASMJIT_INLINE Error inst##a(const T0& o0) { return emit(code##a, o0); } \ + ASMJIT_INLINE Error inst##ae(const T0& o0) { return emit(code##ae, o0); } \ + ASMJIT_INLINE Error inst##b(const T0& o0) { return emit(code##b, o0); } \ + ASMJIT_INLINE Error inst##be(const T0& o0) { return emit(code##be, o0); } \ + ASMJIT_INLINE Error inst##c(const T0& o0) { return emit(code##c, o0); } \ + ASMJIT_INLINE Error inst##e(const T0& o0) { return emit(code##e, o0); } \ + ASMJIT_INLINE Error inst##g(const T0& o0) { return emit(code##g, o0); } \ + ASMJIT_INLINE Error inst##ge(const T0& o0) { return emit(code##ge, o0); } \ + ASMJIT_INLINE Error inst##l(const T0& o0) { return emit(code##l, o0); } \ + ASMJIT_INLINE Error inst##le(const T0& o0) { return emit(code##le, o0); } \ + ASMJIT_INLINE Error inst##na(const T0& o0) { return emit(code##na, o0); } \ + ASMJIT_INLINE Error inst##nae(const T0& o0) { return emit(code##nae, o0); } \ + ASMJIT_INLINE Error inst##nb(const T0& o0) { return emit(code##nb, o0); } \ + ASMJIT_INLINE Error inst##nbe(const T0& o0) { return emit(code##nbe, o0); } \ + ASMJIT_INLINE Error inst##nc(const T0& o0) { return emit(code##nc, o0); } \ + ASMJIT_INLINE Error inst##ne(const T0& o0) { return emit(code##ne, o0); } \ + ASMJIT_INLINE Error inst##ng(const T0& o0) { return emit(code##ng, o0); } \ + ASMJIT_INLINE Error inst##nge(const T0& o0) { return emit(code##nge, o0); } \ + ASMJIT_INLINE Error inst##nl(const T0& o0) { return emit(code##nl, o0); } \ + ASMJIT_INLINE Error inst##nle(const T0& o0) { return emit(code##nle, o0); } \ + ASMJIT_INLINE Error inst##no(const T0& o0) { return emit(code##no, o0); } \ + ASMJIT_INLINE Error inst##np(const T0& o0) { return emit(code##np, o0); } \ + ASMJIT_INLINE Error inst##ns(const T0& o0) { return emit(code##ns, o0); } \ + ASMJIT_INLINE Error inst##nz(const T0& o0) { return emit(code##nz, o0); } \ + ASMJIT_INLINE Error inst##o(const T0& o0) { return emit(code##o, o0); } \ + ASMJIT_INLINE Error inst##p(const T0& o0) { return emit(code##p, o0); } \ + ASMJIT_INLINE Error inst##pe(const T0& o0) { return emit(code##pe, o0); } \ + ASMJIT_INLINE Error inst##po(const T0& o0) { return emit(code##po, o0); } \ + ASMJIT_INLINE Error inst##s(const T0& o0) { return 
emit(code##s, o0); } \ + ASMJIT_INLINE Error inst##z(const T0& o0) { return emit(code##z, o0); } + +#define INST_2x(inst, code, T0, T1) \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1) { \ + return emit(code, o0, o1); \ + } + +#define INST_2i(inst, code, T0, T1) \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1) { return emit(code, o0, o1); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, int o1) { return emit(code, o0, Utils::asInt(o1)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, unsigned int o1) { return emit(code, o0, Utils::asInt(o1)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, int64_t o1) { return emit(code, o0, Utils::asInt(o1)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, uint64_t o1) { return emit(code, o0, Utils::asInt(o1)); } + +#define INST_2cc(inst, code, _Translate_, T0, T1) \ + ASMJIT_INLINE Error inst(uint32_t cc, const T0& o0, const T1& o1) { \ + return emit(_Translate_(cc), o0, o1); \ + } \ + \ + ASMJIT_INLINE Error inst##a(const T0& o0, const T1& o1) { return emit(code##a, o0, o1); } \ + ASMJIT_INLINE Error inst##ae(const T0& o0, const T1& o1) { return emit(code##ae, o0, o1); } \ + ASMJIT_INLINE Error inst##b(const T0& o0, const T1& o1) { return emit(code##b, o0, o1); } \ + ASMJIT_INLINE Error inst##be(const T0& o0, const T1& o1) { return emit(code##be, o0, o1); } \ + ASMJIT_INLINE Error inst##c(const T0& o0, const T1& o1) { return emit(code##c, o0, o1); } \ + ASMJIT_INLINE Error inst##e(const T0& o0, const T1& o1) { return emit(code##e, o0, o1); } \ + ASMJIT_INLINE Error inst##g(const T0& o0, const T1& o1) { return emit(code##g, o0, o1); } \ + ASMJIT_INLINE Error inst##ge(const T0& o0, const T1& o1) { return emit(code##ge, o0, o1); } \ + ASMJIT_INLINE Error inst##l(const T0& o0, const T1& o1) { return emit(code##l, o0, o1); } \ + ASMJIT_INLINE Error inst##le(const T0& o0, const T1& o1) { return emit(code##le, o0, o1); } \ + ASMJIT_INLINE Error inst##na(const T0& o0, const T1& o1) { return emit(code##na, o0, o1); } \ + ASMJIT_INLINE Error inst##nae(const T0& o0, const T1& o1) { return emit(code##nae, o0, o1); } \ + ASMJIT_INLINE Error inst##nb(const T0& o0, const T1& o1) { return emit(code##nb, o0, o1); } \ + ASMJIT_INLINE Error inst##nbe(const T0& o0, const T1& o1) { return emit(code##nbe, o0, o1); } \ + ASMJIT_INLINE Error inst##nc(const T0& o0, const T1& o1) { return emit(code##nc, o0, o1); } \ + ASMJIT_INLINE Error inst##ne(const T0& o0, const T1& o1) { return emit(code##ne, o0, o1); } \ + ASMJIT_INLINE Error inst##ng(const T0& o0, const T1& o1) { return emit(code##ng, o0, o1); } \ + ASMJIT_INLINE Error inst##nge(const T0& o0, const T1& o1) { return emit(code##nge, o0, o1); } \ + ASMJIT_INLINE Error inst##nl(const T0& o0, const T1& o1) { return emit(code##nl, o0, o1); } \ + ASMJIT_INLINE Error inst##nle(const T0& o0, const T1& o1) { return emit(code##nle, o0, o1); } \ + ASMJIT_INLINE Error inst##no(const T0& o0, const T1& o1) { return emit(code##no, o0, o1); } \ + ASMJIT_INLINE Error inst##np(const T0& o0, const T1& o1) { return emit(code##np, o0, o1); } \ + ASMJIT_INLINE Error inst##ns(const T0& o0, const T1& o1) { return emit(code##ns, o0, o1); } \ + ASMJIT_INLINE Error inst##nz(const T0& o0, const T1& o1) { return emit(code##nz, o0, o1); } \ + ASMJIT_INLINE Error inst##o(const T0& o0, const T1& o1) { return emit(code##o, o0, o1); } \ + ASMJIT_INLINE Error inst##p(const T0& o0, const T1& o1) { return emit(code##p, o0, o1); } \ + ASMJIT_INLINE Error inst##pe(const T0& 
o0, const T1& o1) { return emit(code##pe, o0, o1); } \ + ASMJIT_INLINE Error inst##po(const T0& o0, const T1& o1) { return emit(code##po, o0, o1); } \ + ASMJIT_INLINE Error inst##s(const T0& o0, const T1& o1) { return emit(code##s, o0, o1); } \ + ASMJIT_INLINE Error inst##z(const T0& o0, const T1& o1) { return emit(code##z, o0, o1); } + +#define INST_3x(inst, code, T0, T1, T2) \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, const T2& o2) { return emit(code, o0, o1, o2); } + +#define INST_3i(inst, code, T0, T1, T2) \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, const T2& o2) { return emit(code, o0, o1, o2); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, int o2) { return emit(code, o0, o1, Utils::asInt(o2)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, unsigned int o2) { return emit(code, o0, o1, Utils::asInt(o2)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, int64_t o2) { return emit(code, o0, o1, Utils::asInt(o2)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, uint64_t o2) { return emit(code, o0, o1, Utils::asInt(o2)); } + +#define INST_3ii(inst, code, T0, T1, T2) \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, const T2& o2) { return emit(code, o0, o1, o2); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, int o1, int o2) { return emit(code, o0, Imm(o1), Utils::asInt(o2)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, unsigned int o1, unsigned int o2) { return emit(code, o0, Imm(o1), Utils::asInt(o2)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, int64_t o1, int64_t o2) { return emit(code, o0, Imm(o1), Utils::asInt(o2)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, uint64_t o1, uint64_t o2) { return emit(code, o0, Imm(o1), Utils::asInt(o2)); } + +#define INST_4x(inst, code, T0, T1, T2, T3) \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return emit(code, o0, o1, o2, o3); } + +#define INST_4i(inst, code, T0, T1, T2, T3) \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return emit(code, o0, o1, o2, o3); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, const T2& o2, int o3) { return emit(code, o0, o1, o2, Utils::asInt(o3)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, const T2& o2, unsigned int o3) { return emit(code, o0, o1, o2, Utils::asInt(o3)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, const T2& o2, int64_t o3) { return emit(code, o0, o1, o2, Utils::asInt(o3)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, const T2& o2, uint64_t o3) { return emit(code, o0, o1, o2, Utils::asInt(o3)); } + +#define INST_4ii(inst, code, T0, T1, T2, T3) \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return emit(code, o0, o1, o2, o3); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, int o2, int o3) { return emit(code, o0, o1, Imm(o2), Utils::asInt(o3)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, unsigned int o2, unsigned int o3) { return emit(code, o0, o1, Imm(o2), Utils::asInt(o3)); } \ + /*! \overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, int64_t o2, int64_t o3) { return emit(code, o0, o1, Imm(o2), Utils::asInt(o3)); } \ + /*! 
\overload */ \ + ASMJIT_INLINE Error inst(const T0& o0, const T1& o1, uint64_t o2, uint64_t o3) { return emit(code, o0, o1, Imm(o2), Utils::asInt(o3)); } + + // -------------------------------------------------------------------------- + // [X86/X64] + // -------------------------------------------------------------------------- + + //! Add with Carry. + INST_2x(adc, kX86InstIdAdc, X86GpReg, X86GpReg) + //! \overload + INST_2x(adc, kX86InstIdAdc, X86GpReg, X86Mem) + //! \overload + INST_2i(adc, kX86InstIdAdc, X86GpReg, Imm) + //! \overload + INST_2x(adc, kX86InstIdAdc, X86Mem, X86GpReg) + //! \overload + INST_2i(adc, kX86InstIdAdc, X86Mem, Imm) + + //! Add. + INST_2x(add, kX86InstIdAdd, X86GpReg, X86GpReg) + //! \overload + INST_2x(add, kX86InstIdAdd, X86GpReg, X86Mem) + //! \overload + INST_2i(add, kX86InstIdAdd, X86GpReg, Imm) + //! \overload + INST_2x(add, kX86InstIdAdd, X86Mem, X86GpReg) + //! \overload + INST_2i(add, kX86InstIdAdd, X86Mem, Imm) + + //! And. + INST_2x(and_, kX86InstIdAnd, X86GpReg, X86GpReg) + //! \overload + INST_2x(and_, kX86InstIdAnd, X86GpReg, X86Mem) + //! \overload + INST_2i(and_, kX86InstIdAnd, X86GpReg, Imm) + //! \overload + INST_2x(and_, kX86InstIdAnd, X86Mem, X86GpReg) + //! \overload + INST_2i(and_, kX86InstIdAnd, X86Mem, Imm) + + //! Bit scan forward. + INST_2x(bsf, kX86InstIdBsf, X86GpReg, X86GpReg) + //! \overload + INST_2x(bsf, kX86InstIdBsf, X86GpReg, X86Mem) + + //! Bit scan reverse. + INST_2x(bsr, kX86InstIdBsr, X86GpReg, X86GpReg) + //! \overload + INST_2x(bsr, kX86InstIdBsr, X86GpReg, X86Mem) + + //! Byte swap (32-bit or 64-bit registers only) (i486). + INST_1x(bswap, kX86InstIdBswap, X86GpReg) + + //! Bit test. + INST_2x(bt, kX86InstIdBt, X86GpReg, X86GpReg) + //! \overload + INST_2i(bt, kX86InstIdBt, X86GpReg, Imm) + //! \overload + INST_2x(bt, kX86InstIdBt, X86Mem, X86GpReg) + //! \overload + INST_2i(bt, kX86InstIdBt, X86Mem, Imm) + + //! Bit test and complement. + INST_2x(btc, kX86InstIdBtc, X86GpReg, X86GpReg) + //! \overload + INST_2i(btc, kX86InstIdBtc, X86GpReg, Imm) + //! \overload + INST_2x(btc, kX86InstIdBtc, X86Mem, X86GpReg) + //! \overload + INST_2i(btc, kX86InstIdBtc, X86Mem, Imm) + + //! Bit test and reset. + INST_2x(btr, kX86InstIdBtr, X86GpReg, X86GpReg) + //! \overload + INST_2i(btr, kX86InstIdBtr, X86GpReg, Imm) + //! \overload + INST_2x(btr, kX86InstIdBtr, X86Mem, X86GpReg) + //! \overload + INST_2i(btr, kX86InstIdBtr, X86Mem, Imm) + + //! Bit test and set. + INST_2x(bts, kX86InstIdBts, X86GpReg, X86GpReg) + //! \overload + INST_2i(bts, kX86InstIdBts, X86GpReg, Imm) + //! \overload + INST_2x(bts, kX86InstIdBts, X86Mem, X86GpReg) + //! \overload + INST_2i(bts, kX86InstIdBts, X86Mem, Imm) + + //! Call. + INST_1x(call, kX86InstIdCall, X86GpReg) + //! \overload + INST_1x(call, kX86InstIdCall, X86Mem) + //! \overload + INST_1x(call, kX86InstIdCall, Label) + //! \overload + INST_1x(call, kX86InstIdCall, Imm) + //! \overload + ASMJIT_INLINE Error call(Ptr o0) { return call(Imm(o0)); } + + //! Clear carry flag. + INST_0x(clc, kX86InstIdClc) + //! Clear direction flag. + INST_0x(cld, kX86InstIdCld) + //! Complement carry flag. + INST_0x(cmc, kX86InstIdCmc) + + //! Convert BYTE to WORD (AX <- Sign Extend AL). + INST_0x(cbw, kX86InstIdCbw) + //! Convert DWORD to QWORD (EDX:EAX <- Sign Extend EAX). + INST_0x(cdq, kX86InstIdCdq) + //! Convert DWORD to QWORD (RAX <- Sign Extend EAX) (X64 Only). + INST_0x(cdqe, kX86InstIdCdqe) + //! Convert QWORD to DQWORD (RDX:RAX <- Sign Extend RAX) (X64 Only). + INST_0x(cqo, kX86InstIdCqo) + //! 
Convert WORD to DWORD (DX:AX <- Sign Extend AX).
+  INST_0x(cwd, kX86InstIdCwd)
+  //! Convert WORD to DWORD (EAX <- Sign Extend AX).
+  INST_0x(cwde, kX86InstIdCwde)
+
+  //! Conditional move.
+  INST_2cc(cmov, kX86InstIdCmov, X86Util::condToCmovcc, X86GpReg, X86GpReg)
+  //! Conditional move.
+  INST_2cc(cmov, kX86InstIdCmov, X86Util::condToCmovcc, X86GpReg, X86Mem)
+
+  //! Compare two operands.
+  INST_2x(cmp, kX86InstIdCmp, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(cmp, kX86InstIdCmp, X86GpReg, X86Mem)
+  //! \overload
+  INST_2i(cmp, kX86InstIdCmp, X86GpReg, Imm)
+  //! \overload
+  INST_2x(cmp, kX86InstIdCmp, X86Mem, X86GpReg)
+  //! \overload
+  INST_2i(cmp, kX86InstIdCmp, X86Mem, Imm)
+
+  //! Compare BYTE in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(cmpsb, kX86InstIdCmpsB)
+  //! Compare DWORD in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(cmpsd, kX86InstIdCmpsD)
+  //! Compare QWORD in ES:[RDI] and DS:[RSI] (X64 Only).
+  INST_0x(cmpsq, kX86InstIdCmpsQ)
+  //! Compare WORD in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(cmpsw, kX86InstIdCmpsW)
+
+  //! Compare and exchange (i486).
+  INST_2x(cmpxchg, kX86InstIdCmpxchg, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(cmpxchg, kX86InstIdCmpxchg, X86Mem, X86GpReg)
+
+  //! Compare and exchange 128-bit value in RDX:RAX with the memory operand (X64 Only).
+  INST_1x(cmpxchg16b, kX86InstIdCmpxchg16b, X86Mem)
+  //! Compare and exchange 64-bit value in EDX:EAX with the memory operand (Pentium).
+  INST_1x(cmpxchg8b, kX86InstIdCmpxchg8b, X86Mem)
+
+  //! CPU identification (i486).
+  INST_0x(cpuid, kX86InstIdCpuid)
+
+  //! Decimal adjust AL after addition (X86 Only).
+  INST_0x(daa, kX86InstIdDaa)
+  //! Decimal adjust AL after subtraction (X86 Only).
+  INST_0x(das, kX86InstIdDas)
+
+  //! Decrement by 1.
+  INST_1x(dec, kX86InstIdDec, X86GpReg)
+  //! \overload
+  INST_1x(dec, kX86InstIdDec, X86Mem)
+
+  //! Unsigned divide (xDX:xAX <- xDX:xAX / o0).
+  INST_1x(div, kX86InstIdDiv, X86GpReg)
+  //! \overload
+  INST_1x(div, kX86InstIdDiv, X86Mem)
+
+  //! Make stack frame for procedure parameters.
+  INST_2x(enter, kX86InstIdEnter, Imm, Imm)
+
+  //! Signed divide (xDX:xAX <- xDX:xAX / o0).
+  INST_1x(idiv, kX86InstIdIdiv, X86GpReg)
+  //! \overload
+  INST_1x(idiv, kX86InstIdIdiv, X86Mem)
+
+  //! Signed multiply (xDX:xAX <- xAX * o0).
+  INST_1x(imul, kX86InstIdImul, X86GpReg)
+  //! \overload
+  INST_1x(imul, kX86InstIdImul, X86Mem)
+
+  //! Signed multiply.
+  INST_2x(imul, kX86InstIdImul, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(imul, kX86InstIdImul, X86GpReg, X86Mem)
+  //! \overload
+  INST_2i(imul, kX86InstIdImul, X86GpReg, Imm)
+
+  //! Signed multiply.
+  INST_3i(imul, kX86InstIdImul, X86GpReg, X86GpReg, Imm)
+  //! \overload
+  INST_3i(imul, kX86InstIdImul, X86GpReg, X86Mem, Imm)
+
+  //! Increment by 1.
+  INST_1x(inc, kX86InstIdInc, X86GpReg)
+  //! \overload
+  INST_1x(inc, kX86InstIdInc, X86Mem)
+
+  //! Interrupt.
+  INST_1i(int_, kX86InstIdInt, Imm)
+  //! Interrupt 3 - trap to debugger.
+  ASMJIT_INLINE Error int3() { return int_(3); }
+
+  //! Jump to `label` if the condition `cc` is met.
+  INST_1cc(j, kX86InstIdJ, X86Util::condToJcc, Label)
+
+  //! Short jump if CX/ECX/RCX is zero.
+  INST_2x(jecxz, kX86InstIdJecxz, X86GpReg, Label)
+
+  //! Jump.
+  INST_1x(jmp, kX86InstIdJmp, X86GpReg)
+  //! \overload
+  INST_1x(jmp, kX86InstIdJmp, X86Mem)
+  //! \overload
+  INST_1x(jmp, kX86InstIdJmp, Label)
+  //! \overload
+  INST_1x(jmp, kX86InstIdJmp, Imm)
+  //! \overload
+  ASMJIT_INLINE Error jmp(Ptr dst) { return jmp(Imm(dst)); }
+
+  //! Load AH from flags.
+  INST_0x(lahf, kX86InstIdLahf)
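+
+  // A hedged usage sketch (hypothetical assembler `a` and label `L_Skip`):
+  // the INST_1cc/INST_2cc macros above expand into one method per condition
+  // code, so conditional branches read like their mnemonics:
+  //   a.cmp(eax, 0); // Set flags.
+  //   a.je(L_Skip);  // `j` + `e` suffix, generated by INST_1cc(j, ...).
+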
+  //! Load effective address.
+  INST_2x(lea, kX86InstIdLea, X86GpReg, X86Mem)
+
+  //! High level procedure exit.
+  INST_0x(leave, kX86InstIdLeave)
+
+  //! Load BYTE from DS:[ESI/RSI] to AL.
+  INST_0x(lodsb, kX86InstIdLodsB)
+  //! Load DWORD from DS:[ESI/RSI] to EAX.
+  INST_0x(lodsd, kX86InstIdLodsD)
+  //! Load QWORD from DS:[RSI] to RAX (X64 Only).
+  INST_0x(lodsq, kX86InstIdLodsQ)
+  //! Load WORD from DS:[ESI/RSI] to AX.
+  INST_0x(lodsw, kX86InstIdLodsW)
+
+  //! Move.
+  INST_2x(mov, kX86InstIdMov, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(mov, kX86InstIdMov, X86GpReg, X86Mem)
+  //! \overload
+  INST_2i(mov, kX86InstIdMov, X86GpReg, Imm)
+  //! \overload
+  INST_2x(mov, kX86InstIdMov, X86Mem, X86GpReg)
+  //! \overload
+  INST_2i(mov, kX86InstIdMov, X86Mem, Imm)
+
+  //! Move from a segment register.
+  INST_2x(mov, kX86InstIdMov, X86GpReg, X86SegReg)
+  //! \overload
+  INST_2x(mov, kX86InstIdMov, X86Mem, X86SegReg)
+  //! Move to a segment register.
+  INST_2x(mov, kX86InstIdMov, X86SegReg, X86GpReg)
+  //! \overload
+  INST_2x(mov, kX86InstIdMov, X86SegReg, X86Mem)
+
+  //! Move (AL|AX|EAX|RAX <- absolute address in immediate).
+  INST_2x(mov_ptr, kX86InstIdMovPtr, X86GpReg, Imm);
+  //! \overload
+  ASMJIT_INLINE Error mov_ptr(const X86GpReg& o0, Ptr o1) {
+    return emit(kX86InstIdMovPtr, o0, Imm(o1));
+  }
+
+  //! Move (absolute address in immediate <- AL|AX|EAX|RAX).
+  INST_2x(mov_ptr, kX86InstIdMovPtr, Imm, X86GpReg);
+  //! \overload
+  ASMJIT_INLINE Error mov_ptr(Ptr o0, const X86GpReg& o1) {
+    return emit(kX86InstIdMovPtr, Imm(o0), o1);
+  }
+
+  //! Move data after swapping bytes (SSE3 - Atom).
+  INST_2x(movbe, kX86InstIdMovbe, X86GpReg, X86Mem);
+  //! \overload
+  INST_2x(movbe, kX86InstIdMovbe, X86Mem, X86GpReg);
+
+  //! Move BYTE from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_0x(movsb, kX86InstIdMovsB)
+  //! Move DWORD from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_0x(movsd, kX86InstIdMovsD)
+  //! Move QWORD from DS:[RSI] to ES:[RDI] (X64 Only).
+  INST_0x(movsq, kX86InstIdMovsQ)
+  //! Move WORD from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_0x(movsw, kX86InstIdMovsW)
+
+  //! Move with sign-extension.
+  INST_2x(movsx, kX86InstIdMovsx, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(movsx, kX86InstIdMovsx, X86GpReg, X86Mem)
+
+  //! Move DWORD to QWORD with sign-extension (X64 Only).
+  INST_2x(movsxd, kX86InstIdMovsxd, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(movsxd, kX86InstIdMovsxd, X86GpReg, X86Mem)
+
+  //! Move with zero-extension.
+  INST_2x(movzx, kX86InstIdMovzx, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(movzx, kX86InstIdMovzx, X86GpReg, X86Mem)
+
+  //! Unsigned multiply (xDX:xAX <- xAX * o0).
+  INST_1x(mul, kX86InstIdMul, X86GpReg)
+  //! \overload
+  INST_1x(mul, kX86InstIdMul, X86Mem)
+
+  //! Two's complement negation.
+  INST_1x(neg, kX86InstIdNeg, X86GpReg)
+  //! \overload
+  INST_1x(neg, kX86InstIdNeg, X86Mem)
+
+  //! No operation.
+  INST_0x(nop, kX86InstIdNop)
+
+  //! One's complement negation.
+  INST_1x(not_, kX86InstIdNot, X86GpReg)
+  //! \overload
+  INST_1x(not_, kX86InstIdNot, X86Mem)
+
+  //! Or.
+  INST_2x(or_, kX86InstIdOr, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(or_, kX86InstIdOr, X86GpReg, X86Mem)
+  //! \overload
+  INST_2i(or_, kX86InstIdOr, X86GpReg, Imm)
+  //! \overload
+  INST_2x(or_, kX86InstIdOr, X86Mem, X86GpReg)
+  //! \overload
+  INST_2i(or_, kX86InstIdOr, X86Mem, Imm)
+
+  //! Pop a value from the stack.
+  INST_1x(pop, kX86InstIdPop, X86GpReg)
+  //! \overload
+  INST_1x(pop, kX86InstIdPop, X86Mem)
+
+  //! Pop a segment register from the stack.
+  //!
+  //! NOTE: There is no instruction to pop the CS segment register.
+  INST_1x(pop, kX86InstIdPop, X86SegReg);
+
+  //! Pop all Gp registers - EDI|ESI|EBP|Ign|EBX|EDX|ECX|EAX (X86 Only).
+  INST_0x(popa, kX86InstIdPopa)
+
+  //! Pop stack into the EFLAGS register (32-bit or 64-bit).
+  INST_0x(popf, kX86InstIdPopf)
+
+  //! Push WORD or DWORD/QWORD on the stack.
+  INST_1x(push, kX86InstIdPush, X86GpReg)
+  //! Push WORD or DWORD/QWORD on the stack.
+  INST_1x(push, kX86InstIdPush, X86Mem)
+  //! Push a segment register on the stack.
+  INST_1x(push, kX86InstIdPush, X86SegReg)
+  //! Push WORD or DWORD/QWORD on the stack.
+  INST_1i(push, kX86InstIdPush, Imm)
+
+  //! Push all Gp registers - EAX|ECX|EDX|EBX|ESP|EBP|ESI|EDI (X86 Only).
+  INST_0x(pusha, kX86InstIdPusha)
+
+  //! Push the EFLAGS register (32-bit or 64-bit) on the stack.
+  INST_0x(pushf, kX86InstIdPushf)
+
+  //! Rotate bits left.
+  //!
+  //! NOTE: `o1` register can be only `cl`.
+  INST_2x(rcl, kX86InstIdRcl, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(rcl, kX86InstIdRcl, X86Mem, X86GpReg)
+  //! Rotate bits left.
+  INST_2i(rcl, kX86InstIdRcl, X86GpReg, Imm)
+  //! \overload
+  INST_2i(rcl, kX86InstIdRcl, X86Mem, Imm)
+
+  //! Rotate bits right.
+  //!
+  //! NOTE: `o1` register can be only `cl`.
+  INST_2x(rcr, kX86InstIdRcr, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(rcr, kX86InstIdRcr, X86Mem, X86GpReg)
+  //! Rotate bits right.
+  INST_2i(rcr, kX86InstIdRcr, X86GpReg, Imm)
+  //! \overload
+  INST_2i(rcr, kX86InstIdRcr, X86Mem, Imm)
+
+  //! Read time-stamp counter (Pentium).
+  INST_0x(rdtsc, kX86InstIdRdtsc)
+  //! Read time-stamp counter and processor id (Pentium).
+  INST_0x(rdtscp, kX86InstIdRdtscp)
+
+  //! Repeated load ECX/RCX BYTEs from DS:[ESI/RSI] to AL.
+  INST_0x(rep_lodsb, kX86InstIdRepLodsB)
+  //! Repeated load ECX/RCX DWORDs from DS:[ESI/RSI] to EAX.
+  INST_0x(rep_lodsd, kX86InstIdRepLodsD)
+  //! Repeated load ECX/RCX QWORDs from DS:[RSI] to RAX (X64 Only).
+  INST_0x(rep_lodsq, kX86InstIdRepLodsQ)
+  //! Repeated load ECX/RCX WORDs from DS:[ESI/RSI] to AX.
+  INST_0x(rep_lodsw, kX86InstIdRepLodsW)
+
+  //! Repeated move ECX/RCX BYTEs from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_0x(rep_movsb, kX86InstIdRepMovsB)
+  //! Repeated move ECX/RCX DWORDs from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_0x(rep_movsd, kX86InstIdRepMovsD)
+  //! Repeated move ECX/RCX QWORDs from DS:[RSI] to ES:[RDI] (X64 Only).
+  INST_0x(rep_movsq, kX86InstIdRepMovsQ)
+  //! Repeated move ECX/RCX WORDs from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_0x(rep_movsw, kX86InstIdRepMovsW)
+
+  //! Repeated fill ECX/RCX BYTEs at ES:[EDI/RDI] with AL.
+  INST_0x(rep_stosb, kX86InstIdRepStosB)
+  //! Repeated fill ECX/RCX DWORDs at ES:[EDI/RDI] with EAX.
+  INST_0x(rep_stosd, kX86InstIdRepStosD)
+  //! Repeated fill ECX/RCX QWORDs at ES:[RDI] with RAX (X64 Only).
+  INST_0x(rep_stosq, kX86InstIdRepStosQ)
+  //! Repeated fill ECX/RCX WORDs at ES:[EDI/RDI] with AX.
+  INST_0x(rep_stosw, kX86InstIdRepStosW)
+
+  //! Repeated find non-AL BYTEs in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repe_cmpsb, kX86InstIdRepeCmpsB)
+  //! Repeated find non-EAX DWORDs in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repe_cmpsd, kX86InstIdRepeCmpsD)
+  //! Repeated find non-RAX QWORDs in ES:[RDI] and DS:[RSI] (X64 Only).
+  INST_0x(repe_cmpsq, kX86InstIdRepeCmpsQ)
+  //! Repeated find non-AX WORDs in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repe_cmpsw, kX86InstIdRepeCmpsW)
+
+  //! Repeated find non-AL BYTE starting at ES:[EDI/RDI].
+  INST_0x(repe_scasb, kX86InstIdRepeScasB)
+  //! Repeated find non-EAX DWORD starting at ES:[EDI/RDI].
+  INST_0x(repe_scasd, kX86InstIdRepeScasD)
+  //! Repeated find non-RAX QWORD starting at ES:[RDI] (X64 Only).
+  INST_0x(repe_scasq, kX86InstIdRepeScasQ)
+  //! Repeated find non-AX WORD starting at ES:[EDI/RDI].
+  INST_0x(repe_scasw, kX86InstIdRepeScasW)
+
+  //! Repeated find AL BYTEs in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repne_cmpsb, kX86InstIdRepneCmpsB)
+  //! Repeated find EAX DWORDs in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repne_cmpsd, kX86InstIdRepneCmpsD)
+  //! Repeated find RAX QWORDs in ES:[RDI] and DS:[RSI] (X64 Only).
+  INST_0x(repne_cmpsq, kX86InstIdRepneCmpsQ)
+  //! Repeated find AX WORDs in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repne_cmpsw, kX86InstIdRepneCmpsW)
+
+  //! Repeated find AL BYTEs starting at ES:[EDI/RDI].
+  INST_0x(repne_scasb, kX86InstIdRepneScasB)
+  //! Repeated find EAX DWORDs starting at ES:[EDI/RDI].
+  INST_0x(repne_scasd, kX86InstIdRepneScasD)
+  //! Repeated find RAX QWORDs starting at ES:[RDI] (X64 Only).
+  INST_0x(repne_scasq, kX86InstIdRepneScasQ)
+  //! Repeated find AX WORDs starting at ES:[EDI/RDI].
+  INST_0x(repne_scasw, kX86InstIdRepneScasW)
+
+  //! Return.
+  INST_0x(ret, kX86InstIdRet)
+  //! \overload
+  INST_1i(ret, kX86InstIdRet, Imm)
+
+  //! Rotate bits left.
+  //!
+  //! NOTE: `o1` register can be only `cl`.
+  INST_2x(rol, kX86InstIdRol, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(rol, kX86InstIdRol, X86Mem, X86GpReg)
+  //! Rotate bits left.
+  INST_2i(rol, kX86InstIdRol, X86GpReg, Imm)
+  //! \overload
+  INST_2i(rol, kX86InstIdRol, X86Mem, Imm)
+
+  //! Rotate bits right.
+  //!
+  //! NOTE: `o1` register can be only `cl`.
+  INST_2x(ror, kX86InstIdRor, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(ror, kX86InstIdRor, X86Mem, X86GpReg)
+  //! Rotate bits right.
+  INST_2i(ror, kX86InstIdRor, X86GpReg, Imm)
+  //! \overload
+  INST_2i(ror, kX86InstIdRor, X86Mem, Imm)
+
+  //! Store AH into flags.
+  INST_0x(sahf, kX86InstIdSahf)
+
+  //! Integer subtraction with borrow.
+  INST_2x(sbb, kX86InstIdSbb, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(sbb, kX86InstIdSbb, X86GpReg, X86Mem)
+  //! \overload
+  INST_2i(sbb, kX86InstIdSbb, X86GpReg, Imm)
+  //! \overload
+  INST_2x(sbb, kX86InstIdSbb, X86Mem, X86GpReg)
+  //! \overload
+  INST_2i(sbb, kX86InstIdSbb, X86Mem, Imm)
+
+  //! Shift bits left.
+  //!
+  //! NOTE: `o1` register can be only `cl`.
+  INST_2x(sal, kX86InstIdSal, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(sal, kX86InstIdSal, X86Mem, X86GpReg)
+  //! Shift bits left.
+  INST_2i(sal, kX86InstIdSal, X86GpReg, Imm)
+  //! \overload
+  INST_2i(sal, kX86InstIdSal, X86Mem, Imm)
+
+  //! Shift bits right.
+  //!
+  //! NOTE: `o1` register can be only `cl`.
+  INST_2x(sar, kX86InstIdSar, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(sar, kX86InstIdSar, X86Mem, X86GpReg)
+  //! Shift bits right.
+  INST_2i(sar, kX86InstIdSar, X86GpReg, Imm)
+  //! \overload
+  INST_2i(sar, kX86InstIdSar, X86Mem, Imm)
+
+  //! Find non-AL BYTE starting at ES:[EDI/RDI].
+  INST_0x(scasb, kX86InstIdScasB)
+  //! Find non-EAX DWORD starting at ES:[EDI/RDI].
+  INST_0x(scasd, kX86InstIdScasD)
+  //! Find non-RAX QWORD starting at ES:[RDI] (X64 Only).
+  INST_0x(scasq, kX86InstIdScasQ)
+  //! Find non-AX WORD starting at ES:[EDI/RDI].
+  INST_0x(scasw, kX86InstIdScasW)
+
+  //! Set byte on condition.
+  INST_1cc(set, kX86InstIdSet, X86Util::condToSetcc, X86GpReg)
+  //! Set byte on condition.
+  INST_1cc(set, kX86InstIdSet, X86Util::condToSetcc, X86Mem)
+
+  //! Shift bits left.
+  //!
+  //! NOTE: `o1` register can be only `cl`.
+  INST_2x(shl, kX86InstIdShl, X86GpReg, X86GpReg)
+  //!
\overload + INST_2x(shl, kX86InstIdShl, X86Mem, X86GpReg) + //! Shift bits left. + INST_2i(shl, kX86InstIdShl, X86GpReg, Imm) + //! \overload + INST_2i(shl, kX86InstIdShl, X86Mem, Imm) + + //! Shift bits right. + //! + //! NOTE: `o1` register can be only `cl`. + INST_2x(shr, kX86InstIdShr, X86GpReg, X86GpReg) + //! \overload + INST_2x(shr, kX86InstIdShr, X86Mem, X86GpReg) + //! Shift bits right. + INST_2i(shr, kX86InstIdShr, X86GpReg, Imm) + //! \overload + INST_2i(shr, kX86InstIdShr, X86Mem, Imm) + + //! Double precision shift left. + //! + //! NOTE: `o2` register can be only `cl` register. + INST_3x(shld, kX86InstIdShld, X86GpReg, X86GpReg, X86GpReg) + //! \overload + INST_3x(shld, kX86InstIdShld, X86Mem, X86GpReg, X86GpReg) + //! Double precision shift left. + INST_3i(shld, kX86InstIdShld, X86GpReg, X86GpReg, Imm) + //! \overload + INST_3i(shld, kX86InstIdShld, X86Mem, X86GpReg, Imm) + + //! Double precision shift right. + //! + //! NOTE: `o2` register can be only `cl` register. + INST_3x(shrd, kX86InstIdShrd, X86GpReg, X86GpReg, X86GpReg) + //! \overload + INST_3x(shrd, kX86InstIdShrd, X86Mem, X86GpReg, X86GpReg) + //! Double precision shift right. + INST_3i(shrd, kX86InstIdShrd, X86GpReg, X86GpReg, Imm) + //! \overload + INST_3i(shrd, kX86InstIdShrd, X86Mem, X86GpReg, Imm) + + //! Set carry flag to 1. + INST_0x(stc, kX86InstIdStc) + //! Set direction flag to 1. + INST_0x(std, kX86InstIdStd) + + //! Fill BYTE at ES:[EDI/RDI] with AL. + INST_0x(stosb, kX86InstIdStosB) + //! Fill DWORD at ES:[EDI/RDI] with EAX. + INST_0x(stosd, kX86InstIdStosD) + //! Fill QWORD at ES:[RDI] with RAX (X64 Only). + INST_0x(stosq, kX86InstIdStosQ) + //! Fill WORD at ES:[EDI/RDI] with AX. + INST_0x(stosw, kX86InstIdStosW) + + //! Subtract. + INST_2x(sub, kX86InstIdSub, X86GpReg, X86GpReg) + //! \overload + INST_2x(sub, kX86InstIdSub, X86GpReg, X86Mem) + //! \overload + INST_2i(sub, kX86InstIdSub, X86GpReg, Imm) + //! \overload + INST_2x(sub, kX86InstIdSub, X86Mem, X86GpReg) + //! \overload + INST_2i(sub, kX86InstIdSub, X86Mem, Imm) + + //! Logical compare. + INST_2x(test, kX86InstIdTest, X86GpReg, X86GpReg) + //! \overload + INST_2i(test, kX86InstIdTest, X86GpReg, Imm) + //! \overload + INST_2x(test, kX86InstIdTest, X86Mem, X86GpReg) + //! \overload + INST_2i(test, kX86InstIdTest, X86Mem, Imm) + + //! Undefined instruction - Raise #UD exception. + INST_0x(ud2, kX86InstIdUd2) + + //! Exchange and Add. + INST_2x(xadd, kX86InstIdXadd, X86GpReg, X86GpReg) + //! \overload + INST_2x(xadd, kX86InstIdXadd, X86Mem, X86GpReg) + + //! Exchange register/memory with register. + INST_2x(xchg, kX86InstIdXchg, X86GpReg, X86GpReg) + //! \overload + INST_2x(xchg, kX86InstIdXchg, X86Mem, X86GpReg) + //! \overload + INST_2x(xchg, kX86InstIdXchg, X86GpReg, X86Mem) + + //! Xor. + INST_2x(xor_, kX86InstIdXor, X86GpReg, X86GpReg) + //! \overload + INST_2x(xor_, kX86InstIdXor, X86GpReg, X86Mem) + //! \overload + INST_2i(xor_, kX86InstIdXor, X86GpReg, Imm) + //! \overload + INST_2x(xor_, kX86InstIdXor, X86Mem, X86GpReg) + //! \overload + INST_2i(xor_, kX86InstIdXor, X86Mem, Imm) + + // -------------------------------------------------------------------------- + // [FPU] + // -------------------------------------------------------------------------- + + //! Compute `2^x - 1` - `fp0 = POW(2, fp0) - 1` (FPU). + INST_0x(f2xm1, kX86InstIdF2xm1) + //! Abs `fp0 = ABS(fp0)` (FPU). + INST_0x(fabs, kX86InstIdFabs) + + //! Add `o0 = o0 + o1` (one operand has to be `fp0`) (FPU). + INST_2x(fadd, kX86InstIdFadd, X86FpReg, X86FpReg) + //! 
Add `fp0 = fp0 + float_or_double[o0]` (FPU). + INST_1x(fadd, kX86InstIdFadd, X86Mem) + //! Add `o0 = o0 + fp0` and POP (FPU). + INST_1x(faddp, kX86InstIdFaddp, X86FpReg) + //! Add `fp1 = fp1 + fp0` and POP (FPU). + INST_0x(faddp, kX86InstIdFaddp) + + //! Load BCD from `[o0]` and PUSH (FPU). + INST_1x(fbld, kX86InstIdFbld, X86Mem) + //! Store BCD-Integer to `[o0]` and POP (FPU). + INST_1x(fbstp, kX86InstIdFbstp, X86Mem) + + //! Complement Sign `fp0 = -fp0` (FPU). + INST_0x(fchs, kX86InstIdFchs) + + //! Clear exceptions (FPU). + INST_0x(fclex, kX86InstIdFclex) + + //! Conditional move `if (CF=1) fp0 = o0` (FPU). + INST_1x(fcmovb, kX86InstIdFcmovb, X86FpReg) + //! Conditional move `if (CF|ZF=1) fp0 = o0` (FPU). + INST_1x(fcmovbe, kX86InstIdFcmovbe, X86FpReg) + //! Conditional move `if (ZF=1) fp0 = o0` (FPU). + INST_1x(fcmove, kX86InstIdFcmove, X86FpReg) + //! Conditional move `if (CF=0) fp0 = o0` (FPU). + INST_1x(fcmovnb, kX86InstIdFcmovnb, X86FpReg) + //! Conditional move `if (CF|ZF=0) fp0 = o0` (FPU). + INST_1x(fcmovnbe, kX86InstIdFcmovnbe, X86FpReg) + //! Conditional move `if (ZF=0) fp0 = o0` (FPU). + INST_1x(fcmovne, kX86InstIdFcmovne, X86FpReg) + //! Conditional move `if (PF=0) fp0 = o0` (FPU). + INST_1x(fcmovnu, kX86InstIdFcmovnu, X86FpReg) + //! Conditional move `if (PF=1) fp0 = o0` (FPU). + INST_1x(fcmovu, kX86InstIdFcmovu, X86FpReg) + + //! Compare `fp0` with `o0` (FPU). + INST_1x(fcom, kX86InstIdFcom, X86FpReg) + //! Compare `fp0` with `fp1` (FPU). + INST_0x(fcom, kX86InstIdFcom) + //! Compare `fp0` with `float_or_double[o0]` (FPU). + INST_1x(fcom, kX86InstIdFcom, X86Mem) + //! Compare `fp0` with `o0` and POP (FPU). + INST_1x(fcomp, kX86InstIdFcomp, X86FpReg) + //! Compare `fp0` with `fp1` and POP (FPU). + INST_0x(fcomp, kX86InstIdFcomp) + //! Compare `fp0` with `float_or_double[o0]` and POP (FPU). + INST_1x(fcomp, kX86InstIdFcomp, X86Mem) + //! Compare `fp0` with `fp1` and POP twice (FPU). + INST_0x(fcompp, kX86InstIdFcompp) + //! Compare `fp0` with `o0` and set EFLAGS (FPU). + INST_1x(fcomi, kX86InstIdFcomi, X86FpReg) + //! Compare `fp0` with `o0` and set EFLAGS and POP (FPU). + INST_1x(fcomip, kX86InstIdFcomip, X86FpReg) + + //! Cos `fp0 = cos(fp0)` (FPU). + INST_0x(fcos, kX86InstIdFcos) + + //! Decrement FPU stack pointer (FPU). + INST_0x(fdecstp, kX86InstIdFdecstp) + + //! Divide `o0 = o0 / o1` (one has to be `fp0`) (FPU). + INST_2x(fdiv, kX86InstIdFdiv, X86FpReg, X86FpReg) + //! Divide `fp0 = fp0 / float_or_double[o0]` (FPU). + INST_1x(fdiv, kX86InstIdFdiv, X86Mem) + //! Divide `o0 = o0 / fp0` and POP (FPU). + INST_1x(fdivp, kX86InstIdFdivp, X86FpReg) + //! Divide `fp1 = fp1 / fp0` and POP (FPU). + INST_0x(fdivp, kX86InstIdFdivp) + + //! Reverse divide `o0 = o1 / o0` (one has to be `fp0`) (FPU). + INST_2x(fdivr, kX86InstIdFdivr, X86FpReg, X86FpReg) + //! Reverse divide `fp0 = float_or_double[o0] / fp0` (FPU). + INST_1x(fdivr, kX86InstIdFdivr, X86Mem) + //! Reverse divide `o0 = fp0 / o0` and POP (FPU). + INST_1x(fdivrp, kX86InstIdFdivrp, X86FpReg) + //! Reverse divide `fp1 = fp0 / fp1` and POP (FPU). + INST_0x(fdivrp, kX86InstIdFdivrp) + + //! Free FP register (FPU). + INST_1x(ffree, kX86InstIdFfree, X86FpReg) + + //! Add `fp0 = fp0 + short_or_int[o0]` (FPU). + INST_1x(fiadd, kX86InstIdFiadd, X86Mem) + //! Compare `fp0` with `short_or_int[o0]` (FPU). + INST_1x(ficom, kX86InstIdFicom, X86Mem) + //! Compare `fp0` with `short_or_int[o0]` and POP (FPU). + INST_1x(ficomp, kX86InstIdFicomp, X86Mem) + //! Divide `fp0 = fp0 / short_or_int[o0]` (FPU). 
+ INST_1x(fidiv, kX86InstIdFidiv, X86Mem) + //! Reverse divide `fp0 = short_or_int[o0] / fp0` (FPU). + INST_1x(fidivr, kX86InstIdFidivr, X86Mem) + + //! Load `short_or_int_or_long[o0]` and PUSH (FPU). + INST_1x(fild, kX86InstIdFild, X86Mem) + //! Multiply `fp0 *= short_or_int[o0]` (FPU). + INST_1x(fimul, kX86InstIdFimul, X86Mem) + + //! Increment FPU stack pointer (FPU). + INST_0x(fincstp, kX86InstIdFincstp) + //! Initialize FPU (FPU). + INST_0x(finit, kX86InstIdFinit) + + //! Subtract `fp0 = fp0 - short_or_int[o0]` (FPU). + INST_1x(fisub, kX86InstIdFisub, X86Mem) + //! Reverse subtract `fp0 = short_or_int[o0] - fp0` (FPU). + INST_1x(fisubr, kX86InstIdFisubr, X86Mem) + + //! Initialize FPU without checking for pending unmasked exceptions (FPU). + INST_0x(fninit, kX86InstIdFninit) + + //! Store `fp0` as `short_or_int[o0]` (FPU). + INST_1x(fist, kX86InstIdFist, X86Mem) + //! Store `fp0` as `short_or_int_or_long[o0]` and POP (FPU). + INST_1x(fistp, kX86InstIdFistp, X86Mem) + + //! Load `float_or_double_or_extended[o0]` and PUSH (FPU). + INST_1x(fld, kX86InstIdFld, X86Mem) + //! PUSH `o0` (FPU). + INST_1x(fld, kX86InstIdFld, X86FpReg) + + //! PUSH `1.0` (FPU). + INST_0x(fld1, kX86InstIdFld1) + //! PUSH `log2(10)` (FPU). + INST_0x(fldl2t, kX86InstIdFldl2t) + //! PUSH `log2(e)` (FPU). + INST_0x(fldl2e, kX86InstIdFldl2e) + //! PUSH `pi` (FPU). + INST_0x(fldpi, kX86InstIdFldpi) + //! PUSH `log10(2)` (FPU). + INST_0x(fldlg2, kX86InstIdFldlg2) + //! PUSH `ln(2)` (FPU). + INST_0x(fldln2, kX86InstIdFldln2) + //! PUSH `+0.0` (FPU). + INST_0x(fldz, kX86InstIdFldz) + + //! Load x87 FPU control word from `word_ptr[o0]` (FPU). + INST_1x(fldcw, kX86InstIdFldcw, X86Mem) + //! Load x87 FPU environment (14 or 28 bytes) from `[o0]` (FPU). + INST_1x(fldenv, kX86InstIdFldenv, X86Mem) + + //! Multiply `o0 = o0 * o1` (one has to be `fp0`) (FPU). + INST_2x(fmul, kX86InstIdFmul, X86FpReg, X86FpReg) + //! Multiply `fp0 = fp0 * float_or_double[o0]` (FPU). + INST_1x(fmul, kX86InstIdFmul, X86Mem) + //! Multiply `o0 = o0 * fp0` and POP (FPU). + INST_1x(fmulp, kX86InstIdFmulp, X86FpReg) + //! Multiply `fp1 = fp1 * fp0` and POP (FPU). + INST_0x(fmulp, kX86InstIdFmulp) + + //! Clear exceptions (FPU). + INST_0x(fnclex, kX86InstIdFnclex) + //! No operation (FPU). + INST_0x(fnop, kX86InstIdFnop) + //! Save FPU state to `[o0]` (FPU). + INST_1x(fnsave, kX86InstIdFnsave, X86Mem) + //! Store x87 FPU environment to `[o0]` (FPU). + INST_1x(fnstenv, kX86InstIdFnstenv, X86Mem) + //! Store x87 FPU control word to `[o0]` (FPU). + INST_1x(fnstcw, kX86InstIdFnstcw, X86Mem) + + //! Store x87 FPU status word to `o0` (AX) (FPU). + INST_1x(fnstsw, kX86InstIdFnstsw, X86GpReg) + //! Store x87 FPU status word to `word_ptr[o0]` (FPU). + INST_1x(fnstsw, kX86InstIdFnstsw, X86Mem) + + //! Partial Arctan `fp1 = atan2(fp1, fp0)` and POP (FPU). + INST_0x(fpatan, kX86InstIdFpatan) + //! Partial Remainder[Trunc] `fp1 = fp0 % fp1` and POP (FPU). + INST_0x(fprem, kX86InstIdFprem) + //! Partial Remainder[Round] `fp1 = fp0 % fp1` and POP (FPU). + INST_0x(fprem1, kX86InstIdFprem1) + //! Partial Tan `fp0 = tan(fp0)` and PUSH `1.0` (FPU). + INST_0x(fptan, kX86InstIdFptan) + //! Round `fp0 = round(fp0)` (FPU). + INST_0x(frndint, kX86InstIdFrndint) + + //! Restore FPU state from `[o0]` (94 or 108 bytes) (FPU). + INST_1x(frstor, kX86InstIdFrstor, X86Mem) + //! Save FPU state to `[o0]` (94 or 108 bytes) (FPU). + INST_1x(fsave, kX86InstIdFsave, X86Mem) + + //! Scale `fp0 = fp0 * pow(2, RoundTowardsZero(fp1))` (FPU). + INST_0x(fscale, kX86InstIdFscale) + //! 
Sin `fp0 = sin(fp0)` (FPU).
+  INST_0x(fsin, kX86InstIdFsin)
+  //! Sincos `fp0 = sin(fp0)` and PUSH `cos(fp0)` (FPU).
+  INST_0x(fsincos, kX86InstIdFsincos)
+  //! Square root `fp0 = sqrt(fp0)` (FPU).
+  INST_0x(fsqrt, kX86InstIdFsqrt)
+
+  //! Store floating point value to `float_or_double[o0]` (FPU).
+  INST_1x(fst, kX86InstIdFst, X86Mem)
+  //! Copy `o0 = fp0` (FPU).
+  INST_1x(fst, kX86InstIdFst, X86FpReg)
+  //! Store floating point value to `float_or_double_or_extended[o0]` and POP (FPU).
+  INST_1x(fstp, kX86InstIdFstp, X86Mem)
+  //! Copy `o0 = fp0` and POP (FPU).
+  INST_1x(fstp, kX86InstIdFstp, X86FpReg)
+
+  //! Store x87 FPU control word to `word_ptr[o0]` (FPU).
+  INST_1x(fstcw, kX86InstIdFstcw, X86Mem)
+  //! Store x87 FPU environment to `[o0]` (14 or 28 bytes) (FPU).
+  INST_1x(fstenv, kX86InstIdFstenv, X86Mem)
+  //! Store x87 FPU status word to `o0` (AX) (FPU).
+  INST_1x(fstsw, kX86InstIdFstsw, X86GpReg)
+  //! Store x87 FPU status word to `word_ptr[o0]` (FPU).
+  INST_1x(fstsw, kX86InstIdFstsw, X86Mem)
+
+  //! Subtract `o0 = o0 - o1` (one operand has to be `fp0`) (FPU).
+  INST_2x(fsub, kX86InstIdFsub, X86FpReg, X86FpReg)
+  //! Subtract `fp0 = fp0 - float_or_double[o0]` (FPU).
+  INST_1x(fsub, kX86InstIdFsub, X86Mem)
+  //! Subtract `o0 = o0 - fp0` and POP (FPU).
+  INST_1x(fsubp, kX86InstIdFsubp, X86FpReg)
+  //! Subtract `fp1 = fp1 - fp0` and POP (FPU).
+  INST_0x(fsubp, kX86InstIdFsubp)
+
+  //! Reverse subtract `o0 = o1 - o0` (one operand has to be `fp0`) (FPU).
+  INST_2x(fsubr, kX86InstIdFsubr, X86FpReg, X86FpReg)
+  //! Reverse subtract `fp0 = float_or_double[o0] - fp0` (FPU).
+  INST_1x(fsubr, kX86InstIdFsubr, X86Mem)
+  //! Reverse subtract `o0 = fp0 - o0` and POP (FPU).
+  INST_1x(fsubrp, kX86InstIdFsubrp, X86FpReg)
+  //! Reverse subtract `fp1 = fp0 - fp1` and POP (FPU).
+  INST_0x(fsubrp, kX86InstIdFsubrp)
+
+  //! Compare `fp0` with `0.0` (FPU).
+  INST_0x(ftst, kX86InstIdFtst)
+
+  //! Unordered compare `fp0` with `o0` (FPU).
+  INST_1x(fucom, kX86InstIdFucom, X86FpReg)
+  //! Unordered compare `fp0` with `fp1` (FPU).
+  INST_0x(fucom, kX86InstIdFucom)
+  //! Unordered compare `fp0` with `o0`, check for ordered values and set EFLAGS (FPU).
+  INST_1x(fucomi, kX86InstIdFucomi, X86FpReg)
+  //! Unordered compare `fp0` with `o0`, check for ordered values, set EFLAGS and POP (FPU).
+  INST_1x(fucomip, kX86InstIdFucomip, X86FpReg)
+  //! Unordered compare `fp0` with `o0` and POP (FPU).
+  INST_1x(fucomp, kX86InstIdFucomp, X86FpReg)
+  //! Unordered compare `fp0` with `fp1` and POP (FPU).
+  INST_0x(fucomp, kX86InstIdFucomp)
+  //! Unordered compare `fp0` with `fp1` and POP twice (FPU).
+  INST_0x(fucompp, kX86InstIdFucompp)
+
+  //! Check for and handle pending unmasked FPU exceptions (FPU).
+  INST_0x(fwait, kX86InstIdFwait)
+
+  //! Examine fp0 (FPU).
+  INST_0x(fxam, kX86InstIdFxam)
+  //! Exchange `fp0` with `o0` (FPU).
+  INST_1x(fxch, kX86InstIdFxch, X86FpReg)
+
+  //! Extract `fp0 = exponent(fp0)` and PUSH `significand(fp0)` (FPU).
+  INST_0x(fxtract, kX86InstIdFxtract)
+
+  //! Compute `fp1 = fp1 * log2(fp0)` and POP (FPU).
+  INST_0x(fyl2x, kX86InstIdFyl2x)
+  //! Compute `fp1 = fp1 * log2(fp0 + 1)` and POP (FPU).
+  INST_0x(fyl2xp1, kX86InstIdFyl2xp1)
+
+  // --------------------------------------------------------------------------
+  // [FXSR]
+  // --------------------------------------------------------------------------
+
+  //! Restore FP/MMX/SIMD extension states from `o0` (512 bytes) (FXSR).
+  INST_1x(fxrstor, kX86InstIdFxrstor, X86Mem)
+  //! Restore FP/MMX/SIMD extension states from `o0` (512 bytes) (FXSR & X64).
+  INST_1x(fxrstor64, kX86InstIdFxrstor64, X86Mem)
+  //! Store FP/MMX/SIMD extension states to `o0` (512 bytes) (FXSR).
+  INST_1x(fxsave, kX86InstIdFxsave, X86Mem)
+  //! Store FP/MMX/SIMD extension states to `o0` (512 bytes) (FXSR & X64).
+  INST_1x(fxsave64, kX86InstIdFxsave64, X86Mem)
+
+  // --------------------------------------------------------------------------
+  // [XSAVE]
+  // --------------------------------------------------------------------------
+
+  //! Restore Processor Extended States specified by `EDX:EAX` (XSAVE).
+  INST_1x(xrstor, kX86InstIdXrstor, X86Mem)
+  //! Restore Processor Extended States specified by `EDX:EAX` (XSAVE & X64).
+  INST_1x(xrstor64, kX86InstIdXrstor64, X86Mem)
+
+  //! Save Processor Extended States specified by `EDX:EAX` (XSAVE).
+  INST_1x(xsave, kX86InstIdXsave, X86Mem)
+  //! Save Processor Extended States specified by `EDX:EAX` (XSAVE & X64).
+  INST_1x(xsave64, kX86InstIdXsave64, X86Mem)
+
+  //! Save Processor Extended States specified by `EDX:EAX` (Optimized) (XSAVEOPT).
+  INST_1x(xsaveopt, kX86InstIdXsaveopt, X86Mem)
+  //! Save Processor Extended States specified by `EDX:EAX` (Optimized) (XSAVEOPT & X64).
+  INST_1x(xsaveopt64, kX86InstIdXsaveopt64, X86Mem)
+
+  //! Get XCR - `EDX:EAX <- XCR[ECX]` (XSAVE).
+  INST_0x(xgetbv, kX86InstIdXgetbv)
+  //! Set XCR - `XCR[ECX] <- EDX:EAX` (XSAVE).
+  INST_0x(xsetbv, kX86InstIdXsetbv)
+
+  // --------------------------------------------------------------------------
+  // [POPCNT]
+  // --------------------------------------------------------------------------
+
+  //! Count the number of bits set to 1 (POPCNT).
+  INST_2x(popcnt, kX86InstIdPopcnt, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(popcnt, kX86InstIdPopcnt, X86GpReg, X86Mem)
+
+  // --------------------------------------------------------------------------
+  // [LZCNT]
+  // --------------------------------------------------------------------------
+
+  //! Count the number of leading zero bits (LZCNT).
+  INST_2x(lzcnt, kX86InstIdLzcnt, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(lzcnt, kX86InstIdLzcnt, X86GpReg, X86Mem)
+
+  // --------------------------------------------------------------------------
+  // [BMI]
+  // --------------------------------------------------------------------------
+
+  //! Bitwise and-not (BMI).
+  INST_3x(andn, kX86InstIdAndn, X86GpReg, X86GpReg, X86GpReg)
+  //! \overload
+  INST_3x(andn, kX86InstIdAndn, X86GpReg, X86GpReg, X86Mem)
+
+  //! Bit field extract (BMI).
+  INST_3x(bextr, kX86InstIdBextr, X86GpReg, X86GpReg, X86GpReg)
+  //! \overload
+  INST_3x(bextr, kX86InstIdBextr, X86GpReg, X86Mem, X86GpReg)
+
+  //! Extract lowest set isolated bit (BMI).
+  INST_2x(blsi, kX86InstIdBlsi, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blsi, kX86InstIdBlsi, X86GpReg, X86Mem)
+
+  //! Get mask up to lowest set bit (BMI).
+  INST_2x(blsmsk, kX86InstIdBlsmsk, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blsmsk, kX86InstIdBlsmsk, X86GpReg, X86Mem)
+
+  //! Reset lowest set bit (BMI).
+  INST_2x(blsr, kX86InstIdBlsr, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blsr, kX86InstIdBlsr, X86GpReg, X86Mem)
+
+  //! Count the number of trailing zero bits (BMI).
+  INST_2x(tzcnt, kX86InstIdTzcnt, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(tzcnt, kX86InstIdTzcnt, X86GpReg, X86Mem)
+
+  // --------------------------------------------------------------------------
+  // [BMI2]
+  // --------------------------------------------------------------------------
+
+  //! Zero high bits starting with the specified bit position (BMI2).
+  INST_3x(bzhi, kX86InstIdBzhi, X86GpReg, X86GpReg, X86GpReg)
+  //! \overload
+  INST_3x(bzhi, kX86InstIdBzhi, X86GpReg, X86Mem, X86GpReg)
+
+  //! Unsigned multiply without affecting flags (BMI2).
+  INST_3x(mulx, kX86InstIdMulx, X86GpReg, X86GpReg, X86GpReg)
+  //! \overload
+  INST_3x(mulx, kX86InstIdMulx, X86GpReg, X86GpReg, X86Mem)
+
+  //! Parallel bits deposit (BMI2).
+  INST_3x(pdep, kX86InstIdPdep, X86GpReg, X86GpReg, X86GpReg)
+  //! \overload
+  INST_3x(pdep, kX86InstIdPdep, X86GpReg, X86GpReg, X86Mem)
+
+  //! Parallel bits extract (BMI2).
+  INST_3x(pext, kX86InstIdPext, X86GpReg, X86GpReg, X86GpReg)
+  //! \overload
+  INST_3x(pext, kX86InstIdPext, X86GpReg, X86GpReg, X86Mem)
+
+  //! Rotate right without affecting flags (BMI2).
+  INST_3i(rorx, kX86InstIdRorx, X86GpReg, X86GpReg, Imm)
+  //! \overload
+  INST_3i(rorx, kX86InstIdRorx, X86GpReg, X86Mem, Imm)
+
+  //! Shift arithmetic right without affecting flags (BMI2).
+  INST_3x(sarx, kX86InstIdSarx, X86GpReg, X86GpReg, X86GpReg)
+  //! \overload
+  INST_3x(sarx, kX86InstIdSarx, X86GpReg, X86Mem, X86GpReg)
+
+  //! Shift logical left without affecting flags (BMI2).
+  INST_3x(shlx, kX86InstIdShlx, X86GpReg, X86GpReg, X86GpReg)
+  //! \overload
+  INST_3x(shlx, kX86InstIdShlx, X86GpReg, X86Mem, X86GpReg)
+
+  //! Shift logical right without affecting flags (BMI2).
+  INST_3x(shrx, kX86InstIdShrx, X86GpReg, X86GpReg, X86GpReg)
+  //! \overload
+  INST_3x(shrx, kX86InstIdShrx, X86GpReg, X86Mem, X86GpReg)
+
+  // --------------------------------------------------------------------------
+  // [ADX]
+  // --------------------------------------------------------------------------
+
+  //! Unsigned integer addition of two operands with carry flag (ADX).
+  INST_2x(adcx, kX86InstIdAdcx, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(adcx, kX86InstIdAdcx, X86GpReg, X86Mem)
+
+  //! Unsigned integer addition of two operands with overflow flag (ADX).
+  INST_2x(adox, kX86InstIdAdox, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(adox, kX86InstIdAdox, X86GpReg, X86Mem)
+
+  // --------------------------------------------------------------------------
+  // [TBM]
+  // --------------------------------------------------------------------------
+
+  //! Fill from lowest clear bit (TBM).
+  INST_2x(blcfill, kX86InstIdBlcfill, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blcfill, kX86InstIdBlcfill, X86GpReg, X86Mem)
+
+  //! Isolate lowest clear bit (TBM).
+  INST_2x(blci, kX86InstIdBlci, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blci, kX86InstIdBlci, X86GpReg, X86Mem)
+
+  //! Isolate lowest clear bit and complement (TBM).
+  INST_2x(blcic, kX86InstIdBlcic, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blcic, kX86InstIdBlcic, X86GpReg, X86Mem)
+
+  //! Mask from lowest clear bit (TBM).
+  INST_2x(blcmsk, kX86InstIdBlcmsk, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blcmsk, kX86InstIdBlcmsk, X86GpReg, X86Mem)
+
+  //! Set lowest clear bit (TBM).
+  INST_2x(blcs, kX86InstIdBlcs, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blcs, kX86InstIdBlcs, X86GpReg, X86Mem)
+
+  //! Fill from lowest set bit (TBM).
+  INST_2x(blsfill, kX86InstIdBlsfill, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blsfill, kX86InstIdBlsfill, X86GpReg, X86Mem)
+
+  //! Isolate lowest set bit and complement (TBM).
+  INST_2x(blsic, kX86InstIdBlsic, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blsic, kX86InstIdBlsic, X86GpReg, X86Mem)
+
+  //! Inverse mask from trailing ones (TBM).
+  INST_2x(t1mskc, kX86InstIdT1mskc, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(t1mskc, kX86InstIdT1mskc, X86GpReg, X86Mem)
+  // --------------------------------------------------------------------------
+  // [TBM]
+  // --------------------------------------------------------------------------
+
+  //! Fill from lowest clear bit (TBM).
+  INST_2x(blcfill, kX86InstIdBlcfill, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blcfill, kX86InstIdBlcfill, X86GpReg, X86Mem)
+
+  //! Isolate lowest clear bit (TBM).
+  INST_2x(blci, kX86InstIdBlci, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blci, kX86InstIdBlci, X86GpReg, X86Mem)
+
+  //! Isolate lowest clear bit and complement (TBM).
+  INST_2x(blcic, kX86InstIdBlcic, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blcic, kX86InstIdBlcic, X86GpReg, X86Mem)
+
+  //! Mask from lowest clear bit (TBM).
+  INST_2x(blcmsk, kX86InstIdBlcmsk, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blcmsk, kX86InstIdBlcmsk, X86GpReg, X86Mem)
+
+  //! Set lowest clear bit (TBM).
+  INST_2x(blcs, kX86InstIdBlcs, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blcs, kX86InstIdBlcs, X86GpReg, X86Mem)
+
+  //! Fill from lowest set bit (TBM).
+  INST_2x(blsfill, kX86InstIdBlsfill, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blsfill, kX86InstIdBlsfill, X86GpReg, X86Mem)
+
+  //! Isolate lowest set bit and complement (TBM).
+  INST_2x(blsic, kX86InstIdBlsic, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(blsic, kX86InstIdBlsic, X86GpReg, X86Mem)
+
+  //! Inverse mask from trailing ones (TBM).
+  INST_2x(t1mskc, kX86InstIdT1mskc, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(t1mskc, kX86InstIdT1mskc, X86GpReg, X86Mem)
+
+  //! Mask from trailing zeros (TBM).
+  INST_2x(tzmsk, kX86InstIdTzmsk, X86GpReg, X86GpReg)
+  //! \overload
+  INST_2x(tzmsk, kX86InstIdTzmsk, X86GpReg, X86Mem)
+
+  // --------------------------------------------------------------------------
+  // [CLFLUSH / CLFLUSH_OPT]
+  // --------------------------------------------------------------------------
+
+  //! Flush cache line (CLFLUSH).
+  INST_1x(clflush, kX86InstIdClflush, X86Mem)
+
+  //! Flush cache line (CLFLUSH_OPT).
+  INST_1x(clflushopt, kX86InstIdClflushopt, X86Mem)
+
+  // --------------------------------------------------------------------------
+  // [PREFETCHW / PREFETCHWT1]
+  // --------------------------------------------------------------------------
+
+  //! Prefetch data into caches in anticipation of a write (3DNOW / PREFETCHW).
+  INST_1x(prefetchw, kX86InstIdPrefetchw, X86Mem)
+
+  //! Prefetch vector data into caches with intent to write and T1 hint (PREFETCHWT1).
+  INST_1x(prefetchwt1, kX86InstIdPrefetchwt1, X86Mem)
+
+  // --------------------------------------------------------------------------
+  // [RDRAND / RDSEED]
+  // --------------------------------------------------------------------------
+
+  //! Store a pseudo-random number in destination register (crypto-unsafe) (RDRAND).
+  INST_1x(rdrand, kX86InstIdRdrand, X86GpReg)
+
+  //! Store a random seed in destination register (crypto-unsafe) (RDSEED).
+  INST_1x(rdseed, kX86InstIdRdseed, X86GpReg)
+
+  // --------------------------------------------------------------------------
+  // [FSGSBASE]
+  // --------------------------------------------------------------------------
+
+  //! Read FS base register (FSGSBASE & X64).
+  INST_1x(rdfsbase, kX86InstIdRdfsbase, X86GpReg)
+  //! Read GS base register (FSGSBASE & X64).
+  INST_1x(rdgsbase, kX86InstIdRdgsbase, X86GpReg)
+  //! Write FS base register (FSGSBASE & X64).
+  INST_1x(wrfsbase, kX86InstIdWrfsbase, X86GpReg)
+  //! Write GS base register (FSGSBASE & X64).
+  INST_1x(wrgsbase, kX86InstIdWrgsbase, X86GpReg)
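+
+  //! \note A usage sketch (illustrative assumption): `rdrand` sets CF only when
+  //! a random value was delivered, so callers typically retry on failure. With
+  //! an `X86Assembler a` that supports labels:
+  //!
+  //! \code
+  //! Label retry = a.newLabel();
+  //! a.bind(retry);
+  //! a.rdrand(eax);  // CF=1 -> eax holds a random value.
+  //! a.jnc(retry);   // CF=0 -> no entropy available yet, try again.
+  //! \endcode
+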
+  // --------------------------------------------------------------------------
+  // [MMX]
+  // --------------------------------------------------------------------------
+
+  //! Move DWORD (MMX).
+  INST_2x(movd, kX86InstIdMovd, X86Mem, X86MmReg)
+  //! \overload
+  INST_2x(movd, kX86InstIdMovd, X86GpReg, X86MmReg)
+  //! \overload
+  INST_2x(movd, kX86InstIdMovd, X86MmReg, X86Mem)
+  //! \overload
+  INST_2x(movd, kX86InstIdMovd, X86MmReg, X86GpReg)
+
+  //! Move QWORD (MMX).
+  INST_2x(movq, kX86InstIdMovq, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(movq, kX86InstIdMovq, X86Mem, X86MmReg)
+  //! \overload
+  INST_2x(movq, kX86InstIdMovq, X86MmReg, X86Mem)
+
+  //! Move QWORD (X64 Only).
+  INST_2x(movq, kX86InstIdMovq, X86GpReg, X86MmReg)
+  //! \overload
+  INST_2x(movq, kX86InstIdMovq, X86MmReg, X86GpReg)
+
+  //! Pack DWORDs to WORDs with signed saturation (MMX).
+  INST_2x(packssdw, kX86InstIdPackssdw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(packssdw, kX86InstIdPackssdw, X86MmReg, X86Mem)
+
+  //! Pack WORDs to BYTEs with signed saturation (MMX).
+  INST_2x(packsswb, kX86InstIdPacksswb, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(packsswb, kX86InstIdPacksswb, X86MmReg, X86Mem)
+
+  //! Pack WORDs to BYTEs with unsigned saturation (MMX).
+  INST_2x(packuswb, kX86InstIdPackuswb, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(packuswb, kX86InstIdPackuswb, X86MmReg, X86Mem)
+
+  //! Packed BYTE add (MMX).
+  INST_2x(paddb, kX86InstIdPaddb, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(paddb, kX86InstIdPaddb, X86MmReg, X86Mem)
+
+  //! Packed DWORD add (MMX).
+  INST_2x(paddd, kX86InstIdPaddd, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(paddd, kX86InstIdPaddd, X86MmReg, X86Mem)
+
+  //! Packed BYTE add with saturation (MMX).
+  INST_2x(paddsb, kX86InstIdPaddsb, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(paddsb, kX86InstIdPaddsb, X86MmReg, X86Mem)
+
+  //! Packed WORD add with saturation (MMX).
+  INST_2x(paddsw, kX86InstIdPaddsw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(paddsw, kX86InstIdPaddsw, X86MmReg, X86Mem)
+
+  //! Packed BYTE add with unsigned saturation (MMX).
+  INST_2x(paddusb, kX86InstIdPaddusb, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(paddusb, kX86InstIdPaddusb, X86MmReg, X86Mem)
+
+  //! Packed WORD add with unsigned saturation (MMX).
+  INST_2x(paddusw, kX86InstIdPaddusw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(paddusw, kX86InstIdPaddusw, X86MmReg, X86Mem)
+
+  //! Packed WORD add (MMX).
+  INST_2x(paddw, kX86InstIdPaddw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(paddw, kX86InstIdPaddw, X86MmReg, X86Mem)
+
+  //! Packed bitwise and (MMX).
+  INST_2x(pand, kX86InstIdPand, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pand, kX86InstIdPand, X86MmReg, X86Mem)
+
+  //! Packed bitwise and-not (MMX).
+  INST_2x(pandn, kX86InstIdPandn, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pandn, kX86InstIdPandn, X86MmReg, X86Mem)
+
+  //! Packed BYTEs compare for equality (MMX).
+  INST_2x(pcmpeqb, kX86InstIdPcmpeqb, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pcmpeqb, kX86InstIdPcmpeqb, X86MmReg, X86Mem)
+
+  //! Packed DWORDs compare for equality (MMX).
+  INST_2x(pcmpeqd, kX86InstIdPcmpeqd, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pcmpeqd, kX86InstIdPcmpeqd, X86MmReg, X86Mem)
+
+  //! Packed WORDs compare for equality (MMX).
+  INST_2x(pcmpeqw, kX86InstIdPcmpeqw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pcmpeqw, kX86InstIdPcmpeqw, X86MmReg, X86Mem)
+
+  //! Packed BYTEs compare if greater than (MMX).
+  INST_2x(pcmpgtb, kX86InstIdPcmpgtb, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pcmpgtb, kX86InstIdPcmpgtb, X86MmReg, X86Mem)
+
+  //! Packed DWORDs compare if greater than (MMX).
+  INST_2x(pcmpgtd, kX86InstIdPcmpgtd, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pcmpgtd, kX86InstIdPcmpgtd, X86MmReg, X86Mem)
+
+  //! Packed WORDs compare if greater than (MMX).
+  INST_2x(pcmpgtw, kX86InstIdPcmpgtw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pcmpgtw, kX86InstIdPcmpgtw, X86MmReg, X86Mem)
+
+  //! Packed WORDs multiply high (MMX).
+  INST_2x(pmulhw, kX86InstIdPmulhw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pmulhw, kX86InstIdPmulhw, X86MmReg, X86Mem)
+
+  //! Packed WORDs multiply low (MMX).
+  INST_2x(pmullw, kX86InstIdPmullw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pmullw, kX86InstIdPmullw, X86MmReg, X86Mem)
+
+  //! Packed bitwise or (MMX).
+  INST_2x(por, kX86InstIdPor, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(por, kX86InstIdPor, X86MmReg, X86Mem)
+
+  //! Packed WORD multiply and add to packed DWORD (MMX).
+  INST_2x(pmaddwd, kX86InstIdPmaddwd, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pmaddwd, kX86InstIdPmaddwd, X86MmReg, X86Mem)
+
+  //! Packed DWORD shift left logical (MMX).
+  INST_2x(pslld, kX86InstIdPslld, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pslld, kX86InstIdPslld, X86MmReg, X86Mem)
+  //! \overload
+  INST_2i(pslld, kX86InstIdPslld, X86MmReg, Imm)
+
+  //! Packed QWORD shift left logical (MMX).
+  INST_2x(psllq, kX86InstIdPsllq, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(psllq, kX86InstIdPsllq, X86MmReg, X86Mem)
+  //! \overload
+  INST_2i(psllq, kX86InstIdPsllq, X86MmReg, Imm)
+
+  //! Packed WORD shift left logical (MMX).
+  INST_2x(psllw, kX86InstIdPsllw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(psllw, kX86InstIdPsllw, X86MmReg, X86Mem)
+  //!
\overload + INST_2i(psllw, kX86InstIdPsllw, X86MmReg, Imm) + + //! Packed DWORD shift right arithmetic (MMX). + INST_2x(psrad, kX86InstIdPsrad, X86MmReg, X86MmReg) + //! \overload + INST_2x(psrad, kX86InstIdPsrad, X86MmReg, X86Mem) + //! \overload + INST_2i(psrad, kX86InstIdPsrad, X86MmReg, Imm) + + //! Packed WORD shift right arithmetic (MMX). + INST_2x(psraw, kX86InstIdPsraw, X86MmReg, X86MmReg) + //! \overload + INST_2x(psraw, kX86InstIdPsraw, X86MmReg, X86Mem) + //! \overload + INST_2i(psraw, kX86InstIdPsraw, X86MmReg, Imm) + + //! Packed DWORD shift right logical (MMX). + INST_2x(psrld, kX86InstIdPsrld, X86MmReg, X86MmReg) + //! \overload + INST_2x(psrld, kX86InstIdPsrld, X86MmReg, X86Mem) + //! \overload + INST_2i(psrld, kX86InstIdPsrld, X86MmReg, Imm) + + //! Packed QWORD shift right logical (MMX). + INST_2x(psrlq, kX86InstIdPsrlq, X86MmReg, X86MmReg) + //! \overload + INST_2x(psrlq, kX86InstIdPsrlq, X86MmReg, X86Mem) + //! \overload + INST_2i(psrlq, kX86InstIdPsrlq, X86MmReg, Imm) + + //! Packed WORD shift right logical (MMX). + INST_2x(psrlw, kX86InstIdPsrlw, X86MmReg, X86MmReg) + //! \overload + INST_2x(psrlw, kX86InstIdPsrlw, X86MmReg, X86Mem) + //! \overload + INST_2i(psrlw, kX86InstIdPsrlw, X86MmReg, Imm) + + //! Packed BYTE subtract (MMX). + INST_2x(psubb, kX86InstIdPsubb, X86MmReg, X86MmReg) + //! \overload + INST_2x(psubb, kX86InstIdPsubb, X86MmReg, X86Mem) + + //! Packed DWORD subtract (MMX). + INST_2x(psubd, kX86InstIdPsubd, X86MmReg, X86MmReg) + //! \overload + INST_2x(psubd, kX86InstIdPsubd, X86MmReg, X86Mem) + + //! Packed BYTE subtract with saturation (MMX). + INST_2x(psubsb, kX86InstIdPsubsb, X86MmReg, X86MmReg) + //! \overload + INST_2x(psubsb, kX86InstIdPsubsb, X86MmReg, X86Mem) + + //! Packed WORD subtract with saturation (MMX). + INST_2x(psubsw, kX86InstIdPsubsw, X86MmReg, X86MmReg) + //! \overload + INST_2x(psubsw, kX86InstIdPsubsw, X86MmReg, X86Mem) + + //! Packed BYTE subtract with unsigned saturation (MMX). + INST_2x(psubusb, kX86InstIdPsubusb, X86MmReg, X86MmReg) + //! \overload + INST_2x(psubusb, kX86InstIdPsubusb, X86MmReg, X86Mem) + + //! Packed WORD subtract with unsigned saturation (MMX). + INST_2x(psubusw, kX86InstIdPsubusw, X86MmReg, X86MmReg) + //! \overload + INST_2x(psubusw, kX86InstIdPsubusw, X86MmReg, X86Mem) + + //! Packed WORD subtract (MMX). + INST_2x(psubw, kX86InstIdPsubw, X86MmReg, X86MmReg) + //! \overload + INST_2x(psubw, kX86InstIdPsubw, X86MmReg, X86Mem) + + //! Unpack high packed BYTEs to WORDs (MMX). + INST_2x(punpckhbw, kX86InstIdPunpckhbw, X86MmReg, X86MmReg) + //! \overload + INST_2x(punpckhbw, kX86InstIdPunpckhbw, X86MmReg, X86Mem) + + //! Unpack high packed DWORDs to QWORDs (MMX). + INST_2x(punpckhdq, kX86InstIdPunpckhdq, X86MmReg, X86MmReg) + //! \overload + INST_2x(punpckhdq, kX86InstIdPunpckhdq, X86MmReg, X86Mem) + + //! Unpack high packed WORDs to DWORDs (MMX). + INST_2x(punpckhwd, kX86InstIdPunpckhwd, X86MmReg, X86MmReg) + //! \overload + INST_2x(punpckhwd, kX86InstIdPunpckhwd, X86MmReg, X86Mem) + + //! Unpack low packed BYTEs to WORDs (MMX). + INST_2x(punpcklbw, kX86InstIdPunpcklbw, X86MmReg, X86MmReg) + //! \overload + INST_2x(punpcklbw, kX86InstIdPunpcklbw, X86MmReg, X86Mem) + + //! Unpack low packed DWORDs to QWORDs (MMX). + INST_2x(punpckldq, kX86InstIdPunpckldq, X86MmReg, X86MmReg) + //! \overload + INST_2x(punpckldq, kX86InstIdPunpckldq, X86MmReg, X86Mem) + + //! Unpack low packed WORDs to DWORDs (MMX). + INST_2x(punpcklwd, kX86InstIdPunpcklwd, X86MmReg, X86MmReg) + //! 
\overload + INST_2x(punpcklwd, kX86InstIdPunpcklwd, X86MmReg, X86Mem) + + //! Packed bitwise xor (MMX). + INST_2x(pxor, kX86InstIdPxor, X86MmReg, X86MmReg) + //! \overload + INST_2x(pxor, kX86InstIdPxor, X86MmReg, X86Mem) + + //! Empty MMX state. + INST_0x(emms, kX86InstIdEmms) + + // ------------------------------------------------------------------------- + // [3dNow] + // ------------------------------------------------------------------------- + + //! Packed unsigned BYTE average (3DNOW). + INST_2x(pavgusb, kX86InstIdPavgusb, X86MmReg, X86MmReg) + //! \overload + INST_2x(pavgusb, kX86InstIdPavgusb, X86MmReg, X86Mem) + + //! Packed SP-FP to DWORD convert (3DNOW). + INST_2x(pf2id, kX86InstIdPf2id, X86MmReg, X86MmReg) + //! \overload + INST_2x(pf2id, kX86InstIdPf2id, X86MmReg, X86Mem) + + //! Packed SP-FP to WORD convert (3DNOW). + INST_2x(pf2iw, kX86InstIdPf2iw, X86MmReg, X86MmReg) + //! \overload + INST_2x(pf2iw, kX86InstIdPf2iw, X86MmReg, X86Mem) + + //! Packed SP-FP accumulate (3DNOW). + INST_2x(pfacc, kX86InstIdPfacc, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfacc, kX86InstIdPfacc, X86MmReg, X86Mem) + + //! Packed SP-FP addition (3DNOW). + INST_2x(pfadd, kX86InstIdPfadd, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfadd, kX86InstIdPfadd, X86MmReg, X86Mem) + + //! Packed SP-FP compare - dst == src (3DNOW). + INST_2x(pfcmpeq, kX86InstIdPfcmpeq, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfcmpeq, kX86InstIdPfcmpeq, X86MmReg, X86Mem) + + //! Packed SP-FP compare - dst >= src (3DNOW). + INST_2x(pfcmpge, kX86InstIdPfcmpge, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfcmpge, kX86InstIdPfcmpge, X86MmReg, X86Mem) + + //! Packed SP-FP compare - dst > src (3DNOW). + INST_2x(pfcmpgt, kX86InstIdPfcmpgt, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfcmpgt, kX86InstIdPfcmpgt, X86MmReg, X86Mem) + + //! Packed SP-FP maximum (3DNOW). + INST_2x(pfmax, kX86InstIdPfmax, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfmax, kX86InstIdPfmax, X86MmReg, X86Mem) + + //! Packed SP-FP minimum (3DNOW). + INST_2x(pfmin, kX86InstIdPfmin, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfmin, kX86InstIdPfmin, X86MmReg, X86Mem) + + //! Packed SP-FP multiply (3DNOW). + INST_2x(pfmul, kX86InstIdPfmul, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfmul, kX86InstIdPfmul, X86MmReg, X86Mem) + + //! Packed SP-FP negative accumulate (3DNOW). + INST_2x(pfnacc, kX86InstIdPfnacc, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfnacc, kX86InstIdPfnacc, X86MmReg, X86Mem) + + //! Packed SP-FP mixed accumulate (3DNOW). + INST_2x(pfpnacc, kX86InstIdPfpnacc, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfpnacc, kX86InstIdPfpnacc, X86MmReg, X86Mem) + + //! Packed SP-FP reciprocal Approximation (3DNOW). + INST_2x(pfrcp, kX86InstIdPfrcp, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfrcp, kX86InstIdPfrcp, X86MmReg, X86Mem) + + //! Packed SP-FP reciprocal, first iteration step (3DNOW). + INST_2x(pfrcpit1, kX86InstIdPfrcpit1, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfrcpit1, kX86InstIdPfrcpit1, X86MmReg, X86Mem) + + //! Packed SP-FP reciprocal, second iteration step (3DNOW). + INST_2x(pfrcpit2, kX86InstIdPfrcpit2, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfrcpit2, kX86InstIdPfrcpit2, X86MmReg, X86Mem) + + //! Packed SP-FP reciprocal square root, first iteration step (3DNOW). + INST_2x(pfrsqit1, kX86InstIdPfrsqit1, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfrsqit1, kX86InstIdPfrsqit1, X86MmReg, X86Mem) + + //! Packed SP-FP reciprocal square root approximation (3DNOW). 
+ INST_2x(pfrsqrt, kX86InstIdPfrsqrt, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfrsqrt, kX86InstIdPfrsqrt, X86MmReg, X86Mem) + + //! Packed SP-FP subtract (3DNOW). + INST_2x(pfsub, kX86InstIdPfsub, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfsub, kX86InstIdPfsub, X86MmReg, X86Mem) + + //! Packed SP-FP reverse subtract (3DNOW). + INST_2x(pfsubr, kX86InstIdPfsubr, X86MmReg, X86MmReg) + //! \overload + INST_2x(pfsubr, kX86InstIdPfsubr, X86MmReg, X86Mem) + + //! Packed DWORDs to SP-FP (3DNOW). + INST_2x(pi2fd, kX86InstIdPi2fd, X86MmReg, X86MmReg) + //! \overload + INST_2x(pi2fd, kX86InstIdPi2fd, X86MmReg, X86Mem) + + //! Packed WORDs to SP-FP (3DNOW). + INST_2x(pi2fw, kX86InstIdPi2fw, X86MmReg, X86MmReg) + //! \overload + INST_2x(pi2fw, kX86InstIdPi2fw, X86MmReg, X86Mem) + + //! Packed multiply WORD with rounding (3DNOW). + INST_2x(pmulhrw, kX86InstIdPmulhrw, X86MmReg, X86MmReg) + //! \overload + INST_2x(pmulhrw, kX86InstIdPmulhrw, X86MmReg, X86Mem) + + //! Packed swap DWORDs (3DNOW). + INST_2x(pswapd, kX86InstIdPswapd, X86MmReg, X86MmReg) + //! \overload + INST_2x(pswapd, kX86InstIdPswapd, X86MmReg, X86Mem) + + //! Prefetch (3DNOW). + INST_1x(prefetch3dnow, kX86InstIdPrefetch3dNow, X86Mem) + + //! Faster EMMS (3DNOW). + INST_0x(femms, kX86InstIdFemms) + + // -------------------------------------------------------------------------- + // [SSE] + // -------------------------------------------------------------------------- + + //! Packed SP-FP add (SSE). + INST_2x(addps, kX86InstIdAddps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(addps, kX86InstIdAddps, X86XmmReg, X86Mem) + + //! Scalar SP-FP add (SSE). + INST_2x(addss, kX86InstIdAddss, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(addss, kX86InstIdAddss, X86XmmReg, X86Mem) + + //! Packed SP-FP bitwise and-not (SSE). + INST_2x(andnps, kX86InstIdAndnps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(andnps, kX86InstIdAndnps, X86XmmReg, X86Mem) + + //! Packed SP-FP bitwise and (SSE). + INST_2x(andps, kX86InstIdAndps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(andps, kX86InstIdAndps, X86XmmReg, X86Mem) + + //! Packed SP-FP compare (SSE). + INST_3i(cmpps, kX86InstIdCmpps, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(cmpps, kX86InstIdCmpps, X86XmmReg, X86Mem, Imm) + + //! Compare scalar SP-FP (SSE). + INST_3i(cmpss, kX86InstIdCmpss, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(cmpss, kX86InstIdCmpss, X86XmmReg, X86Mem, Imm) + + //! Scalar ordered SP-FP compare and set EFLAGS (SSE). + INST_2x(comiss, kX86InstIdComiss, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(comiss, kX86InstIdComiss, X86XmmReg, X86Mem) + + //! Packed signed INT32 to packed SP-FP conversion (SSE). + INST_2x(cvtpi2ps, kX86InstIdCvtpi2ps, X86XmmReg, X86MmReg) + //! \overload + INST_2x(cvtpi2ps, kX86InstIdCvtpi2ps, X86XmmReg, X86Mem) + + //! Packed SP-FP to packed INT32 conversion (SSE). + INST_2x(cvtps2pi, kX86InstIdCvtps2pi, X86MmReg, X86XmmReg) + //! \overload + INST_2x(cvtps2pi, kX86InstIdCvtps2pi, X86MmReg, X86Mem) + + //! Convert scalar INT32 to SP-FP (SSE). + INST_2x(cvtsi2ss, kX86InstIdCvtsi2ss, X86XmmReg, X86GpReg) + //! \overload + INST_2x(cvtsi2ss, kX86InstIdCvtsi2ss, X86XmmReg, X86Mem) + + //! Convert scalar SP-FP to INT32 (SSE). + INST_2x(cvtss2si, kX86InstIdCvtss2si, X86GpReg, X86XmmReg) + //! \overload + INST_2x(cvtss2si, kX86InstIdCvtss2si, X86GpReg, X86Mem) + + //! Convert with truncation packed SP-FP to packed INT32 (SSE). + INST_2x(cvttps2pi, kX86InstIdCvttps2pi, X86MmReg, X86XmmReg) + //! 
\overload + INST_2x(cvttps2pi, kX86InstIdCvttps2pi, X86MmReg, X86Mem) + + //! Convert with truncation scalar SP-FP to INT32 (SSE). + INST_2x(cvttss2si, kX86InstIdCvttss2si, X86GpReg, X86XmmReg) + //! \overload + INST_2x(cvttss2si, kX86InstIdCvttss2si, X86GpReg, X86Mem) + + //! Packed SP-FP divide (SSE). + INST_2x(divps, kX86InstIdDivps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(divps, kX86InstIdDivps, X86XmmReg, X86Mem) + + //! Scalar SP-FP divide (SSE). + INST_2x(divss, kX86InstIdDivss, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(divss, kX86InstIdDivss, X86XmmReg, X86Mem) + + //! Load streaming SIMD extension control/status (SSE). + INST_1x(ldmxcsr, kX86InstIdLdmxcsr, X86Mem) + + //! Byte mask write to DS:EDI/RDI (SSE). + INST_2x(maskmovq, kX86InstIdMaskmovq, X86MmReg, X86MmReg) + + //! Packed SP-FP maximum (SSE). + INST_2x(maxps, kX86InstIdMaxps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(maxps, kX86InstIdMaxps, X86XmmReg, X86Mem) + + //! Scalar SP-FP maximum (SSE). + INST_2x(maxss, kX86InstIdMaxss, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(maxss, kX86InstIdMaxss, X86XmmReg, X86Mem) + + //! Packed SP-FP minimum (SSE). + INST_2x(minps, kX86InstIdMinps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(minps, kX86InstIdMinps, X86XmmReg, X86Mem) + + //! Scalar SP-FP minimum (SSE). + INST_2x(minss, kX86InstIdMinss, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(minss, kX86InstIdMinss, X86XmmReg, X86Mem) + + //! Move aligned packed SP-FP (SSE). + INST_2x(movaps, kX86InstIdMovaps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(movaps, kX86InstIdMovaps, X86XmmReg, X86Mem) + //! Move aligned packed SP-FP (SSE). + INST_2x(movaps, kX86InstIdMovaps, X86Mem, X86XmmReg) + + //! Move DWORD. + INST_2x(movd, kX86InstIdMovd, X86Mem, X86XmmReg) + //! \overload + INST_2x(movd, kX86InstIdMovd, X86GpReg, X86XmmReg) + //! \overload + INST_2x(movd, kX86InstIdMovd, X86XmmReg, X86Mem) + //! \overload + INST_2x(movd, kX86InstIdMovd, X86XmmReg, X86GpReg) + + //! Move QWORD (SSE). + INST_2x(movq, kX86InstIdMovq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(movq, kX86InstIdMovq, X86Mem, X86XmmReg) + //! \overload + INST_2x(movq, kX86InstIdMovq, X86XmmReg, X86Mem) + + //! Move QWORD (X64 Only). + INST_2x(movq, kX86InstIdMovq, X86GpReg, X86XmmReg) + //! \overload + INST_2x(movq, kX86InstIdMovq, X86XmmReg, X86GpReg) + + //! Move QWORD using NT hint (SSE). + INST_2x(movntq, kX86InstIdMovntq, X86Mem, X86MmReg) + + //! Move high to low packed SP-FP (SSE). + INST_2x(movhlps, kX86InstIdMovhlps, X86XmmReg, X86XmmReg) + + //! Move high packed SP-FP (SSE). + INST_2x(movhps, kX86InstIdMovhps, X86XmmReg, X86Mem) + //! Move high packed SP-FP (SSE). + INST_2x(movhps, kX86InstIdMovhps, X86Mem, X86XmmReg) + + //! Move low to high packed SP-FP (SSE). + INST_2x(movlhps, kX86InstIdMovlhps, X86XmmReg, X86XmmReg) + + //! Move low packed SP-FP (SSE). + INST_2x(movlps, kX86InstIdMovlps, X86XmmReg, X86Mem) + //! Move low packed SP-FP (SSE). + INST_2x(movlps, kX86InstIdMovlps, X86Mem, X86XmmReg) + + //! Move aligned packed SP-FP using NT hint (SSE). + INST_2x(movntps, kX86InstIdMovntps, X86Mem, X86XmmReg) + + //! Move scalar SP-FP (SSE). + INST_2x(movss, kX86InstIdMovss, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(movss, kX86InstIdMovss, X86XmmReg, X86Mem) + //! \overload + INST_2x(movss, kX86InstIdMovss, X86Mem, X86XmmReg) + + //! Move unaligned packed SP-FP (SSE). + INST_2x(movups, kX86InstIdMovups, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(movups, kX86InstIdMovups, X86XmmReg, X86Mem) + //! 
\overload
+  INST_2x(movups, kX86InstIdMovups, X86Mem, X86XmmReg)
+
+  //! Packed SP-FP multiply (SSE).
+  INST_2x(mulps, kX86InstIdMulps, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(mulps, kX86InstIdMulps, X86XmmReg, X86Mem)
+
+  //! Scalar SP-FP multiply (SSE).
+  INST_2x(mulss, kX86InstIdMulss, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(mulss, kX86InstIdMulss, X86XmmReg, X86Mem)
+
+  //! Packed SP-FP bitwise or (SSE).
+  INST_2x(orps, kX86InstIdOrps, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(orps, kX86InstIdOrps, X86XmmReg, X86Mem)
+
+  //! Packed BYTE average (SSE).
+  INST_2x(pavgb, kX86InstIdPavgb, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pavgb, kX86InstIdPavgb, X86MmReg, X86Mem)
+
+  //! Packed WORD average (SSE).
+  INST_2x(pavgw, kX86InstIdPavgw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pavgw, kX86InstIdPavgw, X86MmReg, X86Mem)
+
+  //! Extract WORD based on selector (SSE).
+  INST_3i(pextrw, kX86InstIdPextrw, X86GpReg, X86MmReg, Imm)
+
+  //! Insert WORD based on selector (SSE).
+  INST_3i(pinsrw, kX86InstIdPinsrw, X86MmReg, X86GpReg, Imm)
+  //! \overload
+  INST_3i(pinsrw, kX86InstIdPinsrw, X86MmReg, X86Mem, Imm)
+
+  //! Packed WORD maximum (SSE).
+  INST_2x(pmaxsw, kX86InstIdPmaxsw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pmaxsw, kX86InstIdPmaxsw, X86MmReg, X86Mem)
+
+  //! Packed BYTE unsigned maximum (SSE).
+  INST_2x(pmaxub, kX86InstIdPmaxub, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pmaxub, kX86InstIdPmaxub, X86MmReg, X86Mem)
+
+  //! Packed WORD minimum (SSE).
+  INST_2x(pminsw, kX86InstIdPminsw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pminsw, kX86InstIdPminsw, X86MmReg, X86Mem)
+
+  //! Packed BYTE unsigned minimum (SSE).
+  INST_2x(pminub, kX86InstIdPminub, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pminub, kX86InstIdPminub, X86MmReg, X86Mem)
+
+  //! Move byte mask to integer (SSE).
+  INST_2x(pmovmskb, kX86InstIdPmovmskb, X86GpReg, X86MmReg)
+
+  //! Packed WORD unsigned multiply high (SSE).
+  INST_2x(pmulhuw, kX86InstIdPmulhuw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(pmulhuw, kX86InstIdPmulhuw, X86MmReg, X86Mem)
+
+  //! Packed WORD sum of absolute differences (SSE).
+  INST_2x(psadbw, kX86InstIdPsadbw, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(psadbw, kX86InstIdPsadbw, X86MmReg, X86Mem)
+
+  //! Packed WORD shuffle (SSE).
+  INST_3i(pshufw, kX86InstIdPshufw, X86MmReg, X86MmReg, Imm)
+  //! \overload
+  INST_3i(pshufw, kX86InstIdPshufw, X86MmReg, X86Mem, Imm)
+
+  //! Packed SP-FP reciprocal (SSE).
+  INST_2x(rcpps, kX86InstIdRcpps, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(rcpps, kX86InstIdRcpps, X86XmmReg, X86Mem)
+
+  //! Scalar SP-FP reciprocal (SSE).
+  INST_2x(rcpss, kX86InstIdRcpss, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(rcpss, kX86InstIdRcpss, X86XmmReg, X86Mem)
+
+  //! Prefetch (SSE).
+  INST_2i(prefetch, kX86InstIdPrefetch, X86Mem, Imm)
+
+  //! Packed WORD sum of absolute differences (SSE2).
+  INST_2x(psadbw, kX86InstIdPsadbw, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(psadbw, kX86InstIdPsadbw, X86XmmReg, X86Mem)
+
+  //! Packed SP-FP square root reciprocal (SSE).
+  INST_2x(rsqrtps, kX86InstIdRsqrtps, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(rsqrtps, kX86InstIdRsqrtps, X86XmmReg, X86Mem)
+
+  //! Scalar SP-FP square root reciprocal (SSE).
+  INST_2x(rsqrtss, kX86InstIdRsqrtss, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(rsqrtss, kX86InstIdRsqrtss, X86XmmReg, X86Mem)
+
+  //! Store fence (SSE).
+  INST_0x(sfence, kX86InstIdSfence)
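+
+  //! \note A usage sketch (assumptions: an `X86Assembler a`, registers and the
+  //! `ptr` memory-operand helper from `asmjit::x86`, 16-byte aligned buffers
+  //! addressed by esi and edi):
+  //!
+  //! \code
+  //! a.movaps(xmm0, ptr(esi));  // Load four packed floats.
+  //! a.addps(xmm0, ptr(edi));   // Add four packed floats element-wise.
+  //! a.movaps(ptr(esi), xmm0);  // Store the packed result back.
+  //! \endcode
+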
+  //! Shuffle SP-FP (SSE).
+  INST_3i(shufps, kX86InstIdShufps, X86XmmReg, X86XmmReg, Imm)
+  //! \overload
+  INST_3i(shufps, kX86InstIdShufps, X86XmmReg, X86Mem, Imm)
+
+  //! Packed SP-FP square root (SSE).
+  INST_2x(sqrtps, kX86InstIdSqrtps, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(sqrtps, kX86InstIdSqrtps, X86XmmReg, X86Mem)
+
+  //! Scalar SP-FP square root (SSE).
+  INST_2x(sqrtss, kX86InstIdSqrtss, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(sqrtss, kX86InstIdSqrtss, X86XmmReg, X86Mem)
+
+  //! Store streaming SIMD extension control/status (SSE).
+  INST_1x(stmxcsr, kX86InstIdStmxcsr, X86Mem)
+
+  //! Packed SP-FP subtract (SSE).
+  INST_2x(subps, kX86InstIdSubps, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(subps, kX86InstIdSubps, X86XmmReg, X86Mem)
+
+  //! Scalar SP-FP subtract (SSE).
+  INST_2x(subss, kX86InstIdSubss, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(subss, kX86InstIdSubss, X86XmmReg, X86Mem)
+
+  //! Unordered scalar SP-FP compare and set EFLAGS (SSE).
+  INST_2x(ucomiss, kX86InstIdUcomiss, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(ucomiss, kX86InstIdUcomiss, X86XmmReg, X86Mem)
+
+  //! Unpack high packed SP-FP data (SSE).
+  INST_2x(unpckhps, kX86InstIdUnpckhps, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(unpckhps, kX86InstIdUnpckhps, X86XmmReg, X86Mem)
+
+  //! Unpack low packed SP-FP data (SSE).
+  INST_2x(unpcklps, kX86InstIdUnpcklps, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(unpcklps, kX86InstIdUnpcklps, X86XmmReg, X86Mem)
+
+  //! Packed SP-FP bitwise xor (SSE).
+  INST_2x(xorps, kX86InstIdXorps, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(xorps, kX86InstIdXorps, X86XmmReg, X86Mem)
+
+  // --------------------------------------------------------------------------
+  // [SSE2]
+  // --------------------------------------------------------------------------
+
+  //! Packed DP-FP add (SSE2).
+  INST_2x(addpd, kX86InstIdAddpd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(addpd, kX86InstIdAddpd, X86XmmReg, X86Mem)
+
+  //! Scalar DP-FP add (SSE2).
+  INST_2x(addsd, kX86InstIdAddsd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(addsd, kX86InstIdAddsd, X86XmmReg, X86Mem)
+
+  //! Packed DP-FP bitwise and-not (SSE2).
+  INST_2x(andnpd, kX86InstIdAndnpd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(andnpd, kX86InstIdAndnpd, X86XmmReg, X86Mem)
+
+  //! Packed DP-FP bitwise and (SSE2).
+  INST_2x(andpd, kX86InstIdAndpd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(andpd, kX86InstIdAndpd, X86XmmReg, X86Mem)
+
+  //! Packed DP-FP compare (SSE2).
+  INST_3i(cmppd, kX86InstIdCmppd, X86XmmReg, X86XmmReg, Imm)
+  //! \overload
+  INST_3i(cmppd, kX86InstIdCmppd, X86XmmReg, X86Mem, Imm)
+
+  //! Scalar DP-FP compare (SSE2).
+  INST_3i(cmpsd, kX86InstIdCmpsd, X86XmmReg, X86XmmReg, Imm)
+  //! \overload
+  INST_3i(cmpsd, kX86InstIdCmpsd, X86XmmReg, X86Mem, Imm)
+
+  //! Scalar ordered DP-FP compare and set EFLAGS (SSE2).
+  INST_2x(comisd, kX86InstIdComisd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(comisd, kX86InstIdComisd, X86XmmReg, X86Mem)
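+
+  //! \note A usage sketch of the scalar DP-FP path (assuming an `X86Assembler
+  //! a` and registers from `asmjit::x86`; the conversion forms used here are
+  //! declared just below):
+  //!
+  //! \code
+  //! a.cvtsi2sd(xmm0, eax);   // xmm0 = (double)eax.
+  //! a.addsd(xmm0, xmm1);     // xmm0 += xmm1.
+  //! a.cvttsd2si(eax, xmm0);  // eax = (int)xmm0, truncated toward zero.
+  //! \endcode
+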
+  //! Convert packed DWORDs to packed DP-FP (SSE2).
+  INST_2x(cvtdq2pd, kX86InstIdCvtdq2pd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvtdq2pd, kX86InstIdCvtdq2pd, X86XmmReg, X86Mem)
+
+  //! Convert packed DWORDs to packed SP-FP (SSE2).
+  INST_2x(cvtdq2ps, kX86InstIdCvtdq2ps, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvtdq2ps, kX86InstIdCvtdq2ps, X86XmmReg, X86Mem)
+
+  //! Convert packed DP-FP to packed DWORDs (SSE2).
+  INST_2x(cvtpd2dq, kX86InstIdCvtpd2dq, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvtpd2dq, kX86InstIdCvtpd2dq, X86XmmReg, X86Mem)
+
+  //! Convert packed DP-FP to packed DWORDs (SSE2).
+  INST_2x(cvtpd2pi, kX86InstIdCvtpd2pi, X86MmReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvtpd2pi, kX86InstIdCvtpd2pi, X86MmReg, X86Mem)
+
+  //! Convert packed DP-FP to packed SP-FP (SSE2).
+  INST_2x(cvtpd2ps, kX86InstIdCvtpd2ps, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvtpd2ps, kX86InstIdCvtpd2ps, X86XmmReg, X86Mem)
+
+  //! Convert packed DWORD integers to packed DP-FP (SSE2).
+  INST_2x(cvtpi2pd, kX86InstIdCvtpi2pd, X86XmmReg, X86MmReg)
+  //! \overload
+  INST_2x(cvtpi2pd, kX86InstIdCvtpi2pd, X86XmmReg, X86Mem)
+
+  //! Convert packed SP-FP to packed DWORDs (SSE2).
+  INST_2x(cvtps2dq, kX86InstIdCvtps2dq, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvtps2dq, kX86InstIdCvtps2dq, X86XmmReg, X86Mem)
+
+  //! Convert packed SP-FP to packed DP-FP (SSE2).
+  INST_2x(cvtps2pd, kX86InstIdCvtps2pd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvtps2pd, kX86InstIdCvtps2pd, X86XmmReg, X86Mem)
+
+  //! Convert scalar DP-FP to DWORD integer (SSE2).
+  INST_2x(cvtsd2si, kX86InstIdCvtsd2si, X86GpReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvtsd2si, kX86InstIdCvtsd2si, X86GpReg, X86Mem)
+
+  //! Convert scalar DP-FP to scalar SP-FP (SSE2).
+  INST_2x(cvtsd2ss, kX86InstIdCvtsd2ss, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvtsd2ss, kX86InstIdCvtsd2ss, X86XmmReg, X86Mem)
+
+  //! Convert DWORD integer to scalar DP-FP (SSE2).
+  INST_2x(cvtsi2sd, kX86InstIdCvtsi2sd, X86XmmReg, X86GpReg)
+  //! \overload
+  INST_2x(cvtsi2sd, kX86InstIdCvtsi2sd, X86XmmReg, X86Mem)
+
+  //! Convert scalar SP-FP to DP-FP (SSE2).
+  INST_2x(cvtss2sd, kX86InstIdCvtss2sd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvtss2sd, kX86InstIdCvtss2sd, X86XmmReg, X86Mem)
+
+  //! Convert with truncation packed DP-FP to packed DWORDs (SSE2).
+  INST_2x(cvttpd2pi, kX86InstIdCvttpd2pi, X86MmReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvttpd2pi, kX86InstIdCvttpd2pi, X86MmReg, X86Mem)
+
+  //! Convert with truncation packed DP-FP to packed DWORDs (SSE2).
+  INST_2x(cvttpd2dq, kX86InstIdCvttpd2dq, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvttpd2dq, kX86InstIdCvttpd2dq, X86XmmReg, X86Mem)
+
+  //! Convert with truncation packed SP-FP to packed DWORDs (SSE2).
+  INST_2x(cvttps2dq, kX86InstIdCvttps2dq, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvttps2dq, kX86InstIdCvttps2dq, X86XmmReg, X86Mem)
+
+  //! Convert with truncation scalar DP-FP to signed DWORD (SSE2).
+  INST_2x(cvttsd2si, kX86InstIdCvttsd2si, X86GpReg, X86XmmReg)
+  //! \overload
+  INST_2x(cvttsd2si, kX86InstIdCvttsd2si, X86GpReg, X86Mem)
+
+  //! Packed DP-FP divide (SSE2).
+  INST_2x(divpd, kX86InstIdDivpd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(divpd, kX86InstIdDivpd, X86XmmReg, X86Mem)
+
+  //! Scalar DP-FP divide (SSE2).
+  INST_2x(divsd, kX86InstIdDivsd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(divsd, kX86InstIdDivsd, X86XmmReg, X86Mem)
+
+  //! Load fence (SSE2).
+  INST_0x(lfence, kX86InstIdLfence)
+
+  //! Store selected bytes of DQWORD to DS:EDI/RDI (SSE2).
+  INST_2x(maskmovdqu, kX86InstIdMaskmovdqu, X86XmmReg, X86XmmReg)
+
+  //! Packed DP-FP maximum (SSE2).
+  INST_2x(maxpd, kX86InstIdMaxpd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(maxpd, kX86InstIdMaxpd, X86XmmReg, X86Mem)
+
+  //! Scalar DP-FP maximum (SSE2).
+  INST_2x(maxsd, kX86InstIdMaxsd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(maxsd, kX86InstIdMaxsd, X86XmmReg, X86Mem)
+
+  //! Memory fence (SSE2).
+  INST_0x(mfence, kX86InstIdMfence)
+
+  //!
Packed DP-FP minimum (SSE2). + INST_2x(minpd, kX86InstIdMinpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(minpd, kX86InstIdMinpd, X86XmmReg, X86Mem) + + //! Scalar DP-FP minimum (SSE2). + INST_2x(minsd, kX86InstIdMinsd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(minsd, kX86InstIdMinsd, X86XmmReg, X86Mem) + + //! Move aligned DQWORD (SSE2). + INST_2x(movdqa, kX86InstIdMovdqa, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(movdqa, kX86InstIdMovdqa, X86XmmReg, X86Mem) + //! \overload + INST_2x(movdqa, kX86InstIdMovdqa, X86Mem, X86XmmReg) + + //! Move unaligned DQWORD (SSE2). + INST_2x(movdqu, kX86InstIdMovdqu, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(movdqu, kX86InstIdMovdqu, X86XmmReg, X86Mem) + //! \overload + INST_2x(movdqu, kX86InstIdMovdqu, X86Mem, X86XmmReg) + + //! Extract packed SP-FP sign mask (SSE2). + INST_2x(movmskps, kX86InstIdMovmskps, X86GpReg, X86XmmReg) + + //! Extract packed DP-FP sign mask (SSE2). + INST_2x(movmskpd, kX86InstIdMovmskpd, X86GpReg, X86XmmReg) + + //! Move scalar DP-FP (SSE2). + INST_2x(movsd, kX86InstIdMovsd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(movsd, kX86InstIdMovsd, X86XmmReg, X86Mem) + //! \overload + INST_2x(movsd, kX86InstIdMovsd, X86Mem, X86XmmReg) + + //! Move aligned packed DP-FP (SSE2). + INST_2x(movapd, kX86InstIdMovapd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(movapd, kX86InstIdMovapd, X86XmmReg, X86Mem) + //! \overload + INST_2x(movapd, kX86InstIdMovapd, X86Mem, X86XmmReg) + + //! Move QWORD from XMM to MMX register (SSE2). + INST_2x(movdq2q, kX86InstIdMovdq2q, X86MmReg, X86XmmReg) + + //! Move QWORD from MMX to XMM register (SSE2). + INST_2x(movq2dq, kX86InstIdMovq2dq, X86XmmReg, X86MmReg) + + //! Move high packed DP-FP (SSE2). + INST_2x(movhpd, kX86InstIdMovhpd, X86XmmReg, X86Mem) + //! \overload + INST_2x(movhpd, kX86InstIdMovhpd, X86Mem, X86XmmReg) + + //! Move low packed DP-FP (SSE2). + INST_2x(movlpd, kX86InstIdMovlpd, X86XmmReg, X86Mem) + //! \overload + INST_2x(movlpd, kX86InstIdMovlpd, X86Mem, X86XmmReg) + + //! Store DQWORD using NT hint (SSE2). + INST_2x(movntdq, kX86InstIdMovntdq, X86Mem, X86XmmReg) + + //! Store DWORD using NT hint (SSE2). + INST_2x(movnti, kX86InstIdMovnti, X86Mem, X86GpReg) + + //! Store packed DP-FP using NT hint (SSE2). + INST_2x(movntpd, kX86InstIdMovntpd, X86Mem, X86XmmReg) + + //! Move unaligned packed DP-FP (SSE2). + INST_2x(movupd, kX86InstIdMovupd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(movupd, kX86InstIdMovupd, X86XmmReg, X86Mem) + //! \overload + INST_2x(movupd, kX86InstIdMovupd, X86Mem, X86XmmReg) + + //! Packed DP-FP multiply (SSE2). + INST_2x(mulpd, kX86InstIdMulpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(mulpd, kX86InstIdMulpd, X86XmmReg, X86Mem) + + //! Scalar DP-FP multiply (SSE2). + INST_2x(mulsd, kX86InstIdMulsd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(mulsd, kX86InstIdMulsd, X86XmmReg, X86Mem) + + //! Packed DP-FP bitwise or (SSE2). + INST_2x(orpd, kX86InstIdOrpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(orpd, kX86InstIdOrpd, X86XmmReg, X86Mem) + + //! Pack WORDs to BYTEs with signed saturation (SSE2). + INST_2x(packsswb, kX86InstIdPacksswb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(packsswb, kX86InstIdPacksswb, X86XmmReg, X86Mem) + + //! Pack DWORDs to WORDs with signed saturation (SSE2). + INST_2x(packssdw, kX86InstIdPackssdw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(packssdw, kX86InstIdPackssdw, X86XmmReg, X86Mem) + + //! Pack WORDs to BYTEs with unsigned saturation (SSE2). 
+ INST_2x(packuswb, kX86InstIdPackuswb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(packuswb, kX86InstIdPackuswb, X86XmmReg, X86Mem) + + //! Packed BYTE Add (SSE2). + INST_2x(paddb, kX86InstIdPaddb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(paddb, kX86InstIdPaddb, X86XmmReg, X86Mem) + + //! Packed WORD add (SSE2). + INST_2x(paddw, kX86InstIdPaddw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(paddw, kX86InstIdPaddw, X86XmmReg, X86Mem) + + //! Packed DWORD add (SSE2). + INST_2x(paddd, kX86InstIdPaddd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(paddd, kX86InstIdPaddd, X86XmmReg, X86Mem) + + //! Packed QWORD add (SSE2). + INST_2x(paddq, kX86InstIdPaddq, X86MmReg, X86MmReg) + //! \overload + INST_2x(paddq, kX86InstIdPaddq, X86MmReg, X86Mem) + + //! Packed QWORD add (SSE2). + INST_2x(paddq, kX86InstIdPaddq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(paddq, kX86InstIdPaddq, X86XmmReg, X86Mem) + + //! Packed BYTE add with saturation (SSE2). + INST_2x(paddsb, kX86InstIdPaddsb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(paddsb, kX86InstIdPaddsb, X86XmmReg, X86Mem) + + //! Packed WORD add with saturation (SSE2). + INST_2x(paddsw, kX86InstIdPaddsw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(paddsw, kX86InstIdPaddsw, X86XmmReg, X86Mem) + + //! Packed BYTE add with unsigned saturation (SSE2). + INST_2x(paddusb, kX86InstIdPaddusb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(paddusb, kX86InstIdPaddusb, X86XmmReg, X86Mem) + + //! Packed WORD add with unsigned saturation (SSE2). + INST_2x(paddusw, kX86InstIdPaddusw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(paddusw, kX86InstIdPaddusw, X86XmmReg, X86Mem) + + //! Packed bitwise and (SSE2). + INST_2x(pand, kX86InstIdPand, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pand, kX86InstIdPand, X86XmmReg, X86Mem) + + //! Packed bitwise and-not (SSE2). + INST_2x(pandn, kX86InstIdPandn, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pandn, kX86InstIdPandn, X86XmmReg, X86Mem) + + //! Spin loop hint (SSE2). + INST_0x(pause, kX86InstIdPause) + + //! Packed BYTE average (SSE2). + INST_2x(pavgb, kX86InstIdPavgb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pavgb, kX86InstIdPavgb, X86XmmReg, X86Mem) + + //! Packed WORD average (SSE2). + INST_2x(pavgw, kX86InstIdPavgw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pavgw, kX86InstIdPavgw, X86XmmReg, X86Mem) + + //! Packed BYTE compare for equality (SSE2). + INST_2x(pcmpeqb, kX86InstIdPcmpeqb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pcmpeqb, kX86InstIdPcmpeqb, X86XmmReg, X86Mem) + + //! Packed WORD compare for equality (SSE2). + INST_2x(pcmpeqw, kX86InstIdPcmpeqw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pcmpeqw, kX86InstIdPcmpeqw, X86XmmReg, X86Mem) + + //! Packed DWORD compare for equality (SSE2). + INST_2x(pcmpeqd, kX86InstIdPcmpeqd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pcmpeqd, kX86InstIdPcmpeqd, X86XmmReg, X86Mem) + + //! Packed BYTE compare if greater than (SSE2). + INST_2x(pcmpgtb, kX86InstIdPcmpgtb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pcmpgtb, kX86InstIdPcmpgtb, X86XmmReg, X86Mem) + + //! Packed WORD compare if greater than (SSE2). + INST_2x(pcmpgtw, kX86InstIdPcmpgtw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pcmpgtw, kX86InstIdPcmpgtw, X86XmmReg, X86Mem) + + //! Packed DWORD compare if greater than (SSE2). + INST_2x(pcmpgtd, kX86InstIdPcmpgtd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pcmpgtd, kX86InstIdPcmpgtd, X86XmmReg, X86Mem) + + //! Extract WORD based on selector (SSE2). 
+ INST_3i(pextrw, kX86InstIdPextrw, X86GpReg, X86XmmReg, Imm) + + //! Insert WORD based on selector (SSE2). + INST_3i(pinsrw, kX86InstIdPinsrw, X86XmmReg, X86GpReg, Imm) + //! \overload + INST_3i(pinsrw, kX86InstIdPinsrw, X86XmmReg, X86Mem, Imm) + + //! Packed WORD maximum (SSE2). + INST_2x(pmaxsw, kX86InstIdPmaxsw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmaxsw, kX86InstIdPmaxsw, X86XmmReg, X86Mem) + + //! Packed BYTE unsigned maximum (SSE2). + INST_2x(pmaxub, kX86InstIdPmaxub, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmaxub, kX86InstIdPmaxub, X86XmmReg, X86Mem) + + //! Packed WORD minimum (SSE2). + INST_2x(pminsw, kX86InstIdPminsw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pminsw, kX86InstIdPminsw, X86XmmReg, X86Mem) + + //! Packed BYTE unsigned minimum (SSE2). + INST_2x(pminub, kX86InstIdPminub, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pminub, kX86InstIdPminub, X86XmmReg, X86Mem) + + //! Move byte mask (SSE2). + INST_2x(pmovmskb, kX86InstIdPmovmskb, X86GpReg, X86XmmReg) + + //! Packed WORD multiply high (SSE2). + INST_2x(pmulhw, kX86InstIdPmulhw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmulhw, kX86InstIdPmulhw, X86XmmReg, X86Mem) + + //! Packed WORD unsigned multiply high (SSE2). + INST_2x(pmulhuw, kX86InstIdPmulhuw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmulhuw, kX86InstIdPmulhuw, X86XmmReg, X86Mem) + + //! Packed WORD multiply low (SSE2). + INST_2x(pmullw, kX86InstIdPmullw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmullw, kX86InstIdPmullw, X86XmmReg, X86Mem) + + //! Packed DWORD multiply to QWORD (SSE2). + INST_2x(pmuludq, kX86InstIdPmuludq, X86MmReg, X86MmReg) + //! \overload + INST_2x(pmuludq, kX86InstIdPmuludq, X86MmReg, X86Mem) + + //! Packed DWORD multiply to QWORD (SSE2). + INST_2x(pmuludq, kX86InstIdPmuludq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmuludq, kX86InstIdPmuludq, X86XmmReg, X86Mem) + + //! Packed bitwise or (SSE2). + INST_2x(por, kX86InstIdPor, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(por, kX86InstIdPor, X86XmmReg, X86Mem) + + //! Packed DWORD shift left logical (SSE2). + INST_2x(pslld, kX86InstIdPslld, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pslld, kX86InstIdPslld, X86XmmReg, X86Mem) + //! \overload + INST_2i(pslld, kX86InstIdPslld, X86XmmReg, Imm) + + //! Packed QWORD shift left logical (SSE2). + INST_2x(psllq, kX86InstIdPsllq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(psllq, kX86InstIdPsllq, X86XmmReg, X86Mem) + //! \overload + INST_2i(psllq, kX86InstIdPsllq, X86XmmReg, Imm) + + //! Packed WORD shift left logical (SSE2). + INST_2x(psllw, kX86InstIdPsllw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(psllw, kX86InstIdPsllw, X86XmmReg, X86Mem) + //! \overload + INST_2i(psllw, kX86InstIdPsllw, X86XmmReg, Imm) + + //! Packed DQWORD shift left logical (SSE2). + INST_2i(pslldq, kX86InstIdPslldq, X86XmmReg, Imm) + + //! Packed DWORD shift right arithmetic (SSE2). + INST_2x(psrad, kX86InstIdPsrad, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(psrad, kX86InstIdPsrad, X86XmmReg, X86Mem) + //! \overload + INST_2i(psrad, kX86InstIdPsrad, X86XmmReg, Imm) + + //! Packed WORD shift right arithmetic (SSE2). + INST_2x(psraw, kX86InstIdPsraw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(psraw, kX86InstIdPsraw, X86XmmReg, X86Mem) + //! \overload + INST_2i(psraw, kX86InstIdPsraw, X86XmmReg, Imm) + + //! Packed BYTE subtract (SSE2). + INST_2x(psubb, kX86InstIdPsubb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(psubb, kX86InstIdPsubb, X86XmmReg, X86Mem) + + //! Packed DWORD subtract (SSE2). 
+  INST_2x(psubd, kX86InstIdPsubd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(psubd, kX86InstIdPsubd, X86XmmReg, X86Mem)
+
+  //! Packed QWORD subtract (SSE2).
+  INST_2x(psubq, kX86InstIdPsubq, X86MmReg, X86MmReg)
+  //! \overload
+  INST_2x(psubq, kX86InstIdPsubq, X86MmReg, X86Mem)
+
+  //! Packed QWORD subtract (SSE2).
+  INST_2x(psubq, kX86InstIdPsubq, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(psubq, kX86InstIdPsubq, X86XmmReg, X86Mem)
+
+  //! Packed WORD subtract (SSE2).
+  INST_2x(psubw, kX86InstIdPsubw, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(psubw, kX86InstIdPsubw, X86XmmReg, X86Mem)
+
+  //! Packed WORD to DWORD multiply and add (SSE2).
+  INST_2x(pmaddwd, kX86InstIdPmaddwd, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(pmaddwd, kX86InstIdPmaddwd, X86XmmReg, X86Mem)
+
+  //! Packed DWORD shuffle (SSE2).
+  INST_3i(pshufd, kX86InstIdPshufd, X86XmmReg, X86XmmReg, Imm)
+  //! \overload
+  INST_3i(pshufd, kX86InstIdPshufd, X86XmmReg, X86Mem, Imm)
+
+  //! Packed WORD shuffle high (SSE2).
+  INST_3i(pshufhw, kX86InstIdPshufhw, X86XmmReg, X86XmmReg, Imm)
+  //! \overload
+  INST_3i(pshufhw, kX86InstIdPshufhw, X86XmmReg, X86Mem, Imm)
+
+  //! Packed WORD shuffle low (SSE2).
+  INST_3i(pshuflw, kX86InstIdPshuflw, X86XmmReg, X86XmmReg, Imm)
+  //! \overload
+  INST_3i(pshuflw, kX86InstIdPshuflw, X86XmmReg, X86Mem, Imm)
+
+  //! Packed DWORD shift right logical (SSE2).
+  INST_2x(psrld, kX86InstIdPsrld, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(psrld, kX86InstIdPsrld, X86XmmReg, X86Mem)
+  //! \overload
+  INST_2i(psrld, kX86InstIdPsrld, X86XmmReg, Imm)
+
+  //! Packed QWORD shift right logical (SSE2).
+  INST_2x(psrlq, kX86InstIdPsrlq, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(psrlq, kX86InstIdPsrlq, X86XmmReg, X86Mem)
+  //! \overload
+  INST_2i(psrlq, kX86InstIdPsrlq, X86XmmReg, Imm)
+
+  //! Packed DQWORD shift right logical (SSE2).
+  INST_2i(psrldq, kX86InstIdPsrldq, X86XmmReg, Imm)
+
+  //! Packed WORD shift right logical (SSE2).
+  INST_2x(psrlw, kX86InstIdPsrlw, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(psrlw, kX86InstIdPsrlw, X86XmmReg, X86Mem)
+  //! \overload
+  INST_2i(psrlw, kX86InstIdPsrlw, X86XmmReg, Imm)
+
+  //! Packed BYTE subtract with saturation (SSE2).
+  INST_2x(psubsb, kX86InstIdPsubsb, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(psubsb, kX86InstIdPsubsb, X86XmmReg, X86Mem)
+
+  //! Packed WORD subtract with saturation (SSE2).
+  INST_2x(psubsw, kX86InstIdPsubsw, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(psubsw, kX86InstIdPsubsw, X86XmmReg, X86Mem)
+
+  //! Packed BYTE subtract with unsigned saturation (SSE2).
+  INST_2x(psubusb, kX86InstIdPsubusb, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(psubusb, kX86InstIdPsubusb, X86XmmReg, X86Mem)
+
+  //! Packed WORD subtract with unsigned saturation (SSE2).
+  INST_2x(psubusw, kX86InstIdPsubusw, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(psubusw, kX86InstIdPsubusw, X86XmmReg, X86Mem)
+
+  //! Unpack high packed BYTEs to WORDs (SSE2).
+  INST_2x(punpckhbw, kX86InstIdPunpckhbw, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(punpckhbw, kX86InstIdPunpckhbw, X86XmmReg, X86Mem)
+
+  //! Unpack high packed DWORDs to QWORDs (SSE2).
+  INST_2x(punpckhdq, kX86InstIdPunpckhdq, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(punpckhdq, kX86InstIdPunpckhdq, X86XmmReg, X86Mem)
+
+  //! Unpack high packed QWORDs to DQWORD (SSE2).
+  INST_2x(punpckhqdq, kX86InstIdPunpckhqdq, X86XmmReg, X86XmmReg)
+  //! \overload
+  INST_2x(punpckhqdq, kX86InstIdPunpckhqdq, X86XmmReg, X86Mem)
+
+  //! Unpack high packed WORDs to DWORDs (SSE2).
+ INST_2x(punpckhwd, kX86InstIdPunpckhwd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(punpckhwd, kX86InstIdPunpckhwd, X86XmmReg, X86Mem) + + //! Unpack low packed BYTEs to WORDs (SSE2). + INST_2x(punpcklbw, kX86InstIdPunpcklbw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(punpcklbw, kX86InstIdPunpcklbw, X86XmmReg, X86Mem) + + //! Unpack low packed DWORDs to QWORDs (SSE2). + INST_2x(punpckldq, kX86InstIdPunpckldq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(punpckldq, kX86InstIdPunpckldq, X86XmmReg, X86Mem) + + //! Unpack low packed QWORDs to DQWORD (SSE2). + INST_2x(punpcklqdq, kX86InstIdPunpcklqdq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(punpcklqdq, kX86InstIdPunpcklqdq, X86XmmReg, X86Mem) + + //! Unpack low packed WORDs to DWORDs (SSE2). + INST_2x(punpcklwd, kX86InstIdPunpcklwd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(punpcklwd, kX86InstIdPunpcklwd, X86XmmReg, X86Mem) + + //! Packed bitwise xor (SSE2). + INST_2x(pxor, kX86InstIdPxor, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pxor, kX86InstIdPxor, X86XmmReg, X86Mem) + + //! Shuffle DP-FP (SSE2). + INST_3i(shufpd, kX86InstIdShufpd, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(shufpd, kX86InstIdShufpd, X86XmmReg, X86Mem, Imm) + + //! Packed DP-FP square root (SSE2). + INST_2x(sqrtpd, kX86InstIdSqrtpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(sqrtpd, kX86InstIdSqrtpd, X86XmmReg, X86Mem) + + //! Scalar DP-FP square root (SSE2). + INST_2x(sqrtsd, kX86InstIdSqrtsd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(sqrtsd, kX86InstIdSqrtsd, X86XmmReg, X86Mem) + + //! Packed DP-FP subtract (SSE2). + INST_2x(subpd, kX86InstIdSubpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(subpd, kX86InstIdSubpd, X86XmmReg, X86Mem) + + //! Scalar DP-FP subtract (SSE2). + INST_2x(subsd, kX86InstIdSubsd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(subsd, kX86InstIdSubsd, X86XmmReg, X86Mem) + + //! Scalar DP-FP unordered compare and set EFLAGS (SSE2). + INST_2x(ucomisd, kX86InstIdUcomisd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(ucomisd, kX86InstIdUcomisd, X86XmmReg, X86Mem) + + //! Unpack and interleave high packed DP-FP (SSE2). + INST_2x(unpckhpd, kX86InstIdUnpckhpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(unpckhpd, kX86InstIdUnpckhpd, X86XmmReg, X86Mem) + + //! Unpack and interleave low packed DP-FP (SSE2). + INST_2x(unpcklpd, kX86InstIdUnpcklpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(unpcklpd, kX86InstIdUnpcklpd, X86XmmReg, X86Mem) + + //! Packed DP-FP bitwise xor (SSE2). + INST_2x(xorpd, kX86InstIdXorpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(xorpd, kX86InstIdXorpd, X86XmmReg, X86Mem) + + // -------------------------------------------------------------------------- + // [SSE3] + // -------------------------------------------------------------------------- + + //! Packed DP-FP add/subtract (SSE3). + INST_2x(addsubpd, kX86InstIdAddsubpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(addsubpd, kX86InstIdAddsubpd, X86XmmReg, X86Mem) + + //! Packed SP-FP add/subtract (SSE3). + INST_2x(addsubps, kX86InstIdAddsubps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(addsubps, kX86InstIdAddsubps, X86XmmReg, X86Mem) + + //! Store truncated `fp0` to `short_or_int_or_long[o0]` and POP (FPU & SSE3). + INST_1x(fisttp, kX86InstIdFisttp, X86Mem) + + //! Packed DP-FP horizontal add (SSE3). + INST_2x(haddpd, kX86InstIdHaddpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(haddpd, kX86InstIdHaddpd, X86XmmReg, X86Mem) + + //! Packed SP-FP horizontal add (SSE3). 
+ INST_2x(haddps, kX86InstIdHaddps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(haddps, kX86InstIdHaddps, X86XmmReg, X86Mem) + + //! Packed DP-FP horizontal subtract (SSE3). + INST_2x(hsubpd, kX86InstIdHsubpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(hsubpd, kX86InstIdHsubpd, X86XmmReg, X86Mem) + + //! Packed SP-FP horizontal subtract (SSE3). + INST_2x(hsubps, kX86InstIdHsubps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(hsubps, kX86InstIdHsubps, X86XmmReg, X86Mem) + + //! Load 128-bits unaligned (SSE3). + INST_2x(lddqu, kX86InstIdLddqu, X86XmmReg, X86Mem) + + //! Setup monitor address (SSE3). + INST_0x(monitor, kX86InstIdMonitor) + + //! Move one DP-FP and duplicate (SSE3). + INST_2x(movddup, kX86InstIdMovddup, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(movddup, kX86InstIdMovddup, X86XmmReg, X86Mem) + + //! Move packed SP-FP high and duplicate (SSE3). + INST_2x(movshdup, kX86InstIdMovshdup, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(movshdup, kX86InstIdMovshdup, X86XmmReg, X86Mem) + + //! Move packed SP-FP low and duplicate (SSE3). + INST_2x(movsldup, kX86InstIdMovsldup, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(movsldup, kX86InstIdMovsldup, X86XmmReg, X86Mem) + + //! Monitor wait (SSE3). + INST_0x(mwait, kX86InstIdMwait) + + // -------------------------------------------------------------------------- + // [SSSE3] + // -------------------------------------------------------------------------- + + //! Packed BYTE sign (SSSE3). + INST_2x(psignb, kX86InstIdPsignb, X86MmReg, X86MmReg) + //! \overload + INST_2x(psignb, kX86InstIdPsignb, X86MmReg, X86Mem) + + //! Packed BYTE sign (SSSE3). + INST_2x(psignb, kX86InstIdPsignb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(psignb, kX86InstIdPsignb, X86XmmReg, X86Mem) + + //! Packed DWORD sign (SSSE3). + INST_2x(psignd, kX86InstIdPsignd, X86MmReg, X86MmReg) + //! \overload + INST_2x(psignd, kX86InstIdPsignd, X86MmReg, X86Mem) + + //! Packed DWORD sign (SSSE3). + INST_2x(psignd, kX86InstIdPsignd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(psignd, kX86InstIdPsignd, X86XmmReg, X86Mem) + + //! Packed WORD sign (SSSE3). + INST_2x(psignw, kX86InstIdPsignw, X86MmReg, X86MmReg) + //! \overload + INST_2x(psignw, kX86InstIdPsignw, X86MmReg, X86Mem) + + //! Packed WORD sign (SSSE3). + INST_2x(psignw, kX86InstIdPsignw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(psignw, kX86InstIdPsignw, X86XmmReg, X86Mem) + + //! Packed DWORD horizontal add (SSSE3). + INST_2x(phaddd, kX86InstIdPhaddd, X86MmReg, X86MmReg) + //! \overload + INST_2x(phaddd, kX86InstIdPhaddd, X86MmReg, X86Mem) + + //! Packed DWORD horizontal add (SSSE3). + INST_2x(phaddd, kX86InstIdPhaddd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(phaddd, kX86InstIdPhaddd, X86XmmReg, X86Mem) + + //! Packed WORD horizontal add with saturation (SSSE3). + INST_2x(phaddsw, kX86InstIdPhaddsw, X86MmReg, X86MmReg) + //! \overload + INST_2x(phaddsw, kX86InstIdPhaddsw, X86MmReg, X86Mem) + + //! Packed WORD horizontal add with saturation (SSSE3). + INST_2x(phaddsw, kX86InstIdPhaddsw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(phaddsw, kX86InstIdPhaddsw, X86XmmReg, X86Mem) + + //! Packed WORD horizontal add (SSSE3). + INST_2x(phaddw, kX86InstIdPhaddw, X86MmReg, X86MmReg) + //! \overload + INST_2x(phaddw, kX86InstIdPhaddw, X86MmReg, X86Mem) + + //! Packed WORD horizontal add (SSSE3). + INST_2x(phaddw, kX86InstIdPhaddw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(phaddw, kX86InstIdPhaddw, X86XmmReg, X86Mem) + + //! Packed DWORD horizontal subtract (SSSE3). 
+ INST_2x(phsubd, kX86InstIdPhsubd, X86MmReg, X86MmReg) + //! \overload + INST_2x(phsubd, kX86InstIdPhsubd, X86MmReg, X86Mem) + + //! Packed DWORD horizontal subtract (SSSE3). + INST_2x(phsubd, kX86InstIdPhsubd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(phsubd, kX86InstIdPhsubd, X86XmmReg, X86Mem) + + //! Packed WORD horizontal subtract with saturation (SSSE3). + INST_2x(phsubsw, kX86InstIdPhsubsw, X86MmReg, X86MmReg) + //! \overload + INST_2x(phsubsw, kX86InstIdPhsubsw, X86MmReg, X86Mem) + + //! Packed WORD horizontal subtract with saturation (SSSE3). + INST_2x(phsubsw, kX86InstIdPhsubsw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(phsubsw, kX86InstIdPhsubsw, X86XmmReg, X86Mem) + + //! Packed WORD horizontal subtract (SSSE3). + INST_2x(phsubw, kX86InstIdPhsubw, X86MmReg, X86MmReg) + //! \overload + INST_2x(phsubw, kX86InstIdPhsubw, X86MmReg, X86Mem) + + //! Packed WORD horizontal subtract (SSSE3). + INST_2x(phsubw, kX86InstIdPhsubw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(phsubw, kX86InstIdPhsubw, X86XmmReg, X86Mem) + + //! Packed multiply and add signed and unsigned bytes (SSSE3). + INST_2x(pmaddubsw, kX86InstIdPmaddubsw, X86MmReg, X86MmReg) + //! \overload + INST_2x(pmaddubsw, kX86InstIdPmaddubsw, X86MmReg, X86Mem) + + //! Packed multiply and add signed and unsigned bytes (SSSE3). + INST_2x(pmaddubsw, kX86InstIdPmaddubsw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmaddubsw, kX86InstIdPmaddubsw, X86XmmReg, X86Mem) + + //! Packed BYTE absolute value (SSSE3). + INST_2x(pabsb, kX86InstIdPabsb, X86MmReg, X86MmReg) + //! \overload + INST_2x(pabsb, kX86InstIdPabsb, X86MmReg, X86Mem) + + //! Packed BYTE absolute value (SSSE3). + INST_2x(pabsb, kX86InstIdPabsb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pabsb, kX86InstIdPabsb, X86XmmReg, X86Mem) + + //! Packed DWORD absolute value (SSSE3). + INST_2x(pabsd, kX86InstIdPabsd, X86MmReg, X86MmReg) + //! \overload + INST_2x(pabsd, kX86InstIdPabsd, X86MmReg, X86Mem) + + //! Packed DWORD absolute value (SSSE3). + INST_2x(pabsd, kX86InstIdPabsd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pabsd, kX86InstIdPabsd, X86XmmReg, X86Mem) + + //! Packed WORD absolute value (SSSE3). + INST_2x(pabsw, kX86InstIdPabsw, X86MmReg, X86MmReg) + //! \overload + INST_2x(pabsw, kX86InstIdPabsw, X86MmReg, X86Mem) + + //! Packed WORD absolute value (SSSE3). + INST_2x(pabsw, kX86InstIdPabsw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pabsw, kX86InstIdPabsw, X86XmmReg, X86Mem) + + //! Packed WORD multiply high, round and scale (SSSE3). + INST_2x(pmulhrsw, kX86InstIdPmulhrsw, X86MmReg, X86MmReg) + //! \overload + INST_2x(pmulhrsw, kX86InstIdPmulhrsw, X86MmReg, X86Mem) + + //! Packed WORD multiply high, round and scale (SSSE3). + INST_2x(pmulhrsw, kX86InstIdPmulhrsw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmulhrsw, kX86InstIdPmulhrsw, X86XmmReg, X86Mem) + + //! Packed BYTE shuffle (SSSE3). + INST_2x(pshufb, kX86InstIdPshufb, X86MmReg, X86MmReg) + //! \overload + INST_2x(pshufb, kX86InstIdPshufb, X86MmReg, X86Mem) + + //! Packed BYTE shuffle (SSSE3). + INST_2x(pshufb, kX86InstIdPshufb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pshufb, kX86InstIdPshufb, X86XmmReg, X86Mem) + + //! Packed align right (SSSE3). + INST_3i(palignr, kX86InstIdPalignr, X86MmReg, X86MmReg, Imm) + //! \overload + INST_3i(palignr, kX86InstIdPalignr, X86MmReg, X86Mem, Imm) + + //! Packed align right (SSSE3). + INST_3i(palignr, kX86InstIdPalignr, X86XmmReg, X86XmmReg, Imm) + //! 
\overload + INST_3i(palignr, kX86InstIdPalignr, X86XmmReg, X86Mem, Imm) + + // -------------------------------------------------------------------------- + // [SSE4.1] + // -------------------------------------------------------------------------- + + //! Packed DP-FP blend (SSE4.1). + INST_3i(blendpd, kX86InstIdBlendpd, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(blendpd, kX86InstIdBlendpd, X86XmmReg, X86Mem, Imm) + + //! Packed SP-FP blend (SSE4.1). + INST_3i(blendps, kX86InstIdBlendps, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(blendps, kX86InstIdBlendps, X86XmmReg, X86Mem, Imm) + + //! Packed DP-FP variable blend (SSE4.1). + INST_2x(blendvpd, kX86InstIdBlendvpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(blendvpd, kX86InstIdBlendvpd, X86XmmReg, X86Mem) + + //! Packed SP-FP variable blend (SSE4.1). + INST_2x(blendvps, kX86InstIdBlendvps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(blendvps, kX86InstIdBlendvps, X86XmmReg, X86Mem) + + //! Packed DP-FP dot product (SSE4.1). + INST_3i(dppd, kX86InstIdDppd, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(dppd, kX86InstIdDppd, X86XmmReg, X86Mem, Imm) + + //! Packed SP-FP dot product (SSE4.1). + INST_3i(dpps, kX86InstIdDpps, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(dpps, kX86InstIdDpps, X86XmmReg, X86Mem, Imm) + + //! Extract SP-FP based on selector (SSE4.1). + INST_3i(extractps, kX86InstIdExtractps, X86GpReg, X86XmmReg, Imm) + //! \overload + INST_3i(extractps, kX86InstIdExtractps, X86Mem, X86XmmReg, Imm) + + //! Insert SP-FP based on selector (SSE4.1). + INST_3i(insertps, kX86InstIdInsertps, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(insertps, kX86InstIdInsertps, X86XmmReg, X86Mem, Imm) + + //! Load DQWORD aligned using NT hint (SSE4.1). + INST_2x(movntdqa, kX86InstIdMovntdqa, X86XmmReg, X86Mem) + + //! Packed WORD sums of absolute difference (SSE4.1). + INST_3i(mpsadbw, kX86InstIdMpsadbw, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(mpsadbw, kX86InstIdMpsadbw, X86XmmReg, X86Mem, Imm) + + //! Pack DWORDs to WORDs with unsigned saturation (SSE4.1). + INST_2x(packusdw, kX86InstIdPackusdw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(packusdw, kX86InstIdPackusdw, X86XmmReg, X86Mem) + + //! Packed BYTE variable blend (SSE4.1). + INST_2x(pblendvb, kX86InstIdPblendvb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pblendvb, kX86InstIdPblendvb, X86XmmReg, X86Mem) + + //! Packed WORD blend (SSE4.1). + INST_3i(pblendw, kX86InstIdPblendw, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(pblendw, kX86InstIdPblendw, X86XmmReg, X86Mem, Imm) + + //! Packed QWORD compare for equality (SSE4.1). + INST_2x(pcmpeqq, kX86InstIdPcmpeqq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pcmpeqq, kX86InstIdPcmpeqq, X86XmmReg, X86Mem) + + //! Extract BYTE based on selector (SSE4.1). + INST_3i(pextrb, kX86InstIdPextrb, X86GpReg, X86XmmReg, Imm) + //! \overload + INST_3i(pextrb, kX86InstIdPextrb, X86Mem, X86XmmReg, Imm) + + //! Extract DWORD based on selector (SSE4.1). + INST_3i(pextrd, kX86InstIdPextrd, X86GpReg, X86XmmReg, Imm) + //! \overload + INST_3i(pextrd, kX86InstIdPextrd, X86Mem, X86XmmReg, Imm) + + //! Extract QWORD based on selector (SSE4.1). + INST_3i(pextrq, kX86InstIdPextrq, X86GpReg, X86XmmReg, Imm) + //! \overload + INST_3i(pextrq, kX86InstIdPextrq, X86Mem, X86XmmReg, Imm) + + //! Extract WORD based on selector (SSE4.1). + INST_3i(pextrw, kX86InstIdPextrw, X86Mem, X86XmmReg, Imm) + + //! Packed WORD horizontal minimum (SSE4.1). 
+ INST_2x(phminposuw, kX86InstIdPhminposuw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(phminposuw, kX86InstIdPhminposuw, X86XmmReg, X86Mem) + + //! Insert BYTE based on selector (SSE4.1). + INST_3i(pinsrb, kX86InstIdPinsrb, X86XmmReg, X86GpReg, Imm) + //! \overload + INST_3i(pinsrb, kX86InstIdPinsrb, X86XmmReg, X86Mem, Imm) + + //! Insert DWORD based on selector (SSE4.1). + INST_3i(pinsrd, kX86InstIdPinsrd, X86XmmReg, X86GpReg, Imm) + //! \overload + INST_3i(pinsrd, kX86InstIdPinsrd, X86XmmReg, X86Mem, Imm) + + //! Insert QWORD based on selector (SSE4.1). + INST_3i(pinsrq, kX86InstIdPinsrq, X86XmmReg, X86GpReg, Imm) + //! \overload + INST_3i(pinsrq, kX86InstIdPinsrq, X86XmmReg, X86Mem, Imm) + + //! Packed BYTE maximum (SSE4.1). + INST_2x(pmaxsb, kX86InstIdPmaxsb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmaxsb, kX86InstIdPmaxsb, X86XmmReg, X86Mem) + + //! Packed DWORD maximum (SSE4.1). + INST_2x(pmaxsd, kX86InstIdPmaxsd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmaxsd, kX86InstIdPmaxsd, X86XmmReg, X86Mem) + + //! Packed DWORD unsigned maximum (SSE4.1). + INST_2x(pmaxud, kX86InstIdPmaxud, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmaxud, kX86InstIdPmaxud, X86XmmReg, X86Mem) + + //! Packed WORD unsigned maximum (SSE4.1). + INST_2x(pmaxuw, kX86InstIdPmaxuw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmaxuw, kX86InstIdPmaxuw, X86XmmReg, X86Mem) + + //! Packed BYTE minimum (SSE4.1). + INST_2x(pminsb, kX86InstIdPminsb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pminsb, kX86InstIdPminsb, X86XmmReg, X86Mem) + + //! Packed DWORD minimum (SSE4.1). + INST_2x(pminsd, kX86InstIdPminsd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pminsd, kX86InstIdPminsd, X86XmmReg, X86Mem) + + //! Packed WORD unsigned minimum (SSE4.1). + INST_2x(pminuw, kX86InstIdPminuw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pminuw, kX86InstIdPminuw, X86XmmReg, X86Mem) + + //! Packed DWORD unsigned minimum (SSE4.1). + INST_2x(pminud, kX86InstIdPminud, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pminud, kX86InstIdPminud, X86XmmReg, X86Mem) + + //! BYTE to DWORD with sign extend (SSE4.1). + INST_2x(pmovsxbd, kX86InstIdPmovsxbd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmovsxbd, kX86InstIdPmovsxbd, X86XmmReg, X86Mem) + + //! Packed BYTE to QWORD with sign extend (SSE4.1). + INST_2x(pmovsxbq, kX86InstIdPmovsxbq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmovsxbq, kX86InstIdPmovsxbq, X86XmmReg, X86Mem) + + //! Packed BYTE to WORD with sign extend (SSE4.1). + INST_2x(pmovsxbw, kX86InstIdPmovsxbw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmovsxbw, kX86InstIdPmovsxbw, X86XmmReg, X86Mem) + + //! Packed DWORD to QWORD with sign extend (SSE4.1). + INST_2x(pmovsxdq, kX86InstIdPmovsxdq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmovsxdq, kX86InstIdPmovsxdq, X86XmmReg, X86Mem) + + //! Packed WORD to DWORD with sign extend (SSE4.1). + INST_2x(pmovsxwd, kX86InstIdPmovsxwd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmovsxwd, kX86InstIdPmovsxwd, X86XmmReg, X86Mem) + + //! Packed WORD to QWORD with sign extend (SSE4.1). + INST_2x(pmovsxwq, kX86InstIdPmovsxwq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmovsxwq, kX86InstIdPmovsxwq, X86XmmReg, X86Mem) + + //! BYTE to DWORD with zero extend (SSE4.1). + INST_2x(pmovzxbd, kX86InstIdPmovzxbd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmovzxbd, kX86InstIdPmovzxbd, X86XmmReg, X86Mem) + + //! Packed BYTE to QWORD with zero extend (SSE4.1). + INST_2x(pmovzxbq, kX86InstIdPmovzxbq, X86XmmReg, X86XmmReg) + //! 
\overload + INST_2x(pmovzxbq, kX86InstIdPmovzxbq, X86XmmReg, X86Mem) + + //! BYTE to WORD with zero extend (SSE4.1). + INST_2x(pmovzxbw, kX86InstIdPmovzxbw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmovzxbw, kX86InstIdPmovzxbw, X86XmmReg, X86Mem) + + //! Packed DWORD to QWORD with zero extend (SSE4.1). + INST_2x(pmovzxdq, kX86InstIdPmovzxdq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmovzxdq, kX86InstIdPmovzxdq, X86XmmReg, X86Mem) + + //! Packed WORD to DWORD with zero extend (SSE4.1). + INST_2x(pmovzxwd, kX86InstIdPmovzxwd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmovzxwd, kX86InstIdPmovzxwd, X86XmmReg, X86Mem) + + //! Packed WORD to QWORD with zero extend (SSE4.1). + INST_2x(pmovzxwq, kX86InstIdPmovzxwq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmovzxwq, kX86InstIdPmovzxwq, X86XmmReg, X86Mem) + + //! Packed DWORD to QWORD multiply (SSE4.1). + INST_2x(pmuldq, kX86InstIdPmuldq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmuldq, kX86InstIdPmuldq, X86XmmReg, X86Mem) + + //! Packed DWORD multiply low (SSE4.1). + INST_2x(pmulld, kX86InstIdPmulld, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(pmulld, kX86InstIdPmulld, X86XmmReg, X86Mem) + + //! Logical compare (SSE4.1). + INST_2x(ptest, kX86InstIdPtest, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(ptest, kX86InstIdPtest, X86XmmReg, X86Mem) + + //! Packed DP-FP round (SSE4.1). + INST_3i(roundpd, kX86InstIdRoundpd, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(roundpd, kX86InstIdRoundpd, X86XmmReg, X86Mem, Imm) + + //! Packed SP-FP round (SSE4.1). + INST_3i(roundps, kX86InstIdRoundps, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(roundps, kX86InstIdRoundps, X86XmmReg, X86Mem, Imm) + + //! Scalar DP-FP round (SSE4.1). + INST_3i(roundsd, kX86InstIdRoundsd, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(roundsd, kX86InstIdRoundsd, X86XmmReg, X86Mem, Imm) + + //! Scalar SP-FP round (SSE4.1). + INST_3i(roundss, kX86InstIdRoundss, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(roundss, kX86InstIdRoundss, X86XmmReg, X86Mem, Imm) + + // -------------------------------------------------------------------------- + // [SSE4.2] + // -------------------------------------------------------------------------- + + //! Accumulate CRC32 value (polynomial 0x11EDC6F41) (SSE4.2). + INST_2x(crc32, kX86InstIdCrc32, X86GpReg, X86GpReg) + //! \overload + INST_2x(crc32, kX86InstIdCrc32, X86GpReg, X86Mem) + + //! Packed compare explicit length strings, return index in ECX (SSE4.2). + INST_3i(pcmpestri, kX86InstIdPcmpestri, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(pcmpestri, kX86InstIdPcmpestri, X86XmmReg, X86Mem, Imm) + + //! Packed compare explicit length strings, return mask in XMM0 (SSE4.2). + INST_3i(pcmpestrm, kX86InstIdPcmpestrm, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(pcmpestrm, kX86InstIdPcmpestrm, X86XmmReg, X86Mem, Imm) + + //! Packed compare implicit length strings, return index in ECX (SSE4.2). + INST_3i(pcmpistri, kX86InstIdPcmpistri, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(pcmpistri, kX86InstIdPcmpistri, X86XmmReg, X86Mem, Imm) + + //! Packed compare implicit length strings, return mask in XMM0 (SSE4.2). + INST_3i(pcmpistrm, kX86InstIdPcmpistrm, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(pcmpistrm, kX86InstIdPcmpistrm, X86XmmReg, X86Mem, Imm) + + //! Packed QWORD compare if greater than (SSE4.2). + INST_2x(pcmpgtq, kX86InstIdPcmpgtq, X86XmmReg, X86XmmReg) + //! 
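A short usage sketch for the SSE4.2 crc32 emitter declared above, assuming a 32-bit cdecl target (what this extension builds for) and the same JitRuntime/X86Assembler setup as before; the crc32_step name and signature are illustrative only:

    using namespace asmjit;

    // JIT: uint32_t crc32_step(uint32_t crc, uint32_t data)
    JitRuntime runtime;
    X86Assembler a(&runtime);
    a.mov(x86::eax, x86::dword_ptr(x86::esp, 4));    // first cdecl arg: running CRC
    a.crc32(x86::eax, x86::dword_ptr(x86::esp, 8));  // fold in one DWORD (CRC-32C polynomial)
    a.ret();                                         // result returned in EAX

    typedef unsigned int (*Crc32Step)(unsigned int, unsigned int);
    Crc32Step fn = asmjit_cast<Crc32Step>(a.make());
    unsigned int crc = fn(0xFFFFFFFFu, 0x12345678u);
    runtime.release((void*)fn);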
\overload + INST_2x(pcmpgtq, kX86InstIdPcmpgtq, X86XmmReg, X86Mem) + + // -------------------------------------------------------------------------- + // [SSE4a] + // -------------------------------------------------------------------------- + + //! Extract Field (SSE4a). + INST_2x(extrq, kX86InstIdExtrq, X86XmmReg, X86XmmReg) + //! Extract Field (SSE4a). + INST_3ii(extrq, kX86InstIdExtrq, X86XmmReg, Imm, Imm) + + //! Insert Field (SSE4a). + INST_2x(insertq, kX86InstIdInsertq, X86XmmReg, X86XmmReg) + //! Insert Field (SSE4a). + INST_4ii(insertq, kX86InstIdInsertq, X86XmmReg, X86XmmReg, Imm, Imm) + + //! Move Non-Temporal Scalar DP-FP (SSE4a). + INST_2x(movntsd, kX86InstIdMovntsd, X86Mem, X86XmmReg) + //! Move Non-Temporal Scalar SP-FP (SSE4a). + INST_2x(movntss, kX86InstIdMovntss, X86Mem, X86XmmReg) + + // -------------------------------------------------------------------------- + // [AESNI] + // -------------------------------------------------------------------------- + + //! Perform a single round of the AES decryption flow (AESNI). + INST_2x(aesdec, kX86InstIdAesdec, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(aesdec, kX86InstIdAesdec, X86XmmReg, X86Mem) + + //! Perform the last round of the AES decryption flow (AESNI). + INST_2x(aesdeclast, kX86InstIdAesdeclast, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(aesdeclast, kX86InstIdAesdeclast, X86XmmReg, X86Mem) + + //! Perform a single round of the AES encryption flow (AESNI). + INST_2x(aesenc, kX86InstIdAesenc, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(aesenc, kX86InstIdAesenc, X86XmmReg, X86Mem) + + //! Perform the last round of the AES encryption flow (AESNI). + INST_2x(aesenclast, kX86InstIdAesenclast, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(aesenclast, kX86InstIdAesenclast, X86XmmReg, X86Mem) + + //! Perform the InvMixColumns transformation (AESNI). + INST_2x(aesimc, kX86InstIdAesimc, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(aesimc, kX86InstIdAesimc, X86XmmReg, X86Mem) + + //! Assist in expanding the AES cipher key (AESNI). + INST_3i(aeskeygenassist, kX86InstIdAeskeygenassist, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(aeskeygenassist, kX86InstIdAeskeygenassist, X86XmmReg, X86Mem, Imm) + + // -------------------------------------------------------------------------- + // [SHA] + // -------------------------------------------------------------------------- + + //! Perform an intermediate calculation for the next four SHA1 message DWORDs (SHA). + INST_2x(sha1msg1, kX86InstIdSha1msg1, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(sha1msg1, kX86InstIdSha1msg1, X86XmmReg, X86Mem) + + //! Perform a final calculation for the next four SHA1 message DWORDs (SHA). + INST_2x(sha1msg2, kX86InstIdSha1msg2, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(sha1msg2, kX86InstIdSha1msg2, X86XmmReg, X86Mem) + + //! Calculate SHA1 state variable E after four rounds (SHA). + INST_2x(sha1nexte, kX86InstIdSha1nexte, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(sha1nexte, kX86InstIdSha1nexte, X86XmmReg, X86Mem) + + //! Perform four rounds of SHA1 operation (SHA). + INST_3i(sha1rnds4, kX86InstIdSha1rnds4, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(sha1rnds4, kX86InstIdSha1rnds4, X86XmmReg, X86Mem, Imm) + + //! Perform an intermediate calculation for the next four SHA256 message DWORDs (SHA). + INST_2x(sha256msg1, kX86InstIdSha256msg1, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(sha256msg1, kX86InstIdSha256msg1, X86XmmReg, X86Mem) + + //! 
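A sketch of how the AESNI emitters above compose into one AES-128 encryption pass; the register roles and preloaded key schedule are illustrative assumptions, with `a` an X86Assembler as in the earlier sketches (pxor is the SSE2 emitter declared earlier in this header):

    // Plaintext block in xmm0; round keys preloaded in xmm1..xmm3 for brevity.
    a.pxor(x86::xmm0, x86::xmm1);        // AddRoundKey (round 0)
    a.aesenc(x86::xmm0, x86::xmm2);      // rounds 1..9: one aesenc per round key
    a.aesenclast(x86::xmm0, x86::xmm3);  // round 10 uses aesenclast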
Perform a final calculation for the next four SHA256 message DWORDs (SHA). + INST_2x(sha256msg2, kX86InstIdSha256msg2, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(sha256msg2, kX86InstIdSha256msg2, X86XmmReg, X86Mem) + + //! Perform two rounds of SHA256 operation (SHA). + INST_2x(sha256rnds2, kX86InstIdSha256rnds2, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(sha256rnds2, kX86InstIdSha256rnds2, X86XmmReg, X86Mem) + + // -------------------------------------------------------------------------- + // [PCLMULQDQ] + // -------------------------------------------------------------------------- + + //! Packed QWORD to DQWORD carry-less multiply (PCLMULQDQ). + INST_3i(pclmulqdq, kX86InstIdPclmulqdq, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(pclmulqdq, kX86InstIdPclmulqdq, X86XmmReg, X86Mem, Imm) + + // -------------------------------------------------------------------------- + // [AVX] + // -------------------------------------------------------------------------- + + //! Packed DP-FP add (AVX). + INST_3x(vaddpd, kX86InstIdVaddpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vaddpd, kX86InstIdVaddpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vaddpd, kX86InstIdVaddpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vaddpd, kX86InstIdVaddpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP add (AVX). + INST_3x(vaddps, kX86InstIdVaddps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vaddps, kX86InstIdVaddps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vaddps, kX86InstIdVaddps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vaddps, kX86InstIdVaddps, X86YmmReg, X86YmmReg, X86Mem) + + //! Scalar DP-FP add (AVX) + INST_3x(vaddsd, kX86InstIdVaddsd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vaddsd, kX86InstIdVaddsd, X86XmmReg, X86XmmReg, X86Mem) + + //! Scalar SP-FP add (AVX) + INST_3x(vaddss, kX86InstIdVaddss, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vaddss, kX86InstIdVaddss, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DP-FP add/subtract (AVX). + INST_3x(vaddsubpd, kX86InstIdVaddsubpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vaddsubpd, kX86InstIdVaddsubpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vaddsubpd, kX86InstIdVaddsubpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vaddsubpd, kX86InstIdVaddsubpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP add/subtract (AVX). + INST_3x(vaddsubps, kX86InstIdVaddsubps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vaddsubps, kX86InstIdVaddsubps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vaddsubps, kX86InstIdVaddsubps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vaddsubps, kX86InstIdVaddsubps, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed DP-FP bitwise and (AVX). + INST_3x(vandpd, kX86InstIdVandpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vandpd, kX86InstIdVandpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vandpd, kX86InstIdVandpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vandpd, kX86InstIdVandpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP bitwise and (AVX). + INST_3x(vandps, kX86InstIdVandps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vandps, kX86InstIdVandps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vandps, kX86InstIdVandps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vandps, kX86InstIdVandps, X86YmmReg, X86YmmReg, X86Mem) + + //! 
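Unlike the legacy SSE forms earlier in this header, which overwrite their first operand, the v-prefixed AVX emitters take an explicit destination plus two sources, and the YMM overloads widen the same operation to 256 bits. A brief sketch, with the registers and memory operand as illustrative assumptions:

    a.vaddpd(x86::ymm0, x86::ymm1, x86::ymm2);          // ymm0 = ymm1 + ymm2 (4 doubles)
    a.vaddpd(x86::ymm0, x86::ymm0, x86::ptr(x86::eax)); // accumulate from memory
    a.vandps(x86::xmm3, x86::xmm3, x86::xmm4);          // 128-bit bitwise and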
Packed DP-FP bitwise and-not (AVX). + INST_3x(vandnpd, kX86InstIdVandnpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vandnpd, kX86InstIdVandnpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vandnpd, kX86InstIdVandnpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vandnpd, kX86InstIdVandnpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP bitwise and-not (AVX). + INST_3x(vandnps, kX86InstIdVandnps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vandnps, kX86InstIdVandnps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vandnps, kX86InstIdVandnps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vandnps, kX86InstIdVandnps, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed DP-FP blend (AVX). + INST_4i(vblendpd, kX86InstIdVblendpd, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vblendpd, kX86InstIdVblendpd, X86XmmReg, X86XmmReg, X86Mem, Imm) + //! \overload + INST_4i(vblendpd, kX86InstIdVblendpd, X86YmmReg, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_4i(vblendpd, kX86InstIdVblendpd, X86YmmReg, X86YmmReg, X86Mem, Imm) + + //! Packed SP-FP blend (AVX). + INST_4i(vblendps, kX86InstIdVblendps, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vblendps, kX86InstIdVblendps, X86XmmReg, X86XmmReg, X86Mem, Imm) + //! \overload + INST_4i(vblendps, kX86InstIdVblendps, X86YmmReg, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_4i(vblendps, kX86InstIdVblendps, X86YmmReg, X86YmmReg, X86Mem, Imm) + + //! Packed DP-FP variable blend (AVX). + INST_4x(vblendvpd, kX86InstIdVblendvpd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_4x(vblendvpd, kX86InstIdVblendvpd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + //! \overload + INST_4x(vblendvpd, kX86InstIdVblendvpd, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_4x(vblendvpd, kX86InstIdVblendvpd, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + + //! Packed SP-FP variable blend (AVX). + INST_4x(vblendvps, kX86InstIdVblendvps, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_4x(vblendvps, kX86InstIdVblendvps, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + //! \overload + INST_4x(vblendvps, kX86InstIdVblendvps, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_4x(vblendvps, kX86InstIdVblendvps, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + + //! Broadcast 128-bits of FP data in `o1` to low and high 128-bits in `o0` (AVX). + INST_2x(vbroadcastf128, kX86InstIdVbroadcastf128, X86YmmReg, X86Mem) + //! Broadcast DP-FP element in `o1` to four locations in `o0` (AVX). + INST_2x(vbroadcastsd, kX86InstIdVbroadcastsd, X86YmmReg, X86Mem) + //! Broadcast SP-FP element in `o1` to four locations in `o0` (AVX). + INST_2x(vbroadcastss, kX86InstIdVbroadcastss, X86XmmReg, X86Mem) + //! Broadcast SP-FP element in `o1` to eight locations in `o0` (AVX). + INST_2x(vbroadcastss, kX86InstIdVbroadcastss, X86YmmReg, X86Mem) + + //! Packed DP-FP compare (AVX). + INST_4i(vcmppd, kX86InstIdVcmppd, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vcmppd, kX86InstIdVcmppd, X86XmmReg, X86XmmReg, X86Mem, Imm) + //! \overload + INST_4i(vcmppd, kX86InstIdVcmppd, X86YmmReg, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_4i(vcmppd, kX86InstIdVcmppd, X86YmmReg, X86YmmReg, X86Mem, Imm) + + //! Packed SP-FP compare (AVX). + INST_4i(vcmpps, kX86InstIdVcmpps, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vcmpps, kX86InstIdVcmpps, X86XmmReg, X86XmmReg, X86Mem, Imm) + //! 
\overload + INST_4i(vcmpps, kX86InstIdVcmpps, X86YmmReg, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_4i(vcmpps, kX86InstIdVcmpps, X86YmmReg, X86YmmReg, X86Mem, Imm) + + //! Scalar DP-FP compare (AVX). + INST_4i(vcmpsd, kX86InstIdVcmpsd, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vcmpsd, kX86InstIdVcmpsd, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Scalar SP-FP compare (AVX). + INST_4i(vcmpss, kX86InstIdVcmpss, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vcmpss, kX86InstIdVcmpss, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Scalar DP-FP ordered compare and set EFLAGS (AVX). + INST_2x(vcomisd, kX86InstIdVcomisd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vcomisd, kX86InstIdVcomisd, X86XmmReg, X86Mem) + + //! Scalar SP-FP ordered compare and set EFLAGS (AVX). + INST_2x(vcomiss, kX86InstIdVcomiss, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vcomiss, kX86InstIdVcomiss, X86XmmReg, X86Mem) + + //! Convert packed DWORDs to packed DP-FP (AVX). + INST_2x(vcvtdq2pd, kX86InstIdVcvtdq2pd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vcvtdq2pd, kX86InstIdVcvtdq2pd, X86XmmReg, X86Mem) + //! \overload + INST_2x(vcvtdq2pd, kX86InstIdVcvtdq2pd, X86YmmReg, X86XmmReg) + //! \overload + INST_2x(vcvtdq2pd, kX86InstIdVcvtdq2pd, X86YmmReg, X86Mem) + + //! Convert packed DWORDs to packed SP-FP (AVX). + INST_2x(vcvtdq2ps, kX86InstIdVcvtdq2ps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vcvtdq2ps, kX86InstIdVcvtdq2ps, X86XmmReg, X86Mem) + //! \overload + INST_2x(vcvtdq2ps, kX86InstIdVcvtdq2ps, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vcvtdq2ps, kX86InstIdVcvtdq2ps, X86YmmReg, X86Mem) + + //! Convert packed DP-FP to packed DWORDs (AVX). + INST_2x(vcvtpd2dq, kX86InstIdVcvtpd2dq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vcvtpd2dq, kX86InstIdVcvtpd2dq, X86XmmReg, X86YmmReg) + //! \overload + INST_2x(vcvtpd2dq, kX86InstIdVcvtpd2dq, X86XmmReg, X86Mem) + + //! Convert packed DP-FP to packed SP-FP (AVX). + INST_2x(vcvtpd2ps, kX86InstIdVcvtpd2ps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vcvtpd2ps, kX86InstIdVcvtpd2ps, X86XmmReg, X86YmmReg) + //! \overload + INST_2x(vcvtpd2ps, kX86InstIdVcvtpd2ps, X86XmmReg, X86Mem) + + //! Convert packed SP-FP to packed DWORDs (AVX). + INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86XmmReg, X86Mem) + //! \overload + INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86YmmReg, X86Mem) + + //! Convert packed SP-FP to packed DP-FP (AVX). + INST_2x(vcvtps2pd, kX86InstIdVcvtps2pd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vcvtps2pd, kX86InstIdVcvtps2pd, X86XmmReg, X86Mem) + //! \overload + INST_2x(vcvtps2pd, kX86InstIdVcvtps2pd, X86YmmReg, X86XmmReg) + //! \overload + INST_2x(vcvtps2pd, kX86InstIdVcvtps2pd, X86YmmReg, X86Mem) + + //! Convert scalar DP-FP to DWORD (AVX). + INST_2x(vcvtsd2si, kX86InstIdVcvtsd2si, X86GpReg, X86XmmReg) + //! \overload + INST_2x(vcvtsd2si, kX86InstIdVcvtsd2si, X86GpReg, X86Mem) + + //! Convert scalar DP-FP to scalar SP-FP (AVX). + INST_3x(vcvtsd2ss, kX86InstIdVcvtsd2ss, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vcvtsd2ss, kX86InstIdVcvtsd2ss, X86XmmReg, X86XmmReg, X86Mem) + + //! Convert DWORD integer to scalar DP-FP (AVX). + INST_3x(vcvtsi2sd, kX86InstIdVcvtsi2sd, X86XmmReg, X86XmmReg, X86GpReg) + //! \overload + INST_3x(vcvtsi2sd, kX86InstIdVcvtsi2sd, X86XmmReg, X86XmmReg, X86Mem) + + //!
Convert scalar INT32 to SP-FP (AVX). + INST_3x(vcvtsi2ss, kX86InstIdVcvtsi2ss, X86XmmReg, X86XmmReg, X86GpReg) + //! \overload + INST_3x(vcvtsi2ss, kX86InstIdVcvtsi2ss, X86XmmReg, X86XmmReg, X86Mem) + + //! Convert scalar SP-FP to DP-FP (AVX). + INST_3x(vcvtss2sd, kX86InstIdVcvtss2sd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vcvtss2sd, kX86InstIdVcvtss2sd, X86XmmReg, X86XmmReg, X86Mem) + + //! Convert scalar SP-FP to INT32 (AVX). + INST_2x(vcvtss2si, kX86InstIdVcvtss2si, X86GpReg, X86XmmReg) + //! \overload + INST_2x(vcvtss2si, kX86InstIdVcvtss2si, X86GpReg, X86Mem) + + //! Convert with truncation packed DP-FP to packed DWORDs (AVX). + INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmReg, X86YmmReg) + //! \overload + INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmReg, X86Mem) + + //! Convert with truncation packed SP-FP to packed DWORDs (AVX). + INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86XmmReg, X86Mem) + //! \overload + INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86YmmReg, X86Mem) + + //! Convert with truncation scalar DP-FP to INT32 (AVX). + INST_2x(vcvttsd2si, kX86InstIdVcvttsd2si, X86GpReg, X86XmmReg) + //! \overload + INST_2x(vcvttsd2si, kX86InstIdVcvttsd2si, X86GpReg, X86Mem) + + //! Convert with truncation scalar SP-FP to INT32 (AVX). + INST_2x(vcvttss2si, kX86InstIdVcvttss2si, X86GpReg, X86XmmReg) + //! \overload + INST_2x(vcvttss2si, kX86InstIdVcvttss2si, X86GpReg, X86Mem) + + //! Packed DP-FP divide (AVX). + INST_3x(vdivpd, kX86InstIdVdivpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vdivpd, kX86InstIdVdivpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vdivpd, kX86InstIdVdivpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vdivpd, kX86InstIdVdivpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP divide (AVX). + INST_3x(vdivps, kX86InstIdVdivps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vdivps, kX86InstIdVdivps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vdivps, kX86InstIdVdivps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vdivps, kX86InstIdVdivps, X86YmmReg, X86YmmReg, X86Mem) + + //! Scalar DP-FP divide (AVX). + INST_3x(vdivsd, kX86InstIdVdivsd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vdivsd, kX86InstIdVdivsd, X86XmmReg, X86XmmReg, X86Mem) + + //! Scalar SP-FP divide (AVX). + INST_3x(vdivss, kX86InstIdVdivss, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vdivss, kX86InstIdVdivss, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DP-FP dot product (AVX). + INST_4i(vdppd, kX86InstIdVdppd, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vdppd, kX86InstIdVdppd, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Packed SP-FP dot product (AVX). + INST_4i(vdpps, kX86InstIdVdpps, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vdpps, kX86InstIdVdpps, X86XmmReg, X86XmmReg, X86Mem, Imm) + //! \overload + INST_4i(vdpps, kX86InstIdVdpps, X86YmmReg, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_4i(vdpps, kX86InstIdVdpps, X86YmmReg, X86YmmReg, X86Mem, Imm) + + //! Extract 128 bits of packed FP data from `o1` and store results in `o0` (AVX). + INST_3i(vextractf128, kX86InstIdVextractf128, X86XmmReg, X86YmmReg, Imm) + //! 
\overload + INST_3i(vextractf128, kX86InstIdVextractf128, X86Mem, X86YmmReg, Imm) + + //! Extract SP-FP based on selector (AVX). + INST_3i(vextractps, kX86InstIdVextractps, X86GpReg, X86XmmReg, Imm) + //! \overload + INST_3i(vextractps, kX86InstIdVextractps, X86Mem, X86XmmReg, Imm) + + //! Packed DP-FP horizontal add (AVX). + INST_3x(vhaddpd, kX86InstIdVhaddpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vhaddpd, kX86InstIdVhaddpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vhaddpd, kX86InstIdVhaddpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vhaddpd, kX86InstIdVhaddpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP horizontal add (AVX). + INST_3x(vhaddps, kX86InstIdVhaddps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vhaddps, kX86InstIdVhaddps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vhaddps, kX86InstIdVhaddps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vhaddps, kX86InstIdVhaddps, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed DP-FP horizontal subtract (AVX). + INST_3x(vhsubpd, kX86InstIdVhsubpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vhsubpd, kX86InstIdVhsubpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vhsubpd, kX86InstIdVhsubpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vhsubpd, kX86InstIdVhsubpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP horizontal subtract (AVX). + INST_3x(vhsubps, kX86InstIdVhsubps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vhsubps, kX86InstIdVhsubps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vhsubps, kX86InstIdVhsubps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vhsubps, kX86InstIdVhsubps, X86YmmReg, X86YmmReg, X86Mem) + + //! Insert 128 bits of packed FP data based on selector (AVX). + INST_4i(vinsertf128, kX86InstIdVinsertf128, X86YmmReg, X86YmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vinsertf128, kX86InstIdVinsertf128, X86YmmReg, X86YmmReg, X86Mem, Imm) + + //! Insert SP-FP based on selector (AVX). + INST_4i(vinsertps, kX86InstIdVinsertps, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vinsertps, kX86InstIdVinsertps, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Load 128-bits unaligned (AVX). + INST_2x(vlddqu, kX86InstIdVlddqu, X86XmmReg, X86Mem) + //! Load 256-bits unaligned (AVX). + INST_2x(vlddqu, kX86InstIdVlddqu, X86YmmReg, X86Mem) + + //! Load streaming SIMD extension control/status (AVX). + INST_1x(vldmxcsr, kX86InstIdVldmxcsr, X86Mem) + + //! Store selected bytes of DQWORD to DS:EDI/RDI (AVX). + INST_2x(vmaskmovdqu, kX86InstIdVmaskmovdqu, X86XmmReg, X86XmmReg) + + //! Conditionally load packed DP-FP from `o2` using mask in `o1` and store in `o0` (AVX). + INST_3x(vmaskmovpd, kX86InstIdVmaskmovpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vmaskmovpd, kX86InstIdVmaskmovpd, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vmaskmovpd, kX86InstIdVmaskmovpd, X86Mem, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vmaskmovpd, kX86InstIdVmaskmovpd, X86Mem, X86YmmReg, X86YmmReg) + + //! Conditionally load packed SP-FP from `o2` using mask in `o1` and store in `o0` (AVX). + INST_3x(vmaskmovps, kX86InstIdVmaskmovps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vmaskmovps, kX86InstIdVmaskmovps, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vmaskmovps, kX86InstIdVmaskmovps, X86Mem, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vmaskmovps, kX86InstIdVmaskmovps, X86Mem, X86YmmReg, X86YmmReg) + + //!
Packed DP-FP maximum (AVX). + INST_3x(vmaxpd, kX86InstIdVmaxpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vmaxpd, kX86InstIdVmaxpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vmaxpd, kX86InstIdVmaxpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vmaxpd, kX86InstIdVmaxpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP maximum (AVX). + INST_3x(vmaxps, kX86InstIdVmaxps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vmaxps, kX86InstIdVmaxps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vmaxps, kX86InstIdVmaxps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vmaxps, kX86InstIdVmaxps, X86YmmReg, X86YmmReg, X86Mem) + + //! Scalar DP-FP maximum (AVX). + INST_3x(vmaxsd, kX86InstIdVmaxsd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vmaxsd, kX86InstIdVmaxsd, X86XmmReg, X86XmmReg, X86Mem) + + //! Scalar SP-FP maximum (AVX). + INST_3x(vmaxss, kX86InstIdVmaxss, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vmaxss, kX86InstIdVmaxss, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DP-FP minimum (AVX). + INST_3x(vminpd, kX86InstIdVminpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vminpd, kX86InstIdVminpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vminpd, kX86InstIdVminpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vminpd, kX86InstIdVminpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP minimum (AVX). + INST_3x(vminps, kX86InstIdVminps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vminps, kX86InstIdVminps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vminps, kX86InstIdVminps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vminps, kX86InstIdVminps, X86YmmReg, X86YmmReg, X86Mem) + + //! Scalar DP-FP minimum (AVX). + INST_3x(vminsd, kX86InstIdVminsd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vminsd, kX86InstIdVminsd, X86XmmReg, X86XmmReg, X86Mem) + + //! Scalar SP-FP minimum (AVX). + INST_3x(vminss, kX86InstIdVminss, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vminss, kX86InstIdVminss, X86XmmReg, X86XmmReg, X86Mem) + + //! Move 128-bits of aligned packed DP-FP (AVX). + INST_2x(vmovapd, kX86InstIdVmovapd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vmovapd, kX86InstIdVmovapd, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovapd, kX86InstIdVmovapd, X86Mem, X86XmmReg) + //! Move 256-bits of aligned packed DP-FP (AVX). + INST_2x(vmovapd, kX86InstIdVmovapd, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vmovapd, kX86InstIdVmovapd, X86YmmReg, X86Mem) + //! \overload + INST_2x(vmovapd, kX86InstIdVmovapd, X86Mem, X86YmmReg) + + //! Move 128-bits of aligned packed SP-FP (AVX). + INST_2x(vmovaps, kX86InstIdVmovaps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vmovaps, kX86InstIdVmovaps, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovaps, kX86InstIdVmovaps, X86Mem, X86XmmReg) + //! Move 256-bits of aligned packed SP-FP (AVX). + INST_2x(vmovaps, kX86InstIdVmovaps, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vmovaps, kX86InstIdVmovaps, X86YmmReg, X86Mem) + //! \overload + INST_2x(vmovaps, kX86InstIdVmovaps, X86Mem, X86YmmReg) + + //! Move DWORD (AVX). + INST_2x(vmovd, kX86InstIdVmovd, X86XmmReg, X86GpReg) + //! \overload + INST_2x(vmovd, kX86InstIdVmovd, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovd, kX86InstIdVmovd, X86GpReg, X86XmmReg) + //! \overload + INST_2x(vmovd, kX86InstIdVmovd, X86Mem, X86XmmReg) + + //! Move QWORD (AVX). 
+ INST_2x(vmovq, kX86InstIdVmovq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vmovq, kX86InstIdVmovq, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovq, kX86InstIdVmovq, X86Mem, X86XmmReg) + + //! Move QWORD (AVX and X64 Only). + INST_2x(vmovq, kX86InstIdVmovq, X86XmmReg, X86GpReg) + //! \overload + INST_2x(vmovq, kX86InstIdVmovq, X86GpReg, X86XmmReg) + + //! Move one DP-FP and duplicate (AVX). + INST_2x(vmovddup, kX86InstIdVmovddup, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vmovddup, kX86InstIdVmovddup, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovddup, kX86InstIdVmovddup, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vmovddup, kX86InstIdVmovddup, X86YmmReg, X86Mem) + + //! Move 128-bits aligned (AVX). + INST_2x(vmovdqa, kX86InstIdVmovdqa, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vmovdqa, kX86InstIdVmovdqa, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovdqa, kX86InstIdVmovdqa, X86Mem, X86XmmReg) + //! Move 256-bits aligned (AVX). + INST_2x(vmovdqa, kX86InstIdVmovdqa, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vmovdqa, kX86InstIdVmovdqa, X86YmmReg, X86Mem) + //! \overload + INST_2x(vmovdqa, kX86InstIdVmovdqa, X86Mem, X86YmmReg) + + //! Move 128-bits unaligned (AVX). + INST_2x(vmovdqu, kX86InstIdVmovdqu, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vmovdqu, kX86InstIdVmovdqu, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovdqu, kX86InstIdVmovdqu, X86Mem, X86XmmReg) + //! Move 256-bits unaligned (AVX). + INST_2x(vmovdqu, kX86InstIdVmovdqu, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vmovdqu, kX86InstIdVmovdqu, X86YmmReg, X86Mem) + //! \overload + INST_2x(vmovdqu, kX86InstIdVmovdqu, X86Mem, X86YmmReg) + + //! High to low packed SP-FP (AVX). + INST_3x(vmovhlps, kX86InstIdVmovhlps, X86XmmReg, X86XmmReg, X86XmmReg) + + //! Move high packed DP-FP (AVX). + INST_3x(vmovhpd, kX86InstIdVmovhpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovhpd, kX86InstIdVmovhpd, X86Mem, X86XmmReg) + + //! Move high packed SP-FP (AVX). + INST_3x(vmovhps, kX86InstIdVmovhps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovhps, kX86InstIdVmovhps, X86Mem, X86XmmReg) + + //! Move low to high packed SP-FP (AVX). + INST_3x(vmovlhps, kX86InstIdVmovlhps, X86XmmReg, X86XmmReg, X86XmmReg) + + //! Move low packed DP-FP (AVX). + INST_3x(vmovlpd, kX86InstIdVmovlpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovlpd, kX86InstIdVmovlpd, X86Mem, X86XmmReg) + + //! Move low packed SP-FP (AVX). + INST_3x(vmovlps, kX86InstIdVmovlps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovlps, kX86InstIdVmovlps, X86Mem, X86XmmReg) + + //! Extract packed DP-FP sign mask (AVX). + INST_2x(vmovmskpd, kX86InstIdVmovmskpd, X86GpReg, X86XmmReg) + //! \overload + INST_2x(vmovmskpd, kX86InstIdVmovmskpd, X86GpReg, X86YmmReg) + + //! Extract packed SP-FP sign mask (AVX). + INST_2x(vmovmskps, kX86InstIdVmovmskps, X86GpReg, X86XmmReg) + //! \overload + INST_2x(vmovmskps, kX86InstIdVmovmskps, X86GpReg, X86YmmReg) + + //! Store 128-bits using NT hint (AVX). + INST_2x(vmovntdq, kX86InstIdVmovntdq, X86Mem, X86XmmReg) + //! Store 256-bits using NT hint (AVX). + INST_2x(vmovntdq, kX86InstIdVmovntdq, X86Mem, X86YmmReg) + + //! Load 128-bits aligned using NT hint (AVX). + INST_2x(vmovntdqa, kX86InstIdVmovntdqa, X86XmmReg, X86Mem) + + //! Store packed DP-FP (128-bits) using NT hint (AVX). + INST_2x(vmovntpd, kX86InstIdVmovntpd, X86Mem, X86XmmReg) + //! Store packed DP-FP (256-bits) using NT hint (AVX). + INST_2x(vmovntpd, kX86InstIdVmovntpd, X86Mem, X86YmmReg) + + //!
Store packed SP-FP (128-bits) using NT hint (AVX). + INST_2x(vmovntps, kX86InstIdVmovntps, X86Mem, X86XmmReg) + //! Store packed SP-FP (256-bits) using NT hint (AVX). + INST_2x(vmovntps, kX86InstIdVmovntps, X86Mem, X86YmmReg) + + //! Move scalar DP-FP (AVX). + INST_3x(vmovsd, kX86InstIdVmovsd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vmovsd, kX86InstIdVmovsd, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovsd, kX86InstIdVmovsd, X86Mem, X86XmmReg) + + //! Move packed SP-FP high and duplicate (AVX). + INST_2x(vmovshdup, kX86InstIdVmovshdup, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vmovshdup, kX86InstIdVmovshdup, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovshdup, kX86InstIdVmovshdup, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vmovshdup, kX86InstIdVmovshdup, X86YmmReg, X86Mem) + + //! Move packed SP-FP low and duplicate (AVX). + INST_2x(vmovsldup, kX86InstIdVmovsldup, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vmovsldup, kX86InstIdVmovsldup, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovsldup, kX86InstIdVmovsldup, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vmovsldup, kX86InstIdVmovsldup, X86YmmReg, X86Mem) + + //! Move scalar SP-FP (AVX). + INST_3x(vmovss, kX86InstIdVmovss, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vmovss, kX86InstIdVmovss, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovss, kX86InstIdVmovss, X86Mem, X86XmmReg) + + //! Move 128-bits of unaligned packed DP-FP (AVX). + INST_2x(vmovupd, kX86InstIdVmovupd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vmovupd, kX86InstIdVmovupd, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovupd, kX86InstIdVmovupd, X86Mem, X86XmmReg) + //! Move 256-bits of unaligned packed DP-FP (AVX). + INST_2x(vmovupd, kX86InstIdVmovupd, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vmovupd, kX86InstIdVmovupd, X86YmmReg, X86Mem) + //! \overload + INST_2x(vmovupd, kX86InstIdVmovupd, X86Mem, X86YmmReg) + + //! Move 128-bits of unaligned packed SP-FP (AVX). + INST_2x(vmovups, kX86InstIdVmovups, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vmovups, kX86InstIdVmovups, X86XmmReg, X86Mem) + //! \overload + INST_2x(vmovups, kX86InstIdVmovups, X86Mem, X86XmmReg) + //! Move 256-bits of unaligned packed SP-FP (AVX). + INST_2x(vmovups, kX86InstIdVmovups, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vmovups, kX86InstIdVmovups, X86YmmReg, X86Mem) + //! \overload + INST_2x(vmovups, kX86InstIdVmovups, X86Mem, X86YmmReg) + + //! Packed WORD sums of absolute difference (AVX). + INST_4i(vmpsadbw, kX86InstIdVmpsadbw, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vmpsadbw, kX86InstIdVmpsadbw, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Packed DP-FP multiply (AVX). + INST_3x(vmulpd, kX86InstIdVmulpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vmulpd, kX86InstIdVmulpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vmulpd, kX86InstIdVmulpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vmulpd, kX86InstIdVmulpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP multiply (AVX). + INST_3x(vmulps, kX86InstIdVmulps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vmulps, kX86InstIdVmulps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vmulps, kX86InstIdVmulps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vmulps, kX86InstIdVmulps, X86YmmReg, X86YmmReg, X86Mem) + + //! Scalar DP-FP multiply (AVX). + INST_3x(vmulsd, kX86InstIdVmulsd, X86XmmReg, X86XmmReg, X86XmmReg) + //!
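A usage sketch for the unaligned AVX moves and multiplies declared above, with `a` an X86Assembler as in the earlier sketches; the pointer registers are illustrative assumptions:

    a.vmovups(x86::ymm0, x86::ptr(x86::eax));   // 256-bit unaligned load
    a.vmulps(x86::ymm0, x86::ymm0, x86::ymm1);  // scale eight packed floats
    a.vmovups(x86::ptr(x86::edx), x86::ymm0);   // 256-bit unaligned store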
\overload + INST_3x(vmulsd, kX86InstIdVmulsd, X86XmmReg, X86XmmReg, X86Mem) + + //! Scalar SP-FP multiply (AVX). + INST_3x(vmulss, kX86InstIdVmulss, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vmulss, kX86InstIdVmulss, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DP-FP bitwise or (AVX). + INST_3x(vorpd, kX86InstIdVorpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vorpd, kX86InstIdVorpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vorpd, kX86InstIdVorpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vorpd, kX86InstIdVorpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP bitwise or (AVX). + INST_3x(vorps, kX86InstIdVorps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vorps, kX86InstIdVorps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vorps, kX86InstIdVorps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vorps, kX86InstIdVorps, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed BYTE absolute value (AVX). + INST_2x(vpabsb, kX86InstIdVpabsb, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpabsb, kX86InstIdVpabsb, X86XmmReg, X86Mem) + + //! Packed DWORD absolute value (AVX). + INST_2x(vpabsd, kX86InstIdVpabsd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpabsd, kX86InstIdVpabsd, X86XmmReg, X86Mem) + + //! Packed WORD absolute value (AVX). + INST_2x(vpabsw, kX86InstIdVpabsw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpabsw, kX86InstIdVpabsw, X86XmmReg, X86Mem) + + //! Pack DWORDs to WORDs with signed saturation (AVX). + INST_3x(vpackssdw, kX86InstIdVpackssdw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpackssdw, kX86InstIdVpackssdw, X86XmmReg, X86XmmReg, X86Mem) + + //! Pack WORDs to BYTEs with signed saturation (AVX). + INST_3x(vpacksswb, kX86InstIdVpacksswb, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpacksswb, kX86InstIdVpacksswb, X86XmmReg, X86XmmReg, X86Mem) + + //! Pack DWORDs to WORDs with unsigned saturation (AVX). + INST_3x(vpackusdw, kX86InstIdVpackusdw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpackusdw, kX86InstIdVpackusdw, X86XmmReg, X86XmmReg, X86Mem) + + //! Pack WORDs to BYTEs with unsigned saturation (AVX). + INST_3x(vpackuswb, kX86InstIdVpackuswb, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpackuswb, kX86InstIdVpackuswb, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed BYTE add (AVX). + INST_3x(vpaddb, kX86InstIdVpaddb, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpaddb, kX86InstIdVpaddb, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DWORD add (AVX). + INST_3x(vpaddd, kX86InstIdVpaddd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpaddd, kX86InstIdVpaddd, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed QWORD add (AVX). + INST_3x(vpaddq, kX86InstIdVpaddq, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpaddq, kX86InstIdVpaddq, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD add (AVX). + INST_3x(vpaddw, kX86InstIdVpaddw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpaddw, kX86InstIdVpaddw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed BYTE add with saturation (AVX). + INST_3x(vpaddsb, kX86InstIdVpaddsb, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpaddsb, kX86InstIdVpaddsb, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD add with saturation (AVX). + INST_3x(vpaddsw, kX86InstIdVpaddsw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpaddsw, kX86InstIdVpaddsw, X86XmmReg, X86XmmReg, X86Mem) + + //! 
Packed BYTE add with unsigned saturation (AVX). + INST_3x(vpaddusb, kX86InstIdVpaddusb, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpaddusb, kX86InstIdVpaddusb, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD add with unsigned saturation (AVX). + INST_3x(vpaddusw, kX86InstIdVpaddusw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpaddusw, kX86InstIdVpaddusw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed align right (AVX). + INST_4i(vpalignr, kX86InstIdVpalignr, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vpalignr, kX86InstIdVpalignr, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Packed bitwise and (AVX). + INST_3x(vpand, kX86InstIdVpand, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpand, kX86InstIdVpand, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed bitwise and-not (AVX). + INST_3x(vpandn, kX86InstIdVpandn, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpandn, kX86InstIdVpandn, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed BYTE average (AVX). + INST_3x(vpavgb, kX86InstIdVpavgb, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpavgb, kX86InstIdVpavgb, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD average (AVX). + INST_3x(vpavgw, kX86InstIdVpavgw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpavgw, kX86InstIdVpavgw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed BYTE variable blend (AVX). + INST_4x(vpblendvb, kX86InstIdVpblendvb, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_4x(vpblendvb, kX86InstIdVpblendvb, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + + //! Packed WORD blend (AVX). + INST_4i(vpblendw, kX86InstIdVpblendw, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vpblendw, kX86InstIdVpblendw, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Packed BYTEs compare for equality (AVX). + INST_3x(vpcmpeqb, kX86InstIdVpcmpeqb, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpcmpeqb, kX86InstIdVpcmpeqb, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DWORDs compare for equality (AVX). + INST_3x(vpcmpeqd, kX86InstIdVpcmpeqd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpcmpeqd, kX86InstIdVpcmpeqd, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed QWORDs compare for equality (AVX). + INST_3x(vpcmpeqq, kX86InstIdVpcmpeqq, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpcmpeqq, kX86InstIdVpcmpeqq, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORDs compare for equality (AVX). + INST_3x(vpcmpeqw, kX86InstIdVpcmpeqw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpcmpeqw, kX86InstIdVpcmpeqw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed BYTEs compare if greater than (AVX). + INST_3x(vpcmpgtb, kX86InstIdVpcmpgtb, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpcmpgtb, kX86InstIdVpcmpgtb, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DWORDs compare if greater than (AVX). + INST_3x(vpcmpgtd, kX86InstIdVpcmpgtd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpcmpgtd, kX86InstIdVpcmpgtd, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed QWORDs compare if greater than (AVX). + INST_3x(vpcmpgtq, kX86InstIdVpcmpgtq, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpcmpgtq, kX86InstIdVpcmpgtq, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORDs compare if greater than (AVX). + INST_3x(vpcmpgtw, kX86InstIdVpcmpgtw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpcmpgtw, kX86InstIdVpcmpgtw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed compare explicit length strings, return index in ECX (AVX). 
+ INST_3i(vpcmpestri, kX86InstIdVpcmpestri, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpcmpestri, kX86InstIdVpcmpestri, X86XmmReg, X86Mem, Imm) + + //! Packed compare explicit length strings, return mask in XMM0 (AVX). + INST_3i(vpcmpestrm, kX86InstIdVpcmpestrm, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpcmpestrm, kX86InstIdVpcmpestrm, X86XmmReg, X86Mem, Imm) + + //! Packed compare implicit length strings, return index in ECX (AVX). + INST_3i(vpcmpistri, kX86InstIdVpcmpistri, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpcmpistri, kX86InstIdVpcmpistri, X86XmmReg, X86Mem, Imm) + + //! Packed compare implicit length strings, return mask in XMM0 (AVX). + INST_3i(vpcmpistrm, kX86InstIdVpcmpistrm, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpcmpistrm, kX86InstIdVpcmpistrm, X86XmmReg, X86Mem, Imm) + + //! Packed DP-FP permute (AVX). + INST_3x(vpermilpd, kX86InstIdVpermilpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpermilpd, kX86InstIdVpermilpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vpermilpd, kX86InstIdVpermilpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vpermilpd, kX86InstIdVpermilpd, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3i(vpermilpd, kX86InstIdVpermilpd, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpermilpd, kX86InstIdVpermilpd, X86XmmReg, X86Mem, Imm) + //! \overload + INST_3i(vpermilpd, kX86InstIdVpermilpd, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_3i(vpermilpd, kX86InstIdVpermilpd, X86YmmReg, X86Mem, Imm) + + //! Packed SP-FP permute (AVX). + INST_3x(vpermilps, kX86InstIdVpermilps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpermilps, kX86InstIdVpermilps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vpermilps, kX86InstIdVpermilps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vpermilps, kX86InstIdVpermilps, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3i(vpermilps, kX86InstIdVpermilps, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpermilps, kX86InstIdVpermilps, X86XmmReg, X86Mem, Imm) + //! \overload + INST_3i(vpermilps, kX86InstIdVpermilps, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_3i(vpermilps, kX86InstIdVpermilps, X86YmmReg, X86Mem, Imm) + + //! Packed 128-bit FP permute (AVX). + INST_4i(vperm2f128, kX86InstIdVperm2f128, X86YmmReg, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_4i(vperm2f128, kX86InstIdVperm2f128, X86YmmReg, X86YmmReg, X86Mem, Imm) + + //! Extract BYTE (AVX). + INST_3i(vpextrb, kX86InstIdVpextrb, X86GpReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpextrb, kX86InstIdVpextrb, X86Mem, X86XmmReg, Imm) + + //! Extract DWORD (AVX). + INST_3i(vpextrd, kX86InstIdVpextrd, X86GpReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpextrd, kX86InstIdVpextrd, X86Mem, X86XmmReg, Imm) + + //! Extract QWORD (AVX and X64 Only). + INST_3i(vpextrq, kX86InstIdVpextrq, X86GpReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpextrq, kX86InstIdVpextrq, X86Mem, X86XmmReg, Imm) + + //! Extract WORD (AVX). + INST_3i(vpextrw, kX86InstIdVpextrw, X86GpReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpextrw, kX86InstIdVpextrw, X86Mem, X86XmmReg, Imm) + + //! Packed DWORD horizontal add (AVX). + INST_3x(vphaddd, kX86InstIdVphaddd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vphaddd, kX86InstIdVphaddd, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD horizontal add with saturation (AVX). + INST_3x(vphaddsw, kX86InstIdVphaddsw, X86XmmReg, X86XmmReg, X86XmmReg) + //! 
\overload + INST_3x(vphaddsw, kX86InstIdVphaddsw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD horizontal add (AVX). + INST_3x(vphaddw, kX86InstIdVphaddw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vphaddw, kX86InstIdVphaddw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD horizontal minimum (AVX). + INST_2x(vphminposuw, kX86InstIdVphminposuw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vphminposuw, kX86InstIdVphminposuw, X86XmmReg, X86Mem) + + //! Packed DWORD horizontal subtract (AVX). + INST_3x(vphsubd, kX86InstIdVphsubd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vphsubd, kX86InstIdVphsubd, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD horizontal subtract with saturation (AVX). + INST_3x(vphsubsw, kX86InstIdVphsubsw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vphsubsw, kX86InstIdVphsubsw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD horizontal subtract (AVX). + INST_3x(vphsubw, kX86InstIdVphsubw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vphsubw, kX86InstIdVphsubw, X86XmmReg, X86XmmReg, X86Mem) + + //! Insert BYTE based on selector (AVX). + INST_4i(vpinsrb, kX86InstIdVpinsrb, X86XmmReg, X86XmmReg, X86GpReg, Imm) + //! \overload + INST_4i(vpinsrb, kX86InstIdVpinsrb, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Insert DWORD based on selector (AVX). + INST_4i(vpinsrd, kX86InstIdVpinsrd, X86XmmReg, X86XmmReg, X86GpReg, Imm) + //! \overload + INST_4i(vpinsrd, kX86InstIdVpinsrd, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Insert QWORD based on selector (AVX and X64 Only). + INST_4i(vpinsrq, kX86InstIdVpinsrq, X86XmmReg, X86XmmReg, X86GpReg, Imm) + //! \overload + INST_4i(vpinsrq, kX86InstIdVpinsrq, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Insert WORD based on selector (AVX). + INST_4i(vpinsrw, kX86InstIdVpinsrw, X86XmmReg, X86XmmReg, X86GpReg, Imm) + //! \overload + INST_4i(vpinsrw, kX86InstIdVpinsrw, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Packed multiply and add signed and unsigned bytes (AVX). + INST_3x(vpmaddubsw, kX86InstIdVpmaddubsw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmaddubsw, kX86InstIdVpmaddubsw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD multiply and add to packed DWORD (AVX). + INST_3x(vpmaddwd, kX86InstIdVpmaddwd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmaddwd, kX86InstIdVpmaddwd, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed BYTE maximum (AVX). + INST_3x(vpmaxsb, kX86InstIdVpmaxsb, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmaxsb, kX86InstIdVpmaxsb, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DWORD maximum (AVX). + INST_3x(vpmaxsd, kX86InstIdVpmaxsd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmaxsd, kX86InstIdVpmaxsd, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD maximum (AVX). + INST_3x(vpmaxsw, kX86InstIdVpmaxsw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmaxsw, kX86InstIdVpmaxsw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed BYTE unsigned maximum (AVX). + INST_3x(vpmaxub, kX86InstIdVpmaxub, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmaxub, kX86InstIdVpmaxub, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DWORD unsigned maximum (AVX). + INST_3x(vpmaxud, kX86InstIdVpmaxud, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmaxud, kX86InstIdVpmaxud, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD unsigned maximum (AVX). + INST_3x(vpmaxuw, kX86InstIdVpmaxuw, X86XmmReg, X86XmmReg, X86XmmReg) + //! 
\overload + INST_3x(vpmaxuw, kX86InstIdVpmaxuw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed BYTE minimum (AVX). + INST_3x(vpminsb, kX86InstIdVpminsb, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpminsb, kX86InstIdVpminsb, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DWORD minimum (AVX). + INST_3x(vpminsd, kX86InstIdVpminsd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpminsd, kX86InstIdVpminsd, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD minimum (AVX). + INST_3x(vpminsw, kX86InstIdVpminsw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpminsw, kX86InstIdVpminsw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed BYTE unsigned minimum (AVX). + INST_3x(vpminub, kX86InstIdVpminub, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpminub, kX86InstIdVpminub, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DWORD unsigned minimum (AVX). + INST_3x(vpminud, kX86InstIdVpminud, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpminud, kX86InstIdVpminud, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD unsigned minimum (AVX). + INST_3x(vpminuw, kX86InstIdVpminuw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpminuw, kX86InstIdVpminuw, X86XmmReg, X86XmmReg, X86Mem) + + //! Move Byte mask to integer (AVX). + INST_2x(vpmovmskb, kX86InstIdVpmovmskb, X86GpReg, X86XmmReg) + + //! BYTE to DWORD with sign extend (AVX). + INST_2x(vpmovsxbd, kX86InstIdVpmovsxbd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpmovsxbd, kX86InstIdVpmovsxbd, X86XmmReg, X86Mem) + + //! Packed BYTE to QWORD with sign extend (AVX). + INST_2x(vpmovsxbq, kX86InstIdVpmovsxbq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpmovsxbq, kX86InstIdVpmovsxbq, X86XmmReg, X86Mem) + + //! Packed BYTE to WORD with sign extend (AVX). + INST_2x(vpmovsxbw, kX86InstIdVpmovsxbw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpmovsxbw, kX86InstIdVpmovsxbw, X86XmmReg, X86Mem) + + //! Packed DWORD to QWORD with sign extend (AVX). + INST_2x(vpmovsxdq, kX86InstIdVpmovsxdq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpmovsxdq, kX86InstIdVpmovsxdq, X86XmmReg, X86Mem) + + //! Packed WORD to DWORD with sign extend (AVX). + INST_2x(vpmovsxwd, kX86InstIdVpmovsxwd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpmovsxwd, kX86InstIdVpmovsxwd, X86XmmReg, X86Mem) + + //! Packed WORD to QWORD with sign extend (AVX). + INST_2x(vpmovsxwq, kX86InstIdVpmovsxwq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpmovsxwq, kX86InstIdVpmovsxwq, X86XmmReg, X86Mem) + + //! BYTE to DWORD with zero extend (AVX). + INST_2x(vpmovzxbd, kX86InstIdVpmovzxbd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpmovzxbd, kX86InstIdVpmovzxbd, X86XmmReg, X86Mem) + + //! Packed BYTE to QWORD with zero extend (AVX). + INST_2x(vpmovzxbq, kX86InstIdVpmovzxbq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpmovzxbq, kX86InstIdVpmovzxbq, X86XmmReg, X86Mem) + + //! BYTE to WORD with zero extend (AVX). + INST_2x(vpmovzxbw, kX86InstIdVpmovzxbw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpmovzxbw, kX86InstIdVpmovzxbw, X86XmmReg, X86Mem) + + //! Packed DWORD to QWORD with zero extend (AVX). + INST_2x(vpmovzxdq, kX86InstIdVpmovzxdq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpmovzxdq, kX86InstIdVpmovzxdq, X86XmmReg, X86Mem) + + //! Packed WORD to DWORD with zero extend (AVX). + INST_2x(vpmovzxwd, kX86InstIdVpmovzxwd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpmovzxwd, kX86InstIdVpmovzxwd, X86XmmReg, X86Mem) + + //! Packed WORD to QWORD with zero extend (AVX). 
+ INST_2x(vpmovzxwq, kX86InstIdVpmovzxwq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpmovzxwq, kX86InstIdVpmovzxwq, X86XmmReg, X86Mem) + + //! Packed DWORD to QWORD multiply (AVX). + INST_3x(vpmuldq, kX86InstIdVpmuldq, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmuldq, kX86InstIdVpmuldq, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD multiply high, round and scale (AVX). + INST_3x(vpmulhrsw, kX86InstIdVpmulhrsw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmulhrsw, kX86InstIdVpmulhrsw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD unsigned multiply high (AVX). + INST_3x(vpmulhuw, kX86InstIdVpmulhuw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmulhuw, kX86InstIdVpmulhuw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD multiply high (AVX). + INST_3x(vpmulhw, kX86InstIdVpmulhw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmulhw, kX86InstIdVpmulhw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DWORD multiply low (AVX). + INST_3x(vpmulld, kX86InstIdVpmulld, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmulld, kX86InstIdVpmulld, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORDs multiply low (AVX). + INST_3x(vpmullw, kX86InstIdVpmullw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmullw, kX86InstIdVpmullw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DWORD multiply to QWORD (AVX). + INST_3x(vpmuludq, kX86InstIdVpmuludq, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpmuludq, kX86InstIdVpmuludq, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed bitwise or (AVX). + INST_3x(vpor, kX86InstIdVpor, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpor, kX86InstIdVpor, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD sum of absolute differences (AVX). + INST_3x(vpsadbw, kX86InstIdVpsadbw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpsadbw, kX86InstIdVpsadbw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed BYTE shuffle (AVX). + INST_3x(vpshufb, kX86InstIdVpshufb, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpshufb, kX86InstIdVpshufb, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DWORD shuffle (AVX). + INST_3i(vpshufd, kX86InstIdVpshufd, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpshufd, kX86InstIdVpshufd, X86XmmReg, X86Mem, Imm) + + //! Packed WORD shuffle high (AVX). + INST_3i(vpshufhw, kX86InstIdVpshufhw, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpshufhw, kX86InstIdVpshufhw, X86XmmReg, X86Mem, Imm) + + //! Packed WORD shuffle low (AVX). + INST_3i(vpshuflw, kX86InstIdVpshuflw, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vpshuflw, kX86InstIdVpshuflw, X86XmmReg, X86Mem, Imm) + + //! Packed BYTE sign (AVX). + INST_3x(vpsignb, kX86InstIdVpsignb, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpsignb, kX86InstIdVpsignb, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DWORD sign (AVX). + INST_3x(vpsignd, kX86InstIdVpsignd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpsignd, kX86InstIdVpsignd, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD sign (AVX). + INST_3x(vpsignw, kX86InstIdVpsignw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpsignw, kX86InstIdVpsignw, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DWORD shift left logical (AVX). + INST_3x(vpslld, kX86InstIdVpslld, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpslld, kX86InstIdVpslld, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3i(vpslld, kX86InstIdVpslld, X86XmmReg, X86XmmReg, Imm) + + //! 
Packed DQWORD shift left logical (AVX).
+ INST_3i(vpslldq, kX86InstIdVpslldq, X86XmmReg, X86XmmReg, Imm)
+
+ //! Packed QWORD shift left logical (AVX).
+ INST_3x(vpsllq, kX86InstIdVpsllq, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsllq, kX86InstIdVpsllq, X86XmmReg, X86XmmReg, X86Mem)
+ //! \overload
+ INST_3i(vpsllq, kX86InstIdVpsllq, X86XmmReg, X86XmmReg, Imm)
+
+ //! Packed WORD shift left logical (AVX).
+ INST_3x(vpsllw, kX86InstIdVpsllw, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsllw, kX86InstIdVpsllw, X86XmmReg, X86XmmReg, X86Mem)
+ //! \overload
+ INST_3i(vpsllw, kX86InstIdVpsllw, X86XmmReg, X86XmmReg, Imm)
+
+ //! Packed DWORD shift right arithmetic (AVX).
+ INST_3x(vpsrad, kX86InstIdVpsrad, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsrad, kX86InstIdVpsrad, X86XmmReg, X86XmmReg, X86Mem)
+ //! \overload
+ INST_3i(vpsrad, kX86InstIdVpsrad, X86XmmReg, X86XmmReg, Imm)
+
+ //! Packed WORD shift right arithmetic (AVX).
+ INST_3x(vpsraw, kX86InstIdVpsraw, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsraw, kX86InstIdVpsraw, X86XmmReg, X86XmmReg, X86Mem)
+ //! \overload
+ INST_3i(vpsraw, kX86InstIdVpsraw, X86XmmReg, X86XmmReg, Imm)
+
+ //! Packed DWORD shift right logical (AVX).
+ INST_3x(vpsrld, kX86InstIdVpsrld, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsrld, kX86InstIdVpsrld, X86XmmReg, X86XmmReg, X86Mem)
+ //! \overload
+ INST_3i(vpsrld, kX86InstIdVpsrld, X86XmmReg, X86XmmReg, Imm)
+
+ //! Packed DQWORD shift right logical (AVX).
+ INST_3i(vpsrldq, kX86InstIdVpsrldq, X86XmmReg, X86XmmReg, Imm)
+
+ //! Packed QWORD shift right logical (AVX).
+ INST_3x(vpsrlq, kX86InstIdVpsrlq, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsrlq, kX86InstIdVpsrlq, X86XmmReg, X86XmmReg, X86Mem)
+ //! \overload
+ INST_3i(vpsrlq, kX86InstIdVpsrlq, X86XmmReg, X86XmmReg, Imm)
+
+ //! Packed WORD shift right logical (AVX).
+ INST_3x(vpsrlw, kX86InstIdVpsrlw, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsrlw, kX86InstIdVpsrlw, X86XmmReg, X86XmmReg, X86Mem)
+ //! \overload
+ INST_3i(vpsrlw, kX86InstIdVpsrlw, X86XmmReg, X86XmmReg, Imm)
+
+ //! Packed BYTE subtract (AVX).
+ INST_3x(vpsubb, kX86InstIdVpsubb, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsubb, kX86InstIdVpsubb, X86XmmReg, X86XmmReg, X86Mem)
+
+ //! Packed DWORD subtract (AVX).
+ INST_3x(vpsubd, kX86InstIdVpsubd, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsubd, kX86InstIdVpsubd, X86XmmReg, X86XmmReg, X86Mem)
+
+ //! Packed QWORD subtract (AVX).
+ INST_3x(vpsubq, kX86InstIdVpsubq, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsubq, kX86InstIdVpsubq, X86XmmReg, X86XmmReg, X86Mem)
+
+ //! Packed WORD subtract (AVX).
+ INST_3x(vpsubw, kX86InstIdVpsubw, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsubw, kX86InstIdVpsubw, X86XmmReg, X86XmmReg, X86Mem)
+
+ //! Packed BYTE subtract with saturation (AVX).
+ INST_3x(vpsubsb, kX86InstIdVpsubsb, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsubsb, kX86InstIdVpsubsb, X86XmmReg, X86XmmReg, X86Mem)
+
+ //! Packed WORD subtract with saturation (AVX).
+ INST_3x(vpsubsw, kX86InstIdVpsubsw, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsubsw, kX86InstIdVpsubsw, X86XmmReg, X86XmmReg, X86Mem)
+
+ //! Packed BYTE subtract with unsigned saturation (AVX).
+ INST_3x(vpsubusb, kX86InstIdVpsubusb, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! 
\overload + INST_3x(vpsubusb, kX86InstIdVpsubusb, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed WORD subtract with unsigned saturation (AVX). + INST_3x(vpsubusw, kX86InstIdVpsubusw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpsubusw, kX86InstIdVpsubusw, X86XmmReg, X86XmmReg, X86Mem) + + //! Logical compare (AVX). + INST_2x(vptest, kX86InstIdVptest, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vptest, kX86InstIdVptest, X86XmmReg, X86Mem) + //! \overload + INST_2x(vptest, kX86InstIdVptest, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vptest, kX86InstIdVptest, X86YmmReg, X86Mem) + + //! Unpack high packed BYTEs to WORDs (AVX). + INST_3x(vpunpckhbw, kX86InstIdVpunpckhbw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpunpckhbw, kX86InstIdVpunpckhbw, X86XmmReg, X86XmmReg, X86Mem) + + //! Unpack high packed DWORDs to QWORDs (AVX). + INST_3x(vpunpckhdq, kX86InstIdVpunpckhdq, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpunpckhdq, kX86InstIdVpunpckhdq, X86XmmReg, X86XmmReg, X86Mem) + + //! Unpack high packed QWORDs to DQWORD (AVX). + INST_3x(vpunpckhqdq, kX86InstIdVpunpckhqdq, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpunpckhqdq, kX86InstIdVpunpckhqdq, X86XmmReg, X86XmmReg, X86Mem) + + //! Unpack high packed WORDs to DWORDs (AVX). + INST_3x(vpunpckhwd, kX86InstIdVpunpckhwd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpunpckhwd, kX86InstIdVpunpckhwd, X86XmmReg, X86XmmReg, X86Mem) + + //! Unpack low packed BYTEs to WORDs (AVX). + INST_3x(vpunpcklbw, kX86InstIdVpunpcklbw, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpunpcklbw, kX86InstIdVpunpcklbw, X86XmmReg, X86XmmReg, X86Mem) + + //! Unpack low packed DWORDs to QWORDs (AVX). + INST_3x(vpunpckldq, kX86InstIdVpunpckldq, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpunpckldq, kX86InstIdVpunpckldq, X86XmmReg, X86XmmReg, X86Mem) + + //! Unpack low packed QWORDs to DQWORD (AVX). + INST_3x(vpunpcklqdq, kX86InstIdVpunpcklqdq, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpunpcklqdq, kX86InstIdVpunpcklqdq, X86XmmReg, X86XmmReg, X86Mem) + + //! Unpack low packed WORDs to DWORDs (AVX). + INST_3x(vpunpcklwd, kX86InstIdVpunpcklwd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpunpcklwd, kX86InstIdVpunpcklwd, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed bitwise xor (AVX). + INST_3x(vpxor, kX86InstIdVpxor, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vpxor, kX86InstIdVpxor, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed SP-FP reciprocal (AVX). + INST_2x(vrcpps, kX86InstIdVrcpps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vrcpps, kX86InstIdVrcpps, X86XmmReg, X86Mem) + //! \overload + INST_2x(vrcpps, kX86InstIdVrcpps, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vrcpps, kX86InstIdVrcpps, X86YmmReg, X86Mem) + + //! Scalar SP-FP reciprocal (AVX). + INST_3x(vrcpss, kX86InstIdVrcpss, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vrcpss, kX86InstIdVrcpss, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed SP-FP square root reciprocal (AVX). + INST_2x(vrsqrtps, kX86InstIdVrsqrtps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vrsqrtps, kX86InstIdVrsqrtps, X86XmmReg, X86Mem) + //! \overload + INST_2x(vrsqrtps, kX86InstIdVrsqrtps, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vrsqrtps, kX86InstIdVrsqrtps, X86YmmReg, X86Mem) + + //! Scalar SP-FP square root reciprocal (AVX). + INST_3x(vrsqrtss, kX86InstIdVrsqrtss, X86XmmReg, X86XmmReg, X86XmmReg) + //! 
\overload + INST_3x(vrsqrtss, kX86InstIdVrsqrtss, X86XmmReg, X86XmmReg, X86Mem) + + //! Packed DP-FP round (AVX). + INST_3i(vroundpd, kX86InstIdVroundpd, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vroundpd, kX86InstIdVroundpd, X86XmmReg, X86Mem, Imm) + //! \overload + INST_3i(vroundpd, kX86InstIdVroundpd, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_3i(vroundpd, kX86InstIdVroundpd, X86YmmReg, X86Mem, Imm) + + //! Packed SP-FP round (AVX). + INST_3i(vroundps, kX86InstIdVroundps, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vroundps, kX86InstIdVroundps, X86XmmReg, X86Mem, Imm) + //! \overload + INST_3i(vroundps, kX86InstIdVroundps, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_3i(vroundps, kX86InstIdVroundps, X86YmmReg, X86Mem, Imm) + + //! Scalar DP-FP round (AVX). + INST_4i(vroundsd, kX86InstIdVroundsd, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vroundsd, kX86InstIdVroundsd, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Scalar SP-FP round (AVX). + INST_4i(vroundss, kX86InstIdVroundss, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vroundss, kX86InstIdVroundss, X86XmmReg, X86XmmReg, X86Mem, Imm) + + //! Shuffle DP-FP (AVX). + INST_4i(vshufpd, kX86InstIdVshufpd, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vshufpd, kX86InstIdVshufpd, X86XmmReg, X86XmmReg, X86Mem, Imm) + //! \overload + INST_4i(vshufpd, kX86InstIdVshufpd, X86YmmReg, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_4i(vshufpd, kX86InstIdVshufpd, X86YmmReg, X86YmmReg, X86Mem, Imm) + + //! Shuffle SP-FP (AVX). + INST_4i(vshufps, kX86InstIdVshufps, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vshufps, kX86InstIdVshufps, X86XmmReg, X86XmmReg, X86Mem, Imm) + //! \overload + INST_4i(vshufps, kX86InstIdVshufps, X86YmmReg, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_4i(vshufps, kX86InstIdVshufps, X86YmmReg, X86YmmReg, X86Mem, Imm) + + //! Packed DP-FP square root (AVX). + INST_2x(vsqrtpd, kX86InstIdVsqrtpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vsqrtpd, kX86InstIdVsqrtpd, X86XmmReg, X86Mem) + //! \overload + INST_2x(vsqrtpd, kX86InstIdVsqrtpd, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vsqrtpd, kX86InstIdVsqrtpd, X86YmmReg, X86Mem) + + //! Packed SP-FP square root (AVX). + INST_2x(vsqrtps, kX86InstIdVsqrtps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vsqrtps, kX86InstIdVsqrtps, X86XmmReg, X86Mem) + //! \overload + INST_2x(vsqrtps, kX86InstIdVsqrtps, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vsqrtps, kX86InstIdVsqrtps, X86YmmReg, X86Mem) + + //! Scalar DP-FP square root (AVX). + INST_3x(vsqrtsd, kX86InstIdVsqrtsd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vsqrtsd, kX86InstIdVsqrtsd, X86XmmReg, X86XmmReg, X86Mem) + + //! Scalar SP-FP square root (AVX). + INST_3x(vsqrtss, kX86InstIdVsqrtss, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vsqrtss, kX86InstIdVsqrtss, X86XmmReg, X86XmmReg, X86Mem) + + //! Store streaming SIMD extension control/status (AVX). + INST_1x(vstmxcsr, kX86InstIdVstmxcsr, X86Mem) + + //! Packed DP-FP subtract (AVX). + INST_3x(vsubpd, kX86InstIdVsubpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vsubpd, kX86InstIdVsubpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vsubpd, kX86InstIdVsubpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vsubpd, kX86InstIdVsubpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP subtract (AVX). + INST_3x(vsubps, kX86InstIdVsubps, X86XmmReg, X86XmmReg, X86XmmReg) + //! 
\overload + INST_3x(vsubps, kX86InstIdVsubps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vsubps, kX86InstIdVsubps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vsubps, kX86InstIdVsubps, X86YmmReg, X86YmmReg, X86Mem) + + //! Scalar DP-FP subtract (AVX). + INST_3x(vsubsd, kX86InstIdVsubsd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vsubsd, kX86InstIdVsubsd, X86XmmReg, X86XmmReg, X86Mem) + + //! Scalar SP-FP subtract (AVX). + INST_3x(vsubss, kX86InstIdVsubss, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vsubss, kX86InstIdVsubss, X86XmmReg, X86XmmReg, X86Mem) + + //! Logical compare DP-FP (AVX). + INST_2x(vtestpd, kX86InstIdVtestpd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vtestpd, kX86InstIdVtestpd, X86XmmReg, X86Mem) + //! \overload + INST_2x(vtestpd, kX86InstIdVtestpd, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vtestpd, kX86InstIdVtestpd, X86YmmReg, X86Mem) + + //! Logical compare SP-FP (AVX). + INST_2x(vtestps, kX86InstIdVtestps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vtestps, kX86InstIdVtestps, X86XmmReg, X86Mem) + //! \overload + INST_2x(vtestps, kX86InstIdVtestps, X86YmmReg, X86YmmReg) + //! \overload + INST_2x(vtestps, kX86InstIdVtestps, X86YmmReg, X86Mem) + + //! Scalar DP-FP unordered compare and set EFLAGS (AVX). + INST_2x(vucomisd, kX86InstIdVucomisd, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vucomisd, kX86InstIdVucomisd, X86XmmReg, X86Mem) + + //! Unordered scalar SP-FP compare and set EFLAGS (AVX). + INST_2x(vucomiss, kX86InstIdVucomiss, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vucomiss, kX86InstIdVucomiss, X86XmmReg, X86Mem) + + //! Unpack and interleave high packed DP-FP (AVX). + INST_3x(vunpckhpd, kX86InstIdVunpckhpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vunpckhpd, kX86InstIdVunpckhpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vunpckhpd, kX86InstIdVunpckhpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vunpckhpd, kX86InstIdVunpckhpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Unpack high packed SP-FP data (AVX). + INST_3x(vunpckhps, kX86InstIdVunpckhps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vunpckhps, kX86InstIdVunpckhps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vunpckhps, kX86InstIdVunpckhps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vunpckhps, kX86InstIdVunpckhps, X86YmmReg, X86YmmReg, X86Mem) + + //! Unpack and interleave low packed DP-FP (AVX). + INST_3x(vunpcklpd, kX86InstIdVunpcklpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vunpcklpd, kX86InstIdVunpcklpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vunpcklpd, kX86InstIdVunpcklpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vunpcklpd, kX86InstIdVunpcklpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Unpack low packed SP-FP data (AVX). + INST_3x(vunpcklps, kX86InstIdVunpcklps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vunpcklps, kX86InstIdVunpcklps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vunpcklps, kX86InstIdVunpcklps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vunpcklps, kX86InstIdVunpcklps, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed DP-FP bitwise xor (AVX). + INST_3x(vxorpd, kX86InstIdVxorpd, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vxorpd, kX86InstIdVxorpd, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vxorpd, kX86InstIdVxorpd, X86YmmReg, X86YmmReg, X86YmmReg) + //! 
\overload + INST_3x(vxorpd, kX86InstIdVxorpd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed SP-FP bitwise xor (AVX). + INST_3x(vxorps, kX86InstIdVxorps, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vxorps, kX86InstIdVxorps, X86XmmReg, X86XmmReg, X86Mem) + //! \overload + INST_3x(vxorps, kX86InstIdVxorps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vxorps, kX86InstIdVxorps, X86YmmReg, X86YmmReg, X86Mem) + + //! Zero all YMM registers. + INST_0x(vzeroall, kX86InstIdVzeroall) + //! Zero upper 128-bits of all YMM registers. + INST_0x(vzeroupper, kX86InstIdVzeroupper) + + // -------------------------------------------------------------------------- + // [AVX+AESNI] + // -------------------------------------------------------------------------- + + //! Perform a single round of the AES decryption flow (AVX+AESNI). + INST_3x(vaesdec, kX86InstIdVaesdec, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vaesdec, kX86InstIdVaesdec, X86XmmReg, X86XmmReg, X86Mem) + + //! Perform the last round of the AES decryption flow (AVX+AESNI). + INST_3x(vaesdeclast, kX86InstIdVaesdeclast, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vaesdeclast, kX86InstIdVaesdeclast, X86XmmReg, X86XmmReg, X86Mem) + + //! Perform a single round of the AES encryption flow (AVX+AESNI). + INST_3x(vaesenc, kX86InstIdVaesenc, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vaesenc, kX86InstIdVaesenc, X86XmmReg, X86XmmReg, X86Mem) + + //! Perform the last round of the AES encryption flow (AVX+AESNI). + INST_3x(vaesenclast, kX86InstIdVaesenclast, X86XmmReg, X86XmmReg, X86XmmReg) + //! \overload + INST_3x(vaesenclast, kX86InstIdVaesenclast, X86XmmReg, X86XmmReg, X86Mem) + + //! Perform the InvMixColumns transformation (AVX+AESNI). + INST_2x(vaesimc, kX86InstIdVaesimc, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vaesimc, kX86InstIdVaesimc, X86XmmReg, X86Mem) + + //! Assist in expanding the AES cipher key (AVX+AESNI). + INST_3i(vaeskeygenassist, kX86InstIdVaeskeygenassist, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vaeskeygenassist, kX86InstIdVaeskeygenassist, X86XmmReg, X86Mem, Imm) + + // -------------------------------------------------------------------------- + // [AVX+PCLMULQDQ] + // -------------------------------------------------------------------------- + + //! Carry-less multiplication QWORD (AVX+PCLMULQDQ). + INST_4i(vpclmulqdq, kX86InstIdVpclmulqdq, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_4i(vpclmulqdq, kX86InstIdVpclmulqdq, X86XmmReg, X86XmmReg, X86Mem, Imm) + + // -------------------------------------------------------------------------- + // [AVX2] + // -------------------------------------------------------------------------- + + //! Broadcast low 128-bit element in `o1` to `o0` (AVX2). + INST_2x(vbroadcasti128, kX86InstIdVbroadcasti128, X86YmmReg, X86Mem) + //! Broadcast low DP-FP element in `o1` to `o0` (AVX2). + INST_2x(vbroadcastsd, kX86InstIdVbroadcastsd, X86YmmReg, X86XmmReg) + //! Broadcast low SP-FP element in `o1` to `o0` (AVX2). + INST_2x(vbroadcastss, kX86InstIdVbroadcastss, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vbroadcastss, kX86InstIdVbroadcastss, X86YmmReg, X86XmmReg) + + //! Extract 128-bit element from `o1` to `o0` based on selector (AVX2). + INST_3i(vextracti128, kX86InstIdVextracti128, X86XmmReg, X86YmmReg, Imm) + //! \overload + INST_3i(vextracti128, kX86InstIdVextracti128, X86Mem, X86YmmReg, Imm) + + //! Gather DP-FP from DWORD indexes specified in `o1`s VSIB (AVX2). 
+ INST_3x(vgatherdpd, kX86InstIdVgatherdpd, X86XmmReg, X86Mem, X86XmmReg)
+ //! \overload
+ INST_3x(vgatherdpd, kX86InstIdVgatherdpd, X86YmmReg, X86Mem, X86YmmReg)
+
+ //! Gather SP-FP from DWORD indexes specified in `o1`s VSIB (AVX2).
+ INST_3x(vgatherdps, kX86InstIdVgatherdps, X86XmmReg, X86Mem, X86XmmReg)
+ //! \overload
+ INST_3x(vgatherdps, kX86InstIdVgatherdps, X86YmmReg, X86Mem, X86YmmReg)
+
+ //! Gather DP-FP from QWORD indexes specified in `o1`s VSIB (AVX2).
+ INST_3x(vgatherqpd, kX86InstIdVgatherqpd, X86XmmReg, X86Mem, X86XmmReg)
+ //! \overload
+ INST_3x(vgatherqpd, kX86InstIdVgatherqpd, X86YmmReg, X86Mem, X86YmmReg)
+
+ //! Gather SP-FP from QWORD indexes specified in `o1`s VSIB (AVX2).
+ INST_3x(vgatherqps, kX86InstIdVgatherqps, X86XmmReg, X86Mem, X86XmmReg)
+
+ //! Insert 128-bit of packed data based on selector (AVX2).
+ INST_4i(vinserti128, kX86InstIdVinserti128, X86YmmReg, X86YmmReg, X86XmmReg, Imm)
+ //! \overload
+ INST_4i(vinserti128, kX86InstIdVinserti128, X86YmmReg, X86YmmReg, X86Mem, Imm)
+
+ //! Load 256-bits aligned using NT hint (AVX2).
+ INST_2x(vmovntdqa, kX86InstIdVmovntdqa, X86YmmReg, X86Mem)
+
+ //! Packed WORD sums of absolute difference (AVX2).
+ INST_4i(vmpsadbw, kX86InstIdVmpsadbw, X86YmmReg, X86YmmReg, X86YmmReg, Imm)
+ //! \overload
+ INST_4i(vmpsadbw, kX86InstIdVmpsadbw, X86YmmReg, X86YmmReg, X86Mem, Imm)
+
+ //! Packed BYTE absolute value (AVX2).
+ INST_2x(vpabsb, kX86InstIdVpabsb, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_2x(vpabsb, kX86InstIdVpabsb, X86YmmReg, X86Mem)
+
+ //! Packed DWORD absolute value (AVX2).
+ INST_2x(vpabsd, kX86InstIdVpabsd, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_2x(vpabsd, kX86InstIdVpabsd, X86YmmReg, X86Mem)
+
+ //! Packed WORD absolute value (AVX2).
+ INST_2x(vpabsw, kX86InstIdVpabsw, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_2x(vpabsw, kX86InstIdVpabsw, X86YmmReg, X86Mem)
+
+ //! Pack DWORDs to WORDs with signed saturation (AVX2).
+ INST_3x(vpackssdw, kX86InstIdVpackssdw, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpackssdw, kX86InstIdVpackssdw, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Pack WORDs to BYTEs with signed saturation (AVX2).
+ INST_3x(vpacksswb, kX86InstIdVpacksswb, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpacksswb, kX86InstIdVpacksswb, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Pack DWORDs to WORDs with unsigned saturation (AVX2).
+ INST_3x(vpackusdw, kX86InstIdVpackusdw, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpackusdw, kX86InstIdVpackusdw, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Pack WORDs to BYTEs with unsigned saturation (AVX2).
+ INST_3x(vpackuswb, kX86InstIdVpackuswb, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpackuswb, kX86InstIdVpackuswb, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed BYTE add (AVX2).
+ INST_3x(vpaddb, kX86InstIdVpaddb, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpaddb, kX86InstIdVpaddb, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed DWORD add (AVX2).
+ INST_3x(vpaddd, kX86InstIdVpaddd, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpaddd, kX86InstIdVpaddd, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed QWORD add (AVX2).
+ INST_3x(vpaddq, kX86InstIdVpaddq, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpaddq, kX86InstIdVpaddq, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed WORD add (AVX2).
+ INST_3x(vpaddw, kX86InstIdVpaddw, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpaddw, kX86InstIdVpaddw, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed BYTE add with saturation (AVX2).
+ INST_3x(vpaddsb, kX86InstIdVpaddsb, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpaddsb, kX86InstIdVpaddsb, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed WORD add with saturation (AVX2).
+ INST_3x(vpaddsw, kX86InstIdVpaddsw, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpaddsw, kX86InstIdVpaddsw, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed BYTE add with unsigned saturation (AVX2).
+ INST_3x(vpaddusb, kX86InstIdVpaddusb, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpaddusb, kX86InstIdVpaddusb, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed WORD add with unsigned saturation (AVX2).
+ INST_3x(vpaddusw, kX86InstIdVpaddusw, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpaddusw, kX86InstIdVpaddusw, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed align right (AVX2).
+ INST_4i(vpalignr, kX86InstIdVpalignr, X86YmmReg, X86YmmReg, X86YmmReg, Imm)
+ //! \overload
+ INST_4i(vpalignr, kX86InstIdVpalignr, X86YmmReg, X86YmmReg, X86Mem, Imm)
+
+ //! Packed bitwise and (AVX2).
+ INST_3x(vpand, kX86InstIdVpand, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpand, kX86InstIdVpand, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed bitwise and-not (AVX2).
+ INST_3x(vpandn, kX86InstIdVpandn, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpandn, kX86InstIdVpandn, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed BYTE average (AVX2).
+ INST_3x(vpavgb, kX86InstIdVpavgb, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpavgb, kX86InstIdVpavgb, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed WORD average (AVX2).
+ INST_3x(vpavgw, kX86InstIdVpavgw, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_3x(vpavgw, kX86InstIdVpavgw, X86YmmReg, X86YmmReg, X86Mem)
+
+ //! Packed DWORD blend (AVX2).
+ INST_4i(vpblendd, kX86InstIdVpblendd, X86XmmReg, X86XmmReg, X86XmmReg, Imm)
+ //! \overload
+ INST_4i(vpblendd, kX86InstIdVpblendd, X86XmmReg, X86XmmReg, X86Mem, Imm)
+ //! \overload
+ INST_4i(vpblendd, kX86InstIdVpblendd, X86YmmReg, X86YmmReg, X86YmmReg, Imm)
+ //! \overload
+ INST_4i(vpblendd, kX86InstIdVpblendd, X86YmmReg, X86YmmReg, X86Mem, Imm)
+
+ //! Packed BYTE variable blend (AVX2).
+ INST_4x(vpblendvb, kX86InstIdVpblendvb, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg)
+ //! \overload
+ INST_4x(vpblendvb, kX86InstIdVpblendvb, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg)
+
+ //! Packed WORD blend (AVX2).
+ INST_4i(vpblendw, kX86InstIdVpblendw, X86YmmReg, X86YmmReg, X86YmmReg, Imm)
+ //! \overload
+ INST_4i(vpblendw, kX86InstIdVpblendw, X86YmmReg, X86YmmReg, X86Mem, Imm)
+
+ //! Broadcast BYTE from `o1` to 128-bits in `o0` (AVX2).
+ INST_2x(vpbroadcastb, kX86InstIdVpbroadcastb, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_2x(vpbroadcastb, kX86InstIdVpbroadcastb, X86XmmReg, X86Mem)
+ //! Broadcast BYTE from `o1` to 256-bits in `o0` (AVX2).
+ INST_2x(vpbroadcastb, kX86InstIdVpbroadcastb, X86YmmReg, X86XmmReg)
+ //! \overload
+ INST_2x(vpbroadcastb, kX86InstIdVpbroadcastb, X86YmmReg, X86Mem)
+
+ //! Broadcast DWORD from `o1` to 128-bits in `o0` (AVX2).
+ INST_2x(vpbroadcastd, kX86InstIdVpbroadcastd, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_2x(vpbroadcastd, kX86InstIdVpbroadcastd, X86XmmReg, X86Mem)
+ //! Broadcast DWORD from `o1` to 256-bits in `o0` (AVX2).
+ INST_2x(vpbroadcastd, kX86InstIdVpbroadcastd, X86YmmReg, X86XmmReg)
+ //! \overload
+ INST_2x(vpbroadcastd, kX86InstIdVpbroadcastd, X86YmmReg, X86Mem)
+
+ //! Broadcast QWORD from `o1` to 128-bits in `o0` (AVX2).
+ INST_2x(vpbroadcastq, kX86InstIdVpbroadcastq, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpbroadcastq, kX86InstIdVpbroadcastq, X86XmmReg, X86Mem) + //! Broadcast QWORD from `o1` to 256-bits in `o0` (AVX2). + INST_2x(vpbroadcastq, kX86InstIdVpbroadcastq, X86YmmReg, X86XmmReg) + //! \overload + INST_2x(vpbroadcastq, kX86InstIdVpbroadcastq, X86YmmReg, X86Mem) + + //! Broadcast WORD from `o1` to 128-bits in `o0` (AVX2). + INST_2x(vpbroadcastw, kX86InstIdVpbroadcastw, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vpbroadcastw, kX86InstIdVpbroadcastw, X86XmmReg, X86Mem) + //! Broadcast WORD from `o1` to 256-bits in `o0` (AVX2). + INST_2x(vpbroadcastw, kX86InstIdVpbroadcastw, X86YmmReg, X86XmmReg) + //! \overload + INST_2x(vpbroadcastw, kX86InstIdVpbroadcastw, X86YmmReg, X86Mem) + + //! Packed BYTEs compare for equality (AVX2). + INST_3x(vpcmpeqb, kX86InstIdVpcmpeqb, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vpcmpeqb, kX86InstIdVpcmpeqb, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed DWORDs compare for equality (AVX2). + INST_3x(vpcmpeqd, kX86InstIdVpcmpeqd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vpcmpeqd, kX86InstIdVpcmpeqd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed QWORDs compare for equality (AVX2). + INST_3x(vpcmpeqq, kX86InstIdVpcmpeqq, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vpcmpeqq, kX86InstIdVpcmpeqq, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed WORDs compare for equality (AVX2). + INST_3x(vpcmpeqw, kX86InstIdVpcmpeqw, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vpcmpeqw, kX86InstIdVpcmpeqw, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed BYTEs compare if greater than (AVX2). + INST_3x(vpcmpgtb, kX86InstIdVpcmpgtb, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vpcmpgtb, kX86InstIdVpcmpgtb, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed DWORDs compare if greater than (AVX2). + INST_3x(vpcmpgtd, kX86InstIdVpcmpgtd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vpcmpgtd, kX86InstIdVpcmpgtd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed QWORDs compare if greater than (AVX2). + INST_3x(vpcmpgtq, kX86InstIdVpcmpgtq, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vpcmpgtq, kX86InstIdVpcmpgtq, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed WORDs compare if greater than (AVX2). + INST_3x(vpcmpgtw, kX86InstIdVpcmpgtw, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vpcmpgtw, kX86InstIdVpcmpgtw, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed DQWORD permute (AVX2). + INST_4i(vperm2i128, kX86InstIdVperm2i128, X86YmmReg, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_4i(vperm2i128, kX86InstIdVperm2i128, X86YmmReg, X86YmmReg, X86Mem, Imm) + + //! Packed DWORD permute (AVX2). + INST_3x(vpermd, kX86InstIdVpermd, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vpermd, kX86InstIdVpermd, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed DP-FP permute (AVX2). + INST_3i(vpermpd, kX86InstIdVpermpd, X86YmmReg, X86YmmReg, Imm) + //! \overload + INST_3i(vpermpd, kX86InstIdVpermpd, X86YmmReg, X86Mem, Imm) + + //! Packed SP-FP permute (AVX2). + INST_3x(vpermps, kX86InstIdVpermps, X86YmmReg, X86YmmReg, X86YmmReg) + //! \overload + INST_3x(vpermps, kX86InstIdVpermps, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed QWORD permute (AVX2). + INST_3i(vpermq, kX86InstIdVpermq, X86YmmReg, X86YmmReg, Imm) + //! 
\overload
+ INST_3i(vpermq, kX86InstIdVpermq, X86YmmReg, X86Mem, Imm)
+
+ //! Gather DWORDs from DWORD indexes specified in `o1`s VSIB (AVX2).
+ INST_3x(vpgatherdd, kX86InstIdVpgatherdd, X86XmmReg, X86Mem, X86XmmReg)
+ //! \overload
+ INST_3x(vpgatherdd, kX86InstIdVpgatherdd, X86YmmReg, X86Mem, X86YmmReg)
+
+ //! Gather QWORDs from DWORD indexes specified in `o1`s VSIB (AVX2).
+ INST_3x(vpgatherdq, kX86InstIdVpgatherdq, X86XmmReg, X86Mem, X86XmmReg)
+ //! \overload
+ INST_3x(vpgatherdq, kX86InstIdVpgatherdq, X86YmmReg, X86Mem, X86YmmReg)
+
+ //! Gather DWORDs from QWORD indexes specified in `o1`s VSIB (AVX2).
+ INST_3x(vpgatherqd, kX86InstIdVpgatherqd, X86XmmReg, X86Mem, X86XmmReg)
+
+ //! Gather QWORDs from QWORD indexes specified in `o1`s VSIB (AVX2).
+ INST_3x(vpgatherqq, kX86InstIdVpgatherqq, X86XmmReg, X86Mem, X86XmmReg)
+ //! \overload
+ INST_3x(vpgatherqq, kX86InstIdVpgatherqq, X86YmmReg, X86Mem, X86YmmReg)
+
+ //! Packed DWORD horizontal add (AVX2).
+ INST_3x(vphaddd, kX86InstIdVphaddd, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vphaddd, kX86InstIdVphaddd, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed WORD horizontal add with saturation (AVX2).
+ INST_3x(vphaddsw, kX86InstIdVphaddsw, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vphaddsw, kX86InstIdVphaddsw, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed WORD horizontal add (AVX2).
+ INST_3x(vphaddw, kX86InstIdVphaddw, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vphaddw, kX86InstIdVphaddw, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed DWORD horizontal subtract (AVX2).
+ INST_3x(vphsubd, kX86InstIdVphsubd, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vphsubd, kX86InstIdVphsubd, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed WORD horizontal subtract with saturation (AVX2).
+ INST_3x(vphsubsw, kX86InstIdVphsubsw, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vphsubsw, kX86InstIdVphsubsw, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed WORD horizontal subtract (AVX2).
+ INST_3x(vphsubw, kX86InstIdVphsubw, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vphsubw, kX86InstIdVphsubw, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Move Byte mask to integer (AVX2).
+ INST_2x(vpmovmskb, kX86InstIdVpmovmskb, X86GpReg, X86YmmReg)
+
+ //! Packed BYTE to DWORD with sign extend (AVX2).
+ INST_2x(vpmovsxbd, kX86InstIdVpmovsxbd, X86YmmReg, X86Mem)
+ //! \overload
+ INST_2x(vpmovsxbd, kX86InstIdVpmovsxbd, X86YmmReg, X86XmmReg)
+
+ //! Packed BYTE to QWORD with sign extend (AVX2).
+ INST_2x(vpmovsxbq, kX86InstIdVpmovsxbq, X86YmmReg, X86Mem)
+ //! \overload
+ INST_2x(vpmovsxbq, kX86InstIdVpmovsxbq, X86YmmReg, X86XmmReg)
+
+ //! Packed BYTE to WORD with sign extend (AVX2).
+ INST_2x(vpmovsxbw, kX86InstIdVpmovsxbw, X86YmmReg, X86Mem)
+ //! \overload
+ INST_2x(vpmovsxbw, kX86InstIdVpmovsxbw, X86YmmReg, X86XmmReg)
+
+ //! Packed DWORD to QWORD with sign extend (AVX2).
+ INST_2x(vpmovsxdq, kX86InstIdVpmovsxdq, X86YmmReg, X86Mem)
+ //! \overload
+ INST_2x(vpmovsxdq, kX86InstIdVpmovsxdq, X86YmmReg, X86XmmReg)
+
+ //! Packed WORD to DWORD with sign extend (AVX2).
+ INST_2x(vpmovsxwd, kX86InstIdVpmovsxwd, X86YmmReg, X86Mem)
+ //! \overload
+ INST_2x(vpmovsxwd, kX86InstIdVpmovsxwd, X86YmmReg, X86XmmReg)
+
+ //! Packed WORD to QWORD with sign extend (AVX2).
+ INST_2x(vpmovsxwq, kX86InstIdVpmovsxwq, X86YmmReg, X86Mem)
+ //! \overload
+ INST_2x(vpmovsxwq, kX86InstIdVpmovsxwq, X86YmmReg, X86XmmReg)
+
+ //! Packed BYTE to DWORD with zero extend (AVX2).
+ INST_2x(vpmovzxbd, kX86InstIdVpmovzxbd, X86YmmReg, X86Mem)
+ //! \overload
+ INST_2x(vpmovzxbd, kX86InstIdVpmovzxbd, X86YmmReg, X86XmmReg)
+
+ //! Packed BYTE to QWORD with zero extend (AVX2).
+ INST_2x(vpmovzxbq, kX86InstIdVpmovzxbq, X86YmmReg, X86Mem)
+ //! \overload
+ INST_2x(vpmovzxbq, kX86InstIdVpmovzxbq, X86YmmReg, X86XmmReg)
+
+ //! Packed BYTE to WORD with zero extend (AVX2).
+ INST_2x(vpmovzxbw, kX86InstIdVpmovzxbw, X86YmmReg, X86Mem) + //! \overload + INST_2x(vpmovzxbw, kX86InstIdVpmovzxbw, X86YmmReg, X86XmmReg) + + //! Packed DWORD to QWORD with zero extend (AVX2). + INST_2x(vpmovzxdq, kX86InstIdVpmovzxdq, X86YmmReg, X86Mem) + //! \overload + INST_2x(vpmovzxdq, kX86InstIdVpmovzxdq, X86YmmReg, X86XmmReg) + + //! Packed WORD to DWORD with zero extend (AVX2). + INST_2x(vpmovzxwd, kX86InstIdVpmovzxwd, X86YmmReg, X86Mem) + //! \overload + INST_2x(vpmovzxwd, kX86InstIdVpmovzxwd, X86YmmReg, X86XmmReg) + + //! Packed WORD to QWORD with zero extend (AVX2). + INST_2x(vpmovzxwq, kX86InstIdVpmovzxwq, X86YmmReg, X86Mem) + //! \overload + INST_2x(vpmovzxwq, kX86InstIdVpmovzxwq, X86YmmReg, X86XmmReg) + + //! Packed multiply and add signed and unsigned bytes (AVX2). + INST_3x(vpmaddubsw, kX86InstIdVpmaddubsw, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmaddubsw, kX86InstIdVpmaddubsw, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed WORD multiply and add to packed DWORD (AVX2). + INST_3x(vpmaddwd, kX86InstIdVpmaddwd, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmaddwd, kX86InstIdVpmaddwd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vpmaskmovd, kX86InstIdVpmaskmovd, X86Mem, X86XmmReg, X86XmmReg) + INST_3x(vpmaskmovd, kX86InstIdVpmaskmovd, X86Mem, X86YmmReg, X86YmmReg) + INST_3x(vpmaskmovd, kX86InstIdVpmaskmovd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vpmaskmovd, kX86InstIdVpmaskmovd, X86YmmReg, X86YmmReg, X86Mem) + + INST_3x(vpmaskmovq, kX86InstIdVpmaskmovq, X86Mem, X86XmmReg, X86XmmReg) + INST_3x(vpmaskmovq, kX86InstIdVpmaskmovq, X86Mem, X86YmmReg, X86YmmReg) + INST_3x(vpmaskmovq, kX86InstIdVpmaskmovq, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vpmaskmovq, kX86InstIdVpmaskmovq, X86YmmReg, X86YmmReg, X86Mem) + + //! Packed BYTE maximum (AVX2). + INST_3x(vpmaxsb, kX86InstIdVpmaxsb, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmaxsb, kX86InstIdVpmaxsb, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed DWORD maximum (AVX2). + INST_3x(vpmaxsd, kX86InstIdVpmaxsd, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmaxsd, kX86InstIdVpmaxsd, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed WORD maximum (AVX2). + INST_3x(vpmaxsw, kX86InstIdVpmaxsw, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmaxsw, kX86InstIdVpmaxsw, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed BYTE unsigned maximum (AVX2). + INST_3x(vpmaxub, kX86InstIdVpmaxub, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmaxub, kX86InstIdVpmaxub, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed DWORD unsigned maximum (AVX2). + INST_3x(vpmaxud, kX86InstIdVpmaxud, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmaxud, kX86InstIdVpmaxud, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed WORD unsigned maximum (AVX2). + INST_3x(vpmaxuw, kX86InstIdVpmaxuw, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmaxuw, kX86InstIdVpmaxuw, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed BYTE minimum (AVX2). + INST_3x(vpminsb, kX86InstIdVpminsb, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpminsb, kX86InstIdVpminsb, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed DWORD minimum (AVX2). + INST_3x(vpminsd, kX86InstIdVpminsd, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpminsd, kX86InstIdVpminsd, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed WORD minimum (AVX2). + INST_3x(vpminsw, kX86InstIdVpminsw, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpminsw, kX86InstIdVpminsw, X86YmmReg, X86YmmReg, X86YmmReg) + + //! 
Packed BYTE unsigned minimum (AVX2). + INST_3x(vpminub, kX86InstIdVpminub, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpminub, kX86InstIdVpminub, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed DWORD unsigned minimum (AVX2). + INST_3x(vpminud, kX86InstIdVpminud, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpminud, kX86InstIdVpminud, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed WORD unsigned minimum (AVX2). + INST_3x(vpminuw, kX86InstIdVpminuw, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpminuw, kX86InstIdVpminuw, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed DWORD to QWORD multiply (AVX2). + INST_3x(vpmuldq, kX86InstIdVpmuldq, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmuldq, kX86InstIdVpmuldq, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed WORD multiply high, round and scale (AVX2). + INST_3x(vpmulhrsw, kX86InstIdVpmulhrsw, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmulhrsw, kX86InstIdVpmulhrsw, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed WORD unsigned multiply high (AVX2). + INST_3x(vpmulhuw, kX86InstIdVpmulhuw, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmulhuw, kX86InstIdVpmulhuw, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed WORD multiply high (AVX2). + INST_3x(vpmulhw, kX86InstIdVpmulhw, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmulhw, kX86InstIdVpmulhw, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed DWORD multiply low (AVX2). + INST_3x(vpmulld, kX86InstIdVpmulld, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmulld, kX86InstIdVpmulld, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed WORDs multiply low (AVX2). + INST_3x(vpmullw, kX86InstIdVpmullw, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmullw, kX86InstIdVpmullw, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed DWORD multiply to QWORD (AVX2). + INST_3x(vpmuludq, kX86InstIdVpmuludq, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpmuludq, kX86InstIdVpmuludq, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed bitwise or (AVX2). + INST_3x(vpor, kX86InstIdVpor, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpor, kX86InstIdVpor, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed WORD sum of absolute differences (AVX2). + INST_3x(vpsadbw, kX86InstIdVpsadbw, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpsadbw, kX86InstIdVpsadbw, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed BYTE shuffle (AVX2). + INST_3x(vpshufb, kX86InstIdVpshufb, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpshufb, kX86InstIdVpshufb, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed DWORD shuffle (AVX2). + INST_3i(vpshufd, kX86InstIdVpshufd, X86YmmReg, X86Mem, Imm) + //! \overload + INST_3i(vpshufd, kX86InstIdVpshufd, X86YmmReg, X86YmmReg, Imm) + + //! Packed WORD shuffle high (AVX2). + INST_3i(vpshufhw, kX86InstIdVpshufhw, X86YmmReg, X86Mem, Imm) + //! \overload + INST_3i(vpshufhw, kX86InstIdVpshufhw, X86YmmReg, X86YmmReg, Imm) + + //! Packed WORD shuffle low (AVX2). + INST_3i(vpshuflw, kX86InstIdVpshuflw, X86YmmReg, X86Mem, Imm) + //! \overload + INST_3i(vpshuflw, kX86InstIdVpshuflw, X86YmmReg, X86YmmReg, Imm) + + //! Packed BYTE sign (AVX2). + INST_3x(vpsignb, kX86InstIdVpsignb, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpsignb, kX86InstIdVpsignb, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed DWORD sign (AVX2). + INST_3x(vpsignd, kX86InstIdVpsignd, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpsignd, kX86InstIdVpsignd, X86YmmReg, X86YmmReg, X86YmmReg) + + //! 
Packed WORD sign (AVX2).
+ INST_3x(vpsignw, kX86InstIdVpsignw, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsignw, kX86InstIdVpsignw, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed DWORD shift left logical (AVX2).
+ INST_3x(vpslld, kX86InstIdVpslld, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpslld, kX86InstIdVpslld, X86YmmReg, X86YmmReg, X86XmmReg)
+ //! \overload
+ INST_3i(vpslld, kX86InstIdVpslld, X86YmmReg, X86YmmReg, Imm)
+
+ //! Packed DQWORD shift left logical (AVX2).
+ INST_3i(vpslldq, kX86InstIdVpslldq, X86YmmReg, X86YmmReg, Imm)
+
+ //! Packed QWORD shift left logical (AVX2).
+ INST_3x(vpsllq, kX86InstIdVpsllq, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsllq, kX86InstIdVpsllq, X86YmmReg, X86YmmReg, X86XmmReg)
+ //! \overload
+ INST_3i(vpsllq, kX86InstIdVpsllq, X86YmmReg, X86YmmReg, Imm)
+
+ //! Packed DWORD shift left logical with variable count (AVX2).
+ INST_3x(vpsllvd, kX86InstIdVpsllvd, X86XmmReg, X86XmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsllvd, kX86InstIdVpsllvd, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsllvd, kX86InstIdVpsllvd, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsllvd, kX86InstIdVpsllvd, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed QWORD shift left logical with variable count (AVX2).
+ INST_3x(vpsllvq, kX86InstIdVpsllvq, X86XmmReg, X86XmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsllvq, kX86InstIdVpsllvq, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsllvq, kX86InstIdVpsllvq, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsllvq, kX86InstIdVpsllvq, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed WORD shift left logical (AVX2).
+ INST_3x(vpsllw, kX86InstIdVpsllw, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsllw, kX86InstIdVpsllw, X86YmmReg, X86YmmReg, X86XmmReg)
+ //! \overload
+ INST_3i(vpsllw, kX86InstIdVpsllw, X86YmmReg, X86YmmReg, Imm)
+
+ //! Packed DWORD shift right arithmetic (AVX2).
+ INST_3x(vpsrad, kX86InstIdVpsrad, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsrad, kX86InstIdVpsrad, X86YmmReg, X86YmmReg, X86XmmReg)
+ //! \overload
+ INST_3i(vpsrad, kX86InstIdVpsrad, X86YmmReg, X86YmmReg, Imm)
+
+ //! Packed DWORD shift right arithmetic with variable count (AVX2).
+ INST_3x(vpsravd, kX86InstIdVpsravd, X86XmmReg, X86XmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsravd, kX86InstIdVpsravd, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsravd, kX86InstIdVpsravd, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsravd, kX86InstIdVpsravd, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed WORD shift right arithmetic (AVX2).
+ INST_3x(vpsraw, kX86InstIdVpsraw, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsraw, kX86InstIdVpsraw, X86YmmReg, X86YmmReg, X86XmmReg)
+ //! \overload
+ INST_3i(vpsraw, kX86InstIdVpsraw, X86YmmReg, X86YmmReg, Imm)
+
+ //! Packed DWORD shift right logical (AVX2).
+ INST_3x(vpsrld, kX86InstIdVpsrld, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsrld, kX86InstIdVpsrld, X86YmmReg, X86YmmReg, X86XmmReg)
+ //! \overload
+ INST_3i(vpsrld, kX86InstIdVpsrld, X86YmmReg, X86YmmReg, Imm)
+
+ //! Packed DQWORD shift right logical (AVX2).
+ INST_3i(vpsrldq, kX86InstIdVpsrldq, X86YmmReg, X86YmmReg, Imm)
+
+ //! Packed QWORD shift right logical (AVX2).
+ INST_3x(vpsrlq, kX86InstIdVpsrlq, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsrlq, kX86InstIdVpsrlq, X86YmmReg, X86YmmReg, X86XmmReg)
+ //! \overload
+ INST_3i(vpsrlq, kX86InstIdVpsrlq, X86YmmReg, X86YmmReg, Imm)
+
+ //! Packed DWORD shift right logical with variable count (AVX2).
+ INST_3x(vpsrlvd, kX86InstIdVpsrlvd, X86XmmReg, X86XmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsrlvd, kX86InstIdVpsrlvd, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsrlvd, kX86InstIdVpsrlvd, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsrlvd, kX86InstIdVpsrlvd, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed QWORD shift right logical with variable count (AVX2).
+ INST_3x(vpsrlvq, kX86InstIdVpsrlvq, X86XmmReg, X86XmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsrlvq, kX86InstIdVpsrlvq, X86XmmReg, X86XmmReg, X86XmmReg)
+ //! \overload
+ INST_3x(vpsrlvq, kX86InstIdVpsrlvq, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsrlvq, kX86InstIdVpsrlvq, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed WORD shift right logical (AVX2).
+ INST_3x(vpsrlw, kX86InstIdVpsrlw, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsrlw, kX86InstIdVpsrlw, X86YmmReg, X86YmmReg, X86XmmReg)
+ //! \overload
+ INST_3i(vpsrlw, kX86InstIdVpsrlw, X86YmmReg, X86YmmReg, Imm)
+
+ //! Packed BYTE subtract (AVX2).
+ INST_3x(vpsubb, kX86InstIdVpsubb, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsubb, kX86InstIdVpsubb, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed DWORD subtract (AVX2).
+ INST_3x(vpsubd, kX86InstIdVpsubd, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsubd, kX86InstIdVpsubd, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed QWORD subtract (AVX2).
+ INST_3x(vpsubq, kX86InstIdVpsubq, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsubq, kX86InstIdVpsubq, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed BYTE subtract with saturation (AVX2).
+ INST_3x(vpsubsb, kX86InstIdVpsubsb, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsubsb, kX86InstIdVpsubsb, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed WORD subtract with saturation (AVX2).
+ INST_3x(vpsubsw, kX86InstIdVpsubsw, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsubsw, kX86InstIdVpsubsw, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed BYTE subtract with unsigned saturation (AVX2).
+ INST_3x(vpsubusb, kX86InstIdVpsubusb, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsubusb, kX86InstIdVpsubusb, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed WORD subtract with unsigned saturation (AVX2).
+ INST_3x(vpsubusw, kX86InstIdVpsubusw, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsubusw, kX86InstIdVpsubusw, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Packed WORD subtract (AVX2).
+ INST_3x(vpsubw, kX86InstIdVpsubw, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpsubw, kX86InstIdVpsubw, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Unpack high packed BYTEs to WORDs (AVX2).
+ INST_3x(vpunpckhbw, kX86InstIdVpunpckhbw, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpunpckhbw, kX86InstIdVpunpckhbw, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Unpack high packed DWORDs to QWORDs (AVX2).
+ INST_3x(vpunpckhdq, kX86InstIdVpunpckhdq, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpunpckhdq, kX86InstIdVpunpckhdq, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Unpack high packed QWORDs to DQWORD (AVX2).
+ INST_3x(vpunpckhqdq, kX86InstIdVpunpckhqdq, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpunpckhqdq, kX86InstIdVpunpckhqdq, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Unpack high packed WORDs to DWORDs (AVX2).
+ INST_3x(vpunpckhwd, kX86InstIdVpunpckhwd, X86YmmReg, X86YmmReg, X86Mem)
+ //! \overload
+ INST_3x(vpunpckhwd, kX86InstIdVpunpckhwd, X86YmmReg, X86YmmReg, X86YmmReg)
+
+ //! Unpack low packed BYTEs to WORDs (AVX2).
+ INST_3x(vpunpcklbw, kX86InstIdVpunpcklbw, X86YmmReg, X86YmmReg, X86Mem)
+ //! 
\overload + INST_3x(vpunpcklbw, kX86InstIdVpunpcklbw, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Unpack low packed DWORDs to QWORDs (AVX2). + INST_3x(vpunpckldq, kX86InstIdVpunpckldq, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpunpckldq, kX86InstIdVpunpckldq, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Unpack low packed QWORDs to DQWORD (AVX2). + INST_3x(vpunpcklqdq, kX86InstIdVpunpcklqdq, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpunpcklqdq, kX86InstIdVpunpcklqdq, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Unpack low packed WORDs to DWORDs (AVX2). + INST_3x(vpunpcklwd, kX86InstIdVpunpcklwd, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpunpcklwd, kX86InstIdVpunpcklwd, X86YmmReg, X86YmmReg, X86YmmReg) + + //! Packed bitwise xor (AVX2). + INST_3x(vpxor, kX86InstIdVpxor, X86YmmReg, X86YmmReg, X86Mem) + //! \overload + INST_3x(vpxor, kX86InstIdVpxor, X86YmmReg, X86YmmReg, X86YmmReg) + + // -------------------------------------------------------------------------- + // [FMA3] + // -------------------------------------------------------------------------- + + INST_3x(vfmadd132pd, kX86InstIdVfmadd132pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmadd132pd, kX86InstIdVfmadd132pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmadd132pd, kX86InstIdVfmadd132pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmadd132pd, kX86InstIdVfmadd132pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmadd132ps, kX86InstIdVfmadd132ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmadd132ps, kX86InstIdVfmadd132ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmadd132ps, kX86InstIdVfmadd132ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmadd132ps, kX86InstIdVfmadd132ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmadd132sd, kX86InstIdVfmadd132sd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmadd132sd, kX86InstIdVfmadd132sd, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfmadd132ss, kX86InstIdVfmadd132ss, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmadd132ss, kX86InstIdVfmadd132ss, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfmadd213pd, kX86InstIdVfmadd213pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmadd213pd, kX86InstIdVfmadd213pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmadd213pd, kX86InstIdVfmadd213pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmadd213pd, kX86InstIdVfmadd213pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmadd213ps, kX86InstIdVfmadd213ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmadd213ps, kX86InstIdVfmadd213ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmadd213ps, kX86InstIdVfmadd213ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmadd213ps, kX86InstIdVfmadd213ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmadd213sd, kX86InstIdVfmadd213sd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmadd213sd, kX86InstIdVfmadd213sd, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfmadd213ss, kX86InstIdVfmadd213ss, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmadd213ss, kX86InstIdVfmadd213ss, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfmadd231pd, kX86InstIdVfmadd231pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmadd231pd, kX86InstIdVfmadd231pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmadd231pd, kX86InstIdVfmadd231pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmadd231pd, kX86InstIdVfmadd231pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmadd231ps, kX86InstIdVfmadd231ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmadd231ps, kX86InstIdVfmadd231ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmadd231ps, kX86InstIdVfmadd231ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmadd231ps, 
kX86InstIdVfmadd231ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmadd231sd, kX86InstIdVfmadd231sd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmadd231sd, kX86InstIdVfmadd231sd, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfmadd231ss, kX86InstIdVfmadd231ss, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmadd231ss, kX86InstIdVfmadd231ss, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfmaddsub132pd, kX86InstIdVfmaddsub132pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmaddsub132pd, kX86InstIdVfmaddsub132pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmaddsub132pd, kX86InstIdVfmaddsub132pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmaddsub132pd, kX86InstIdVfmaddsub132pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmaddsub132ps, kX86InstIdVfmaddsub132ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmaddsub132ps, kX86InstIdVfmaddsub132ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmaddsub132ps, kX86InstIdVfmaddsub132ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmaddsub132ps, kX86InstIdVfmaddsub132ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmaddsub213pd, kX86InstIdVfmaddsub213pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmaddsub213pd, kX86InstIdVfmaddsub213pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmaddsub213pd, kX86InstIdVfmaddsub213pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmaddsub213pd, kX86InstIdVfmaddsub213pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmaddsub213ps, kX86InstIdVfmaddsub213ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmaddsub213ps, kX86InstIdVfmaddsub213ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmaddsub213ps, kX86InstIdVfmaddsub213ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmaddsub213ps, kX86InstIdVfmaddsub213ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmaddsub231pd, kX86InstIdVfmaddsub231pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmaddsub231pd, kX86InstIdVfmaddsub231pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmaddsub231pd, kX86InstIdVfmaddsub231pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmaddsub231pd, kX86InstIdVfmaddsub231pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmaddsub231ps, kX86InstIdVfmaddsub231ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmaddsub231ps, kX86InstIdVfmaddsub231ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmaddsub231ps, kX86InstIdVfmaddsub231ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmaddsub231ps, kX86InstIdVfmaddsub231ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmsub132pd, kX86InstIdVfmsub132pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsub132pd, kX86InstIdVfmsub132pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmsub132pd, kX86InstIdVfmsub132pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmsub132pd, kX86InstIdVfmsub132pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmsub132ps, kX86InstIdVfmsub132ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsub132ps, kX86InstIdVfmsub132ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmsub132ps, kX86InstIdVfmsub132ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmsub132ps, kX86InstIdVfmsub132ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmsub132sd, kX86InstIdVfmsub132sd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsub132sd, kX86InstIdVfmsub132sd, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfmsub132ss, kX86InstIdVfmsub132ss, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsub132ss, kX86InstIdVfmsub132ss, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfmsub213pd, kX86InstIdVfmsub213pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsub213pd, kX86InstIdVfmsub213pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmsub213pd, kX86InstIdVfmsub213pd, X86YmmReg, X86YmmReg, 
X86Mem) + INST_3x(vfmsub213pd, kX86InstIdVfmsub213pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmsub213ps, kX86InstIdVfmsub213ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsub213ps, kX86InstIdVfmsub213ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmsub213ps, kX86InstIdVfmsub213ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmsub213ps, kX86InstIdVfmsub213ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmsub213sd, kX86InstIdVfmsub213sd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsub213sd, kX86InstIdVfmsub213sd, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfmsub213ss, kX86InstIdVfmsub213ss, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsub213ss, kX86InstIdVfmsub213ss, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfmsub231pd, kX86InstIdVfmsub231pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsub231pd, kX86InstIdVfmsub231pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmsub231pd, kX86InstIdVfmsub231pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmsub231pd, kX86InstIdVfmsub231pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmsub231ps, kX86InstIdVfmsub231ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsub231ps, kX86InstIdVfmsub231ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmsub231ps, kX86InstIdVfmsub231ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmsub231ps, kX86InstIdVfmsub231ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmsub231sd, kX86InstIdVfmsub231sd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsub231sd, kX86InstIdVfmsub231sd, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfmsub231ss, kX86InstIdVfmsub231ss, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsub231ss, kX86InstIdVfmsub231ss, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfmsubadd132pd, kX86InstIdVfmsubadd132pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsubadd132pd, kX86InstIdVfmsubadd132pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmsubadd132pd, kX86InstIdVfmsubadd132pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmsubadd132pd, kX86InstIdVfmsubadd132pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmsubadd132ps, kX86InstIdVfmsubadd132ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsubadd132ps, kX86InstIdVfmsubadd132ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmsubadd132ps, kX86InstIdVfmsubadd132ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmsubadd132ps, kX86InstIdVfmsubadd132ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmsubadd213pd, kX86InstIdVfmsubadd213pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsubadd213pd, kX86InstIdVfmsubadd213pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmsubadd213pd, kX86InstIdVfmsubadd213pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmsubadd213pd, kX86InstIdVfmsubadd213pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmsubadd213ps, kX86InstIdVfmsubadd213ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsubadd213ps, kX86InstIdVfmsubadd213ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmsubadd213ps, kX86InstIdVfmsubadd213ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmsubadd213ps, kX86InstIdVfmsubadd213ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmsubadd231pd, kX86InstIdVfmsubadd231pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsubadd231pd, kX86InstIdVfmsubadd231pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmsubadd231pd, kX86InstIdVfmsubadd231pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmsubadd231pd, kX86InstIdVfmsubadd231pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfmsubadd231ps, kX86InstIdVfmsubadd231ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfmsubadd231ps, kX86InstIdVfmsubadd231ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfmsubadd231ps, kX86InstIdVfmsubadd231ps, 
X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfmsubadd231ps, kX86InstIdVfmsubadd231ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmadd132pd, kX86InstIdVfnmadd132pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmadd132pd, kX86InstIdVfnmadd132pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfnmadd132pd, kX86InstIdVfnmadd132pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfnmadd132pd, kX86InstIdVfnmadd132pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmadd132ps, kX86InstIdVfnmadd132ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmadd132ps, kX86InstIdVfnmadd132ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfnmadd132ps, kX86InstIdVfnmadd132ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfnmadd132ps, kX86InstIdVfnmadd132ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmadd132sd, kX86InstIdVfnmadd132sd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmadd132sd, kX86InstIdVfnmadd132sd, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfnmadd132ss, kX86InstIdVfnmadd132ss, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmadd132ss, kX86InstIdVfnmadd132ss, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfnmadd213pd, kX86InstIdVfnmadd213pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmadd213pd, kX86InstIdVfnmadd213pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfnmadd213pd, kX86InstIdVfnmadd213pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfnmadd213pd, kX86InstIdVfnmadd213pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmadd213ps, kX86InstIdVfnmadd213ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmadd213ps, kX86InstIdVfnmadd213ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfnmadd213ps, kX86InstIdVfnmadd213ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfnmadd213ps, kX86InstIdVfnmadd213ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmadd213sd, kX86InstIdVfnmadd213sd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmadd213sd, kX86InstIdVfnmadd213sd, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfnmadd213ss, kX86InstIdVfnmadd213ss, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmadd213ss, kX86InstIdVfnmadd213ss, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfnmadd231pd, kX86InstIdVfnmadd231pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmadd231pd, kX86InstIdVfnmadd231pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfnmadd231pd, kX86InstIdVfnmadd231pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfnmadd231pd, kX86InstIdVfnmadd231pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmadd231ps, kX86InstIdVfnmadd231ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmadd231ps, kX86InstIdVfnmadd231ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfnmadd231ps, kX86InstIdVfnmadd231ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfnmadd231ps, kX86InstIdVfnmadd231ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmadd231sd, kX86InstIdVfnmadd231sd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmadd231sd, kX86InstIdVfnmadd231sd, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfnmadd231ss, kX86InstIdVfnmadd231ss, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmadd231ss, kX86InstIdVfnmadd231ss, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfnmsub132pd, kX86InstIdVfnmsub132pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmsub132pd, kX86InstIdVfnmsub132pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfnmsub132pd, kX86InstIdVfnmsub132pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfnmsub132pd, kX86InstIdVfnmsub132pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmsub132ps, kX86InstIdVfnmsub132ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmsub132ps, kX86InstIdVfnmsub132ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfnmsub132ps, kX86InstIdVfnmsub132ps, X86YmmReg, X86YmmReg, 
X86Mem) + INST_3x(vfnmsub132ps, kX86InstIdVfnmsub132ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmsub132sd, kX86InstIdVfnmsub132sd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmsub132sd, kX86InstIdVfnmsub132sd, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfnmsub132ss, kX86InstIdVfnmsub132ss, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmsub132ss, kX86InstIdVfnmsub132ss, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfnmsub213pd, kX86InstIdVfnmsub213pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmsub213pd, kX86InstIdVfnmsub213pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfnmsub213pd, kX86InstIdVfnmsub213pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfnmsub213pd, kX86InstIdVfnmsub213pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmsub213ps, kX86InstIdVfnmsub213ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmsub213ps, kX86InstIdVfnmsub213ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfnmsub213ps, kX86InstIdVfnmsub213ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfnmsub213ps, kX86InstIdVfnmsub213ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmsub213sd, kX86InstIdVfnmsub213sd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmsub213sd, kX86InstIdVfnmsub213sd, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfnmsub213ss, kX86InstIdVfnmsub213ss, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmsub213ss, kX86InstIdVfnmsub213ss, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfnmsub231pd, kX86InstIdVfnmsub231pd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmsub231pd, kX86InstIdVfnmsub231pd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfnmsub231pd, kX86InstIdVfnmsub231pd, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfnmsub231pd, kX86InstIdVfnmsub231pd, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmsub231ps, kX86InstIdVfnmsub231ps, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmsub231ps, kX86InstIdVfnmsub231ps, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vfnmsub231ps, kX86InstIdVfnmsub231ps, X86YmmReg, X86YmmReg, X86Mem) + INST_3x(vfnmsub231ps, kX86InstIdVfnmsub231ps, X86YmmReg, X86YmmReg, X86YmmReg) + + INST_3x(vfnmsub231sd, kX86InstIdVfnmsub231sd, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmsub231sd, kX86InstIdVfnmsub231sd, X86XmmReg, X86XmmReg, X86XmmReg) + + INST_3x(vfnmsub231ss, kX86InstIdVfnmsub231ss, X86XmmReg, X86XmmReg, X86Mem) + INST_3x(vfnmsub231ss, kX86InstIdVfnmsub231ss, X86XmmReg, X86XmmReg, X86XmmReg) + + // -------------------------------------------------------------------------- + // [FMA4] + // -------------------------------------------------------------------------- + + INST_4x(vfmaddpd, kX86InstIdVfmaddpd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfmaddpd, kX86InstIdVfmaddpd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfmaddpd, kX86InstIdVfmaddpd, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vfmaddpd, kX86InstIdVfmaddpd, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vfmaddpd, kX86InstIdVfmaddpd, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vfmaddpd, kX86InstIdVfmaddpd, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4x(vfmaddps, kX86InstIdVfmaddps, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfmaddps, kX86InstIdVfmaddps, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfmaddps, kX86InstIdVfmaddps, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vfmaddps, kX86InstIdVfmaddps, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vfmaddps, kX86InstIdVfmaddps, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vfmaddps, kX86InstIdVfmaddps, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4x(vfmaddsd, kX86InstIdVfmaddsd, X86XmmReg, 
X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfmaddsd, kX86InstIdVfmaddsd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfmaddsd, kX86InstIdVfmaddsd, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + + INST_4x(vfmaddss, kX86InstIdVfmaddss, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfmaddss, kX86InstIdVfmaddss, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfmaddss, kX86InstIdVfmaddss, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + + INST_4x(vfmaddsubpd, kX86InstIdVfmaddsubpd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfmaddsubpd, kX86InstIdVfmaddsubpd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfmaddsubpd, kX86InstIdVfmaddsubpd, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vfmaddsubpd, kX86InstIdVfmaddsubpd, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vfmaddsubpd, kX86InstIdVfmaddsubpd, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vfmaddsubpd, kX86InstIdVfmaddsubpd, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4x(vfmaddsubps, kX86InstIdVfmaddsubps, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfmaddsubps, kX86InstIdVfmaddsubps, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfmaddsubps, kX86InstIdVfmaddsubps, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vfmaddsubps, kX86InstIdVfmaddsubps, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vfmaddsubps, kX86InstIdVfmaddsubps, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vfmaddsubps, kX86InstIdVfmaddsubps, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4x(vfmsubaddpd, kX86InstIdVfmsubaddpd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfmsubaddpd, kX86InstIdVfmsubaddpd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfmsubaddpd, kX86InstIdVfmsubaddpd, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vfmsubaddpd, kX86InstIdVfmsubaddpd, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vfmsubaddpd, kX86InstIdVfmsubaddpd, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vfmsubaddpd, kX86InstIdVfmsubaddpd, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4x(vfmsubaddps, kX86InstIdVfmsubaddps, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfmsubaddps, kX86InstIdVfmsubaddps, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfmsubaddps, kX86InstIdVfmsubaddps, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vfmsubaddps, kX86InstIdVfmsubaddps, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vfmsubaddps, kX86InstIdVfmsubaddps, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vfmsubaddps, kX86InstIdVfmsubaddps, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4x(vfmsubpd, kX86InstIdVfmsubpd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfmsubpd, kX86InstIdVfmsubpd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfmsubpd, kX86InstIdVfmsubpd, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vfmsubpd, kX86InstIdVfmsubpd, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vfmsubpd, kX86InstIdVfmsubpd, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vfmsubpd, kX86InstIdVfmsubpd, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4x(vfmsubps, kX86InstIdVfmsubps, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfmsubps, kX86InstIdVfmsubps, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfmsubps, kX86InstIdVfmsubps, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vfmsubps, kX86InstIdVfmsubps, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vfmsubps, kX86InstIdVfmsubps, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vfmsubps, kX86InstIdVfmsubps, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + 
INST_4x(vfmsubsd, kX86InstIdVfmsubsd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfmsubsd, kX86InstIdVfmsubsd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfmsubsd, kX86InstIdVfmsubsd, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + + INST_4x(vfmsubss, kX86InstIdVfmsubss, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfmsubss, kX86InstIdVfmsubss, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfmsubss, kX86InstIdVfmsubss, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + + INST_4x(vfnmaddpd, kX86InstIdVfnmaddpd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfnmaddpd, kX86InstIdVfnmaddpd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfnmaddpd, kX86InstIdVfnmaddpd, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vfnmaddpd, kX86InstIdVfnmaddpd, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vfnmaddpd, kX86InstIdVfnmaddpd, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vfnmaddpd, kX86InstIdVfnmaddpd, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4x(vfnmaddps, kX86InstIdVfnmaddps, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfnmaddps, kX86InstIdVfnmaddps, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfnmaddps, kX86InstIdVfnmaddps, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vfnmaddps, kX86InstIdVfnmaddps, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vfnmaddps, kX86InstIdVfnmaddps, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vfnmaddps, kX86InstIdVfnmaddps, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4x(vfnmaddsd, kX86InstIdVfnmaddsd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfnmaddsd, kX86InstIdVfnmaddsd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfnmaddsd, kX86InstIdVfnmaddsd, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + + INST_4x(vfnmaddss, kX86InstIdVfnmaddss, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfnmaddss, kX86InstIdVfnmaddss, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfnmaddss, kX86InstIdVfnmaddss, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + + INST_4x(vfnmsubpd, kX86InstIdVfnmsubpd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfnmsubpd, kX86InstIdVfnmsubpd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfnmsubpd, kX86InstIdVfnmsubpd, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vfnmsubpd, kX86InstIdVfnmsubpd, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vfnmsubpd, kX86InstIdVfnmsubpd, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vfnmsubpd, kX86InstIdVfnmsubpd, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4x(vfnmsubps, kX86InstIdVfnmsubps, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfnmsubps, kX86InstIdVfnmsubps, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfnmsubps, kX86InstIdVfnmsubps, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vfnmsubps, kX86InstIdVfnmsubps, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vfnmsubps, kX86InstIdVfnmsubps, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vfnmsubps, kX86InstIdVfnmsubps, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4x(vfnmsubsd, kX86InstIdVfnmsubsd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfnmsubsd, kX86InstIdVfnmsubsd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfnmsubsd, kX86InstIdVfnmsubsd, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + + INST_4x(vfnmsubss, kX86InstIdVfnmsubss, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vfnmsubss, kX86InstIdVfnmsubss, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vfnmsubss, kX86InstIdVfnmsubss, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + + // 
-------------------------------------------------------------------------- + // [XOP] + // -------------------------------------------------------------------------- + + INST_2x(vfrczpd, kX86InstIdVfrczpd, X86XmmReg, X86XmmReg) + INST_2x(vfrczpd, kX86InstIdVfrczpd, X86XmmReg, X86Mem) + INST_2x(vfrczpd, kX86InstIdVfrczpd, X86YmmReg, X86YmmReg) + INST_2x(vfrczpd, kX86InstIdVfrczpd, X86YmmReg, X86Mem) + + INST_2x(vfrczps, kX86InstIdVfrczps, X86XmmReg, X86XmmReg) + INST_2x(vfrczps, kX86InstIdVfrczps, X86XmmReg, X86Mem) + INST_2x(vfrczps, kX86InstIdVfrczps, X86YmmReg, X86YmmReg) + INST_2x(vfrczps, kX86InstIdVfrczps, X86YmmReg, X86Mem) + + INST_2x(vfrczsd, kX86InstIdVfrczsd, X86XmmReg, X86XmmReg) + INST_2x(vfrczsd, kX86InstIdVfrczsd, X86XmmReg, X86Mem) + + INST_2x(vfrczss, kX86InstIdVfrczss, X86XmmReg, X86XmmReg) + INST_2x(vfrczss, kX86InstIdVfrczss, X86XmmReg, X86Mem) + + INST_4x(vpcmov, kX86InstIdVpcmov, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpcmov, kX86InstIdVpcmov, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vpcmov, kX86InstIdVpcmov, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vpcmov, kX86InstIdVpcmov, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vpcmov, kX86InstIdVpcmov, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vpcmov, kX86InstIdVpcmov, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4i(vpcomb, kX86InstIdVpcomb, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + INST_4i(vpcomb, kX86InstIdVpcomb, X86XmmReg, X86XmmReg, X86Mem, Imm) + INST_4i(vpcomd, kX86InstIdVpcomd, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + INST_4i(vpcomd, kX86InstIdVpcomd, X86XmmReg, X86XmmReg, X86Mem, Imm) + INST_4i(vpcomq, kX86InstIdVpcomq, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + INST_4i(vpcomq, kX86InstIdVpcomq, X86XmmReg, X86XmmReg, X86Mem, Imm) + INST_4i(vpcomw, kX86InstIdVpcomw, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + INST_4i(vpcomw, kX86InstIdVpcomw, X86XmmReg, X86XmmReg, X86Mem, Imm) + + INST_4i(vpcomub, kX86InstIdVpcomub, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + INST_4i(vpcomub, kX86InstIdVpcomub, X86XmmReg, X86XmmReg, X86Mem, Imm) + INST_4i(vpcomud, kX86InstIdVpcomud, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + INST_4i(vpcomud, kX86InstIdVpcomud, X86XmmReg, X86XmmReg, X86Mem, Imm) + INST_4i(vpcomuq, kX86InstIdVpcomuq, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + INST_4i(vpcomuq, kX86InstIdVpcomuq, X86XmmReg, X86XmmReg, X86Mem, Imm) + INST_4i(vpcomuw, kX86InstIdVpcomuw, X86XmmReg, X86XmmReg, X86XmmReg, Imm) + INST_4i(vpcomuw, kX86InstIdVpcomuw, X86XmmReg, X86XmmReg, X86Mem, Imm) + + INST_4x(vpermil2pd, kX86InstIdVpermil2pd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpermil2pd, kX86InstIdVpermil2pd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vpermil2pd, kX86InstIdVpermil2pd, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vpermil2pd, kX86InstIdVpermil2pd, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vpermil2pd, kX86InstIdVpermil2pd, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vpermil2pd, kX86InstIdVpermil2pd, X86YmmReg, X86YmmReg, X86YmmReg, X86Mem) + + INST_4x(vpermil2ps, kX86InstIdVpermil2ps, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpermil2ps, kX86InstIdVpermil2ps, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vpermil2ps, kX86InstIdVpermil2ps, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + INST_4x(vpermil2ps, kX86InstIdVpermil2ps, X86YmmReg, X86YmmReg, X86YmmReg, X86YmmReg) + INST_4x(vpermil2ps, kX86InstIdVpermil2ps, X86YmmReg, X86YmmReg, X86Mem, X86YmmReg) + INST_4x(vpermil2ps, kX86InstIdVpermil2ps, X86YmmReg, X86YmmReg, X86YmmReg, 
X86Mem) + + INST_2x(vphaddbd, kX86InstIdVphaddbd, X86XmmReg, X86XmmReg) + INST_2x(vphaddbd, kX86InstIdVphaddbd, X86XmmReg, X86Mem) + INST_2x(vphaddbq, kX86InstIdVphaddbq, X86XmmReg, X86XmmReg) + INST_2x(vphaddbq, kX86InstIdVphaddbq, X86XmmReg, X86Mem) + INST_2x(vphaddbw, kX86InstIdVphaddbw, X86XmmReg, X86XmmReg) + INST_2x(vphaddbw, kX86InstIdVphaddbw, X86XmmReg, X86Mem) + INST_2x(vphadddq, kX86InstIdVphadddq, X86XmmReg, X86XmmReg) + INST_2x(vphadddq, kX86InstIdVphadddq, X86XmmReg, X86Mem) + INST_2x(vphaddwd, kX86InstIdVphaddwd, X86XmmReg, X86XmmReg) + INST_2x(vphaddwd, kX86InstIdVphaddwd, X86XmmReg, X86Mem) + INST_2x(vphaddwq, kX86InstIdVphaddwq, X86XmmReg, X86XmmReg) + INST_2x(vphaddwq, kX86InstIdVphaddwq, X86XmmReg, X86Mem) + + INST_2x(vphaddubd, kX86InstIdVphaddubd, X86XmmReg, X86XmmReg) + INST_2x(vphaddubd, kX86InstIdVphaddubd, X86XmmReg, X86Mem) + INST_2x(vphaddubq, kX86InstIdVphaddubq, X86XmmReg, X86XmmReg) + INST_2x(vphaddubq, kX86InstIdVphaddubq, X86XmmReg, X86Mem) + INST_2x(vphaddubw, kX86InstIdVphaddubw, X86XmmReg, X86XmmReg) + INST_2x(vphaddubw, kX86InstIdVphaddubw, X86XmmReg, X86Mem) + INST_2x(vphaddudq, kX86InstIdVphaddudq, X86XmmReg, X86XmmReg) + INST_2x(vphaddudq, kX86InstIdVphaddudq, X86XmmReg, X86Mem) + INST_2x(vphadduwd, kX86InstIdVphadduwd, X86XmmReg, X86XmmReg) + INST_2x(vphadduwd, kX86InstIdVphadduwd, X86XmmReg, X86Mem) + INST_2x(vphadduwq, kX86InstIdVphadduwq, X86XmmReg, X86XmmReg) + INST_2x(vphadduwq, kX86InstIdVphadduwq, X86XmmReg, X86Mem) + + INST_2x(vphsubbw, kX86InstIdVphsubbw, X86XmmReg, X86XmmReg) + INST_2x(vphsubbw, kX86InstIdVphsubbw, X86XmmReg, X86Mem) + INST_2x(vphsubdq, kX86InstIdVphsubdq, X86XmmReg, X86XmmReg) + INST_2x(vphsubdq, kX86InstIdVphsubdq, X86XmmReg, X86Mem) + INST_2x(vphsubwd, kX86InstIdVphsubwd, X86XmmReg, X86XmmReg) + INST_2x(vphsubwd, kX86InstIdVphsubwd, X86XmmReg, X86Mem) + + INST_4x(vpmacsdd, kX86InstIdVpmacsdd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpmacsdd, kX86InstIdVpmacsdd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vpmacsdqh, kX86InstIdVpmacsdqh, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpmacsdqh, kX86InstIdVpmacsdqh, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vpmacsdql, kX86InstIdVpmacsdql, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpmacsdql, kX86InstIdVpmacsdql, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vpmacswd, kX86InstIdVpmacswd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpmacswd, kX86InstIdVpmacswd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vpmacsww, kX86InstIdVpmacsww, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpmacsww, kX86InstIdVpmacsww, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + + INST_4x(vpmacssdd, kX86InstIdVpmacssdd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpmacssdd, kX86InstIdVpmacssdd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vpmacssdqh, kX86InstIdVpmacssdqh, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpmacssdqh, kX86InstIdVpmacssdqh, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vpmacssdql, kX86InstIdVpmacssdql, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpmacssdql, kX86InstIdVpmacssdql, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vpmacsswd, kX86InstIdVpmacsswd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpmacsswd, kX86InstIdVpmacsswd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vpmacssww, kX86InstIdVpmacssww, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpmacssww, kX86InstIdVpmacssww, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + + 
INST_4x(vpmadcsswd, kX86InstIdVpmadcsswd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpmadcsswd, kX86InstIdVpmadcsswd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + + INST_4x(vpmadcswd, kX86InstIdVpmadcswd, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpmadcswd, kX86InstIdVpmadcswd, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + + INST_4x(vpperm, kX86InstIdVpperm, X86XmmReg, X86XmmReg, X86XmmReg, X86XmmReg) + INST_4x(vpperm, kX86InstIdVpperm, X86XmmReg, X86XmmReg, X86Mem, X86XmmReg) + INST_4x(vpperm, kX86InstIdVpperm, X86XmmReg, X86XmmReg, X86XmmReg, X86Mem) + + INST_3x(vprotb, kX86InstIdVprotb, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vprotb, kX86InstIdVprotb, X86XmmReg, X86Mem, X86XmmReg) + INST_3x(vprotb, kX86InstIdVprotb, X86XmmReg, X86XmmReg, X86Mem) + INST_3i(vprotb, kX86InstIdVprotb, X86XmmReg, X86XmmReg, Imm) + INST_3i(vprotb, kX86InstIdVprotb, X86XmmReg, X86Mem, Imm) + + INST_3x(vprotd, kX86InstIdVprotd, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vprotd, kX86InstIdVprotd, X86XmmReg, X86Mem, X86XmmReg) + INST_3x(vprotd, kX86InstIdVprotd, X86XmmReg, X86XmmReg, X86Mem) + INST_3i(vprotd, kX86InstIdVprotd, X86XmmReg, X86XmmReg, Imm) + INST_3i(vprotd, kX86InstIdVprotd, X86XmmReg, X86Mem, Imm) + + INST_3x(vprotq, kX86InstIdVprotq, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vprotq, kX86InstIdVprotq, X86XmmReg, X86Mem, X86XmmReg) + INST_3x(vprotq, kX86InstIdVprotq, X86XmmReg, X86XmmReg, X86Mem) + INST_3i(vprotq, kX86InstIdVprotq, X86XmmReg, X86XmmReg, Imm) + INST_3i(vprotq, kX86InstIdVprotq, X86XmmReg, X86Mem, Imm) + + INST_3x(vprotw, kX86InstIdVprotw, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vprotw, kX86InstIdVprotw, X86XmmReg, X86Mem, X86XmmReg) + INST_3x(vprotw, kX86InstIdVprotw, X86XmmReg, X86XmmReg, X86Mem) + INST_3i(vprotw, kX86InstIdVprotw, X86XmmReg, X86XmmReg, Imm) + INST_3i(vprotw, kX86InstIdVprotw, X86XmmReg, X86Mem, Imm) + + INST_3x(vpshab, kX86InstIdVpshab, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vpshab, kX86InstIdVpshab, X86XmmReg, X86Mem, X86XmmReg) + INST_3x(vpshab, kX86InstIdVpshab, X86XmmReg, X86XmmReg, X86Mem) + + INST_3x(vpshad, kX86InstIdVpshad, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vpshad, kX86InstIdVpshad, X86XmmReg, X86Mem, X86XmmReg) + INST_3x(vpshad, kX86InstIdVpshad, X86XmmReg, X86XmmReg, X86Mem) + + INST_3x(vpshaq, kX86InstIdVpshaq, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vpshaq, kX86InstIdVpshaq, X86XmmReg, X86Mem, X86XmmReg) + INST_3x(vpshaq, kX86InstIdVpshaq, X86XmmReg, X86XmmReg, X86Mem) + + INST_3x(vpshaw, kX86InstIdVpshaw, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vpshaw, kX86InstIdVpshaw, X86XmmReg, X86Mem, X86XmmReg) + INST_3x(vpshaw, kX86InstIdVpshaw, X86XmmReg, X86XmmReg, X86Mem) + + INST_3x(vpshlb, kX86InstIdVpshlb, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vpshlb, kX86InstIdVpshlb, X86XmmReg, X86Mem, X86XmmReg) + INST_3x(vpshlb, kX86InstIdVpshlb, X86XmmReg, X86XmmReg, X86Mem) + + INST_3x(vpshld, kX86InstIdVpshld, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vpshld, kX86InstIdVpshld, X86XmmReg, X86Mem, X86XmmReg) + INST_3x(vpshld, kX86InstIdVpshld, X86XmmReg, X86XmmReg, X86Mem) + + INST_3x(vpshlq, kX86InstIdVpshlq, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vpshlq, kX86InstIdVpshlq, X86XmmReg, X86Mem, X86XmmReg) + INST_3x(vpshlq, kX86InstIdVpshlq, X86XmmReg, X86XmmReg, X86Mem) + + INST_3x(vpshlw, kX86InstIdVpshlw, X86XmmReg, X86XmmReg, X86XmmReg) + INST_3x(vpshlw, kX86InstIdVpshlw, X86XmmReg, X86Mem, X86XmmReg) + INST_3x(vpshlw, kX86InstIdVpshlw, X86XmmReg, X86XmmReg, X86Mem) + + // 
-------------------------------------------------------------------------- + // [F16C] + // -------------------------------------------------------------------------- + + //! Convert packed HP-FP to SP-FP. + INST_2x(vcvtph2ps, kX86InstIdVcvtph2ps, X86XmmReg, X86XmmReg) + //! \overload + INST_2x(vcvtph2ps, kX86InstIdVcvtph2ps, X86XmmReg, X86Mem) + //! \overload + INST_2x(vcvtph2ps, kX86InstIdVcvtph2ps, X86YmmReg, X86XmmReg) + //! \overload + INST_2x(vcvtph2ps, kX86InstIdVcvtph2ps, X86YmmReg, X86Mem) + + //! Convert packed SP-FP to HP-FP. + INST_3i(vcvtps2ph, kX86InstIdVcvtps2ph, X86XmmReg, X86XmmReg, Imm) + //! \overload + INST_3i(vcvtps2ph, kX86InstIdVcvtps2ph, X86Mem, X86XmmReg, Imm) + //! \overload + INST_3i(vcvtps2ph, kX86InstIdVcvtps2ph, X86XmmReg, X86YmmReg, Imm) + //! \overload + INST_3i(vcvtps2ph, kX86InstIdVcvtps2ph, X86Mem, X86YmmReg, Imm) + +#undef INST_0x + +#undef INST_1x +#undef INST_1i +#undef INST_1cc + +#undef INST_2x +#undef INST_2i +#undef INST_2cc + +#undef INST_3x +#undef INST_3i +#undef INST_3ii + +#undef INST_4x +#undef INST_4i +#undef INST_4ii +}; + +//! \} + +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" + +// [Guard] +#endif // _ASMJIT_X86_X86ASSEMBLER_H diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86compiler.cpp b/DynamicHooks/thirdparty/AsmJit/x86/x86compiler.cpp new file mode 100644 index 0000000..0c51cd0 --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/x86/x86compiler.cpp @@ -0,0 +1,860 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Export] +#define ASMJIT_EXPORTS + +// [Guard] +#include "../build.h" +#if !defined(ASMJIT_DISABLE_COMPILER) && (defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)) + +// [Dependencies] +#include "../base/containers.h" +#include "../base/utils.h" +#include "../x86/x86assembler.h" +#include "../x86/x86compiler.h" +#include "../x86/x86compilercontext_p.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [Debug] +// ============================================================================ + +#if !defined(ASMJIT_DEBUG) +#define ASMJIT_ASSERT_OPERAND(op) \ + do {} while(0) +#else +#define ASMJIT_ASSERT_OPERAND(op) \ + do { \ + if (op.isVar() || op.isLabel()) { \ + ASMJIT_ASSERT(op.getId() != kInvalidValue); \ + } \ + } while(0) +#endif + +// ============================================================================ +// [asmjit::X86VarInfo] +// ============================================================================ + +#define F(flag) VarInfo::kFlag##flag +const VarInfo _x86VarInfo[] = { + { kVarTypeInt8 , 1 , kX86RegClassGp , kX86RegTypeGpbLo, 0 , "gpb" }, + { kVarTypeUInt8 , 1 , kX86RegClassGp , kX86RegTypeGpbLo, 0 , "gpb" }, + { kVarTypeInt16 , 2 , kX86RegClassGp , kX86RegTypeGpw , 0 , "gpw" }, + { kVarTypeUInt16 , 2 , kX86RegClassGp , kX86RegTypeGpw , 0 , "gpw" }, + { kVarTypeInt32 , 4 , kX86RegClassGp , kX86RegTypeGpd , 0 , "gpd" }, + { kVarTypeUInt32 , 4 , kX86RegClassGp , kX86RegTypeGpd , 0 , "gpd" }, + { kVarTypeInt64 , 8 , kX86RegClassGp , kX86RegTypeGpq , 0 , "gpq" }, + { kVarTypeUInt64 , 8 , kX86RegClassGp , kX86RegTypeGpq , 0 , "gpq" }, + { kVarTypeIntPtr , 0 , kX86RegClassGp , 0 , 0 , "" }, // Abstract. + { kVarTypeUIntPtr , 0 , kX86RegClassGp , 0 , 0 , "" }, // Abstract. 
+ { kVarTypeFp32 , 4 , kX86RegClassFp , kX86RegTypeFp , F(SP) , "fp" }, + { kVarTypeFp64 , 8 , kX86RegClassFp , kX86RegTypeFp , F(DP) , "fp" }, + { kX86VarTypeMm , 8 , kX86RegClassMm , kX86RegTypeMm , 0 | F(SIMD), "mm" }, + { kX86VarTypeK , 8 , kX86RegClassK , kX86RegTypeK , 0 , "k" }, + { kX86VarTypeXmm , 16, kX86RegClassXyz, kX86RegTypeXmm , 0 | F(SIMD), "xmm" }, + { kX86VarTypeXmmSs, 4 , kX86RegClassXyz, kX86RegTypeXmm , F(SP) , "xmm" }, + { kX86VarTypeXmmPs, 16, kX86RegClassXyz, kX86RegTypeXmm , F(SP) | F(SIMD), "xmm" }, + { kX86VarTypeXmmSd, 8 , kX86RegClassXyz, kX86RegTypeXmm , F(DP) , "xmm" }, + { kX86VarTypeXmmPd, 16, kX86RegClassXyz, kX86RegTypeXmm , F(DP) | F(SIMD), "xmm" }, + { kX86VarTypeYmm , 32, kX86RegClassXyz, kX86RegTypeYmm , 0 | F(SIMD), "ymm" }, + { kX86VarTypeYmmPs, 32, kX86RegClassXyz, kX86RegTypeYmm , F(SP) | F(SIMD), "ymm" }, + { kX86VarTypeYmmPd, 32, kX86RegClassXyz, kX86RegTypeYmm , F(DP) | F(SIMD), "ymm" }, + { kX86VarTypeZmm , 64, kX86RegClassXyz, kX86RegTypeZmm , 0 | F(SIMD), "zmm" }, + { kX86VarTypeZmmPs, 64, kX86RegClassXyz, kX86RegTypeZmm , F(SP) | F(SIMD), "zmm" }, + { kX86VarTypeZmmPd, 64, kX86RegClassXyz, kX86RegTypeZmm , F(DP) | F(SIMD), "zmm" } +}; +#undef F + +#if defined(ASMJIT_BUILD_X86) +const uint8_t _x86VarMapping[kX86VarTypeCount] = { + /* 00: kVarTypeInt8 */ kVarTypeInt8, + /* 01: kVarTypeUInt8 */ kVarTypeUInt8, + /* 02: kVarTypeInt16 */ kVarTypeInt16, + /* 03: kVarTypeUInt16 */ kVarTypeUInt16, + /* 04: kVarTypeInt32 */ kVarTypeInt32, + /* 05: kVarTypeUInt32 */ kVarTypeUInt32, + /* 06: kVarTypeInt64 */ kInvalidVar, // Invalid in 32-bit mode. + /* 07: kVarTypeUInt64 */ kInvalidVar, // Invalid in 32-bit mode. + /* 08: kVarTypeIntPtr */ kVarTypeInt32, // Remapped to Int32. + /* 09: kVarTypeUIntPtr */ kVarTypeUInt32, // Remapped to UInt32. + /* 10: kVarTypeFp32 */ kVarTypeFp32, + /* 11: kVarTypeFp64 */ kVarTypeFp64, + /* 12: kX86VarTypeMm */ kX86VarTypeMm, + /* 13: kX86VarTypeK */ kX86VarTypeK, + /* 14: kX86VarTypeXmm */ kX86VarTypeXmm, + /* 15: kX86VarTypeXmmSs */ kX86VarTypeXmmSs, + /* 16: kX86VarTypeXmmPs */ kX86VarTypeXmmPs, + /* 17: kX86VarTypeXmmSd */ kX86VarTypeXmmSd, + /* 18: kX86VarTypeXmmPd */ kX86VarTypeXmmPd, + /* 19: kX86VarTypeYmm */ kX86VarTypeYmm, + /* 20: kX86VarTypeYmmPs */ kX86VarTypeYmmPs, + /* 21: kX86VarTypeYmmPd */ kX86VarTypeYmmPd, + /* 22: kX86VarTypeZmm */ kX86VarTypeZmm, + /* 23: kX86VarTypeZmmPs */ kX86VarTypeZmmPs, + /* 24: kX86VarTypeZmmPd */ kX86VarTypeZmmPd +}; +#endif // ASMJIT_BUILD_X86 + +#if defined(ASMJIT_BUILD_X64) +const uint8_t _x64VarMapping[kX86VarTypeCount] = { + /* 00: kVarTypeInt8 */ kVarTypeInt8, + /* 01: kVarTypeUInt8 */ kVarTypeUInt8, + /* 02: kVarTypeInt16 */ kVarTypeInt16, + /* 03: kVarTypeUInt16 */ kVarTypeUInt16, + /* 04: kVarTypeInt32 */ kVarTypeInt32, + /* 05: kVarTypeUInt32 */ kVarTypeUInt32, + /* 06: kVarTypeInt64 */ kVarTypeInt64, + /* 07: kVarTypeUInt64 */ kVarTypeUInt64, + /* 08: kVarTypeIntPtr */ kVarTypeInt64, // Remapped to Int64. + /* 09: kVarTypeUIntPtr */ kVarTypeUInt64, // Remapped to UInt64. 
+  /* 10: kVarTypeFp32   */ kVarTypeFp32,
+  /* 11: kVarTypeFp64   */ kVarTypeFp64,
+  /* 12: kX86VarTypeMm  */ kX86VarTypeMm,
+  /* 13: kX86VarTypeK   */ kX86VarTypeK,
+  /* 14: kX86VarTypeXmm */ kX86VarTypeXmm,
+  /* 15: kX86VarTypeXmmSs */ kX86VarTypeXmmSs,
+  /* 16: kX86VarTypeXmmPs */ kX86VarTypeXmmPs,
+  /* 17: kX86VarTypeXmmSd */ kX86VarTypeXmmSd,
+  /* 18: kX86VarTypeXmmPd */ kX86VarTypeXmmPd,
+  /* 19: kX86VarTypeYmm */ kX86VarTypeYmm,
+  /* 20: kX86VarTypeYmmPs */ kX86VarTypeYmmPs,
+  /* 21: kX86VarTypeYmmPd */ kX86VarTypeYmmPd,
+  /* 22: kX86VarTypeZmm */ kX86VarTypeZmm,
+  /* 23: kX86VarTypeZmmPs */ kX86VarTypeZmmPs,
+  /* 24: kX86VarTypeZmmPd */ kX86VarTypeZmmPd
+};
+#endif // ASMJIT_BUILD_X64
+
+// ============================================================================
+// [asmjit::X86CallNode - Arg / Ret]
+// ============================================================================
+
+bool X86CallNode::_setArg(uint32_t i, const Operand& op) noexcept {
+  if ((i & ~kFuncArgHi) >= _x86Decl.getNumArgs())
+    return false;
+
+  _args[i] = op;
+  return true;
+}
+
+bool X86CallNode::_setRet(uint32_t i, const Operand& op) noexcept {
+  if (i >= 2)
+    return false;
+
+  _ret[i] = op;
+  return true;
+}
+
+// ============================================================================
+// [asmjit::X86Compiler - Construction / Destruction]
+// ============================================================================
+
+X86Compiler::X86Compiler(X86Assembler* assembler) noexcept
+  : Compiler(),
+    zax(NoInit),
+    zcx(NoInit),
+    zdx(NoInit),
+    zbx(NoInit),
+    zsp(NoInit),
+    zbp(NoInit),
+    zsi(NoInit),
+    zdi(NoInit) {
+
+  _regCount.reset();
+  zax = x86::noGpReg;
+  zcx = x86::noGpReg;
+  zdx = x86::noGpReg;
+  zbx = x86::noGpReg;
+  zsp = x86::noGpReg;
+  zbp = x86::noGpReg;
+  zsi = x86::noGpReg;
+  zdi = x86::noGpReg;
+
+  if (assembler != nullptr)
+    attach(assembler);
+}
+
+X86Compiler::~X86Compiler() noexcept {
+  reset(true);
+}
+
+// ============================================================================
+// [asmjit::X86Compiler - Attach / Reset]
+// ============================================================================
+
+Error X86Compiler::attach(Assembler* assembler) noexcept {
+  ASMJIT_ASSERT(assembler != nullptr);
+
+  if (_assembler != nullptr)
+    return kErrorInvalidState;
+
+  uint32_t arch = assembler->getArch();
+  switch (arch) {
+#if defined(ASMJIT_BUILD_X86)
+    case kArchX86:
+      _targetVarMapping = _x86VarMapping;
+      break;
+#endif // ASMJIT_BUILD_X86
+
+#if defined(ASMJIT_BUILD_X64)
+    case kArchX64:
+      _targetVarMapping = _x64VarMapping;
+      break;
+#endif // ASMJIT_BUILD_X64
+
+    default:
+      return kErrorInvalidArch;
+  }
+
+  assembler->_attached(this);
+
+  _arch = static_cast<uint8_t>(arch);
+  _regSize = static_cast<uint8_t>(assembler->getRegSize());
+  _regCount = static_cast<X86Assembler*>(assembler)->getRegCount();
+  _finalized = false;
+
+  zax = static_cast<X86Assembler*>(assembler)->zax;
+  zcx = static_cast<X86Assembler*>(assembler)->zcx;
+  zdx = static_cast<X86Assembler*>(assembler)->zdx;
+  zbx = static_cast<X86Assembler*>(assembler)->zbx;
+  zsp = static_cast<X86Assembler*>(assembler)->zsp;
+  zbp = static_cast<X86Assembler*>(assembler)->zbp;
+  zsi = static_cast<X86Assembler*>(assembler)->zsi;
+  zdi = static_cast<X86Assembler*>(assembler)->zdi;
+
+  return kErrorOk;
+}
+
+void X86Compiler::reset(bool releaseMemory) noexcept {
+  Compiler::reset(releaseMemory);
+
+  _regCount.reset();
+  zax = x86::noGpReg;
+  zcx = x86::noGpReg;
+  zdx = x86::noGpReg;
+  zbx = x86::noGpReg;
+  zsp = x86::noGpReg;
+  zbp = x86::noGpReg;
+  zsi = x86::noGpReg;
+  zdi = x86::noGpReg;
+}
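+
+// NOTE (editor's sketch, not part of the original AsmJit sources): the
+// `_targetVarMapping` table selected in `attach()` above is what makes the
+// abstract pointer-sized types portable. Assuming the usual typed helpers of
+// this snapshot, the same client code maps to a different concrete type per
+// target:
+//
+//   X86Compiler c(&a);              // `a` targets kArchX86 or kArchX64.
+//   X86GpVar p = c.newIntPtr("p");  // kVarTypeIntPtr -> kVarTypeInt32 (gpd)
+//                                   // on X86, kVarTypeInt64 (gpq) on X64,
+//                                   // per _x86VarMapping / _x64VarMapping.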
+
+// ============================================================================
+// [asmjit::X86Compiler - Finalize]
+// ============================================================================
+
+Error X86Compiler::finalize() noexcept {
+  X86Assembler* assembler = getAssembler();
+  if (assembler == nullptr)
+    return kErrorOk;
+
+  // Flush the global constant pool.
+  if (_globalConstPoolLabel.isInitialized()) {
+    embedConstPool(_globalConstPoolLabel, _globalConstPool);
+
+    _globalConstPoolLabel.reset();
+    _globalConstPool.reset();
+  }
+
+  if (_firstNode == nullptr)
+    return kErrorOk;
+
+  X86Context context(this);
+  Error error = kErrorOk;
+
+  HLNode* node = _firstNode;
+  HLNode* start;
+
+  // Find all functions and use the `X86Context` to translate/emit them.
+  do {
+    start = node;
+    _resetTokenGenerator();
+
+    if (node->getType() == HLNode::kTypeFunc) {
+      node = static_cast<X86FuncNode*>(start)->getEnd();
+      error = context.compile(static_cast<X86FuncNode*>(start));
+
+      if (error != kErrorOk)
+        break;
+    }
+
+    do {
+      node = node->getNext();
+    } while (node != nullptr && node->getType() != HLNode::kTypeFunc);
+
+    error = context.serialize(assembler, start, node);
+    context.cleanup();
+    context.reset(false);
+
+    if (error != kErrorOk)
+      break;
+  } while (node != nullptr);
+
+  reset(false);
+  return error;
+}
+
+// ============================================================================
+// [asmjit::X86Compiler - Inst]
+// ============================================================================
+
+//! Get compiler instruction item size without operands assigned.
+static ASMJIT_INLINE size_t X86Compiler_getInstSize(uint32_t code) noexcept {
+  return Utils::inInterval(code, _kX86InstIdJbegin, _kX86InstIdJend) ? sizeof(HLJump) : sizeof(HLInst);
+}
+
+static HLInst* X86Compiler_newInst(X86Compiler* self, void* p, uint32_t code, uint32_t options, Operand* opList, uint32_t opCount) noexcept {
+  if (Utils::inInterval(code, _kX86InstIdJbegin, _kX86InstIdJend)) {
+    HLJump* node = new(p) HLJump(self, code, options, opList, opCount);
+    HLLabel* jTarget = nullptr;
+
+    if ((options & kInstOptionUnfollow) == 0) {
+      if (opList[0].isLabel())
+        jTarget = self->getHLLabel(static_cast<Label&>(opList[0]));
+      else
+        options |= kInstOptionUnfollow;
+    }
+
+    node->orFlags(code == kX86InstIdJmp ? HLNode::kFlagIsJmp | HLNode::kFlagIsTaken : HLNode::kFlagIsJcc);
+    node->_target = jTarget;
+    node->_jumpNext = nullptr;
+
+    if (jTarget) {
+      node->_jumpNext = static_cast<HLJump*>(jTarget->_from);
+      jTarget->_from = node;
+      jTarget->addNumRefs();
+    }
+
+    // An unconditional 'jmp' is always taken; a conditional jump can carry a
+    // branch hint, which we detect here.
+    if (code == kX86InstIdJmp)
+      node->orFlags(HLNode::kFlagIsTaken);
+    else if (options & kInstOptionTaken)
+      node->orFlags(HLNode::kFlagIsTaken);
+
+    node->addOptions(options);
+    return node;
+  }
+  else {
+    HLInst* node = new(p) HLInst(self, code, options, opList, opCount);
+    node->addOptions(options);
+    return node;
+  }
+}
+
+HLInst* X86Compiler::newInst(uint32_t code) noexcept {
+  size_t size = X86Compiler_getInstSize(code);
+  HLInst* inst = static_cast<HLInst*>(_zoneAllocator.alloc(size));
+
+  if (inst == nullptr)
+    goto _NoMemory;
+
+  return X86Compiler_newInst(this, inst, code, getInstOptionsAndReset(), nullptr, 0);
+
+_NoMemory:
+  setLastError(kErrorNoHeapMemory);
+  return nullptr;
+}
+
+HLInst* X86Compiler::newInst(uint32_t code, const Operand& o0) noexcept {
+  size_t size = X86Compiler_getInstSize(code);
+  HLInst* inst = static_cast<HLInst*>(_zoneAllocator.alloc(size + 1 * sizeof(Operand)));
+
+  if (inst == nullptr)
+    goto _NoMemory;
+
+  {
+    Operand* opList = reinterpret_cast<Operand*>(reinterpret_cast<uint8_t*>(inst) + size);
+    opList[0] = o0;
+    ASMJIT_ASSERT_OPERAND(o0);
+    return X86Compiler_newInst(this, inst, code, getInstOptionsAndReset(), opList, 1);
+  }
+
+_NoMemory:
+  setLastError(kErrorNoHeapMemory);
+  return nullptr;
+}
+
+HLInst* X86Compiler::newInst(uint32_t code, const Operand& o0, const Operand& o1) noexcept {
+  size_t size = X86Compiler_getInstSize(code);
+  HLInst* inst = static_cast<HLInst*>(_zoneAllocator.alloc(size + 2 * sizeof(Operand)));
+
+  if (inst == nullptr)
+    goto _NoMemory;
+
+  {
+    Operand* opList = reinterpret_cast<Operand*>(reinterpret_cast<uint8_t*>(inst) + size);
+    opList[0] = o0;
+    opList[1] = o1;
+    ASMJIT_ASSERT_OPERAND(o0);
+    ASMJIT_ASSERT_OPERAND(o1);
+    return X86Compiler_newInst(this, inst, code, getInstOptionsAndReset(), opList, 2);
+  }
+
+_NoMemory:
+  setLastError(kErrorNoHeapMemory);
+  return nullptr;
+}
+
+HLInst* X86Compiler::newInst(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2) noexcept {
+  size_t size = X86Compiler_getInstSize(code);
+  HLInst* inst = static_cast<HLInst*>(_zoneAllocator.alloc(size + 3 * sizeof(Operand)));
+
+  if (inst == nullptr)
+    goto _NoMemory;
+
+  {
+    Operand* opList = reinterpret_cast<Operand*>(reinterpret_cast<uint8_t*>(inst) + size);
+    opList[0] = o0;
+    opList[1] = o1;
+    opList[2] = o2;
+    ASMJIT_ASSERT_OPERAND(o0);
+    ASMJIT_ASSERT_OPERAND(o1);
+    ASMJIT_ASSERT_OPERAND(o2);
+    return X86Compiler_newInst(this, inst, code, getInstOptionsAndReset(), opList, 3);
+  }
+
+_NoMemory:
+  setLastError(kErrorNoHeapMemory);
+  return nullptr;
+}
+
+HLInst* X86Compiler::newInst(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) noexcept {
+  size_t size = X86Compiler_getInstSize(code);
+  HLInst* inst = static_cast<HLInst*>(_zoneAllocator.alloc(size + 4 * sizeof(Operand)));
+
+  if (inst == nullptr)
+    goto _NoMemory;
+
+  {
+    Operand* opList = reinterpret_cast<Operand*>(reinterpret_cast<uint8_t*>(inst) + size);
+    opList[0] = o0;
+    opList[1] = o1;
+    opList[2] = o2;
+    opList[3] = o3;
+    ASMJIT_ASSERT_OPERAND(o0);
+    ASMJIT_ASSERT_OPERAND(o1);
+    ASMJIT_ASSERT_OPERAND(o2);
+    ASMJIT_ASSERT_OPERAND(o3);
+    return X86Compiler_newInst(this, inst, code, getInstOptionsAndReset(), opList, 4);
+  }
+
+_NoMemory:
+  setLastError(kErrorNoHeapMemory);
+  return nullptr;
+}
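+
+// NOTE (editor's aside, not part of the original AsmJit sources): each
+// `newInst` overload above and below performs a single zone allocation that
+// holds the node and its operand array back-to-back:
+//
+//   [HLInst or HLJump][Operand o0][Operand o1]...[Operand oN-1]
+//    ^ inst            ^ opList = reinterpret_cast<Operand*>(
+//                                     reinterpret_cast<uint8_t*>(inst) + size)
+//
+// One allocation covers both, so a single alloc/free manages the node and
+// its operands, and the operands stay adjacent in memory to the node that
+// owns them.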
+
+HLInst* X86Compiler::newInst(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3, const Operand& o4) noexcept {
+  size_t size = X86Compiler_getInstSize(code);
+  HLInst* inst = static_cast<HLInst*>(_zoneAllocator.alloc(size + 5 * sizeof(Operand)));
+
+  if (inst == nullptr)
+    goto _NoMemory;
+
+  {
+    Operand* opList = reinterpret_cast<Operand*>(reinterpret_cast<uint8_t*>(inst) + size);
+    opList[0] = o0;
+    opList[1] = o1;
+    opList[2] = o2;
+    opList[3] = o3;
+    opList[4] = o4;
+    ASMJIT_ASSERT_OPERAND(o0);
+    ASMJIT_ASSERT_OPERAND(o1);
+    ASMJIT_ASSERT_OPERAND(o2);
+    ASMJIT_ASSERT_OPERAND(o3);
+    ASMJIT_ASSERT_OPERAND(o4);
+    return X86Compiler_newInst(this, inst, code, getInstOptionsAndReset(), opList, 5);
+  }
+
+_NoMemory:
+  setLastError(kErrorNoHeapMemory);
+  return nullptr;
+}
+
+HLInst* X86Compiler::emit(uint32_t code) noexcept {
+  HLInst* node = newInst(code);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, const Operand& o0) noexcept {
+  HLInst* node = newInst(code, o0);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1) noexcept {
+  HLInst* node = newInst(code, o0, o1);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2) noexcept {
+  HLInst* node = newInst(code, o0, o1, o2);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) noexcept {
+  HLInst* node = newInst(code, o0, o1, o2, o3);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3, const Operand& o4) noexcept {
+  HLInst* node = newInst(code, o0, o1, o2, o3, o4);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, int o0_) noexcept {
+  Imm o0(o0_);
+  HLInst* node = newInst(code, o0);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, uint64_t o0_) noexcept {
+  Imm o0(o0_);
+  HLInst* node = newInst(code, o0);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, const Operand& o0, int o1_) noexcept {
+  Imm o1(o1_);
+  HLInst* node = newInst(code, o0, o1);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, const Operand& o0, uint64_t o1_) noexcept {
+  Imm o1(o1_);
+  HLInst* node = newInst(code, o0, o1);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, int o2_) noexcept {
+  Imm o2(o2_);
+  HLInst* node = newInst(code, o0, o1, o2);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, uint64_t o2_) noexcept {
+  Imm o2(o2_);
+  HLInst* node = newInst(code, o0, o1, o2);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, int o3_) noexcept {
+  Imm o3(o3_);
+  HLInst* node = newInst(code, o0, o1, o2, o3);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+HLInst* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, uint64_t o3_) noexcept {
+  Imm o3(o3_);
+  HLInst* node = newInst(code, o0, o1, o2, o3);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<HLInst*>(addNode(node));
+}
+
+// ============================================================================
+// [asmjit::X86Compiler - Func]
+// ============================================================================
+
+X86FuncNode* X86Compiler::newFunc(const FuncPrototype& p) noexcept {
+  X86FuncNode* func = newNode<X86FuncNode>();
+  Error error;
+
+  if (func == nullptr)
+    goto _NoMemory;
+
+  // Create helper nodes.
+  func->_entryNode = newLabelNode();
+  func->_exitNode = newLabelNode();
+  func->_end = newNode<HLSentinel>();
+
+  if (func->_entryNode == nullptr || func->_exitNode == nullptr || func->_end == nullptr)
+    goto _NoMemory;
+
+  // Function prototype.
+  if ((error = func->_x86Decl.setPrototype(p)) != kErrorOk) {
+    setLastError(error);
+    return nullptr;
+  }
+
+  // Function arguments stack size. Since the function requires `_argStackSize`
+  // to be set, we have to copy it from `X86FuncDecl`.
+  func->_argStackSize = func->_x86Decl.getArgStackSize();
+  func->_redZoneSize = static_cast<uint16_t>(func->_x86Decl.getRedZoneSize());
+  func->_spillZoneSize = static_cast<uint16_t>(func->_x86Decl.getSpillZoneSize());
+
+  // Expected/Required stack alignment.
+  func->_expectedStackAlignment = getRuntime()->getStackAlignment();
+  func->_requiredStackAlignment = 0;
+
+  // Allocate space for function arguments.
+  func->_args = nullptr;
+  if (func->getNumArgs() != 0) {
+    func->_args = _zoneAllocator.allocT<VarData*>(func->getNumArgs() * sizeof(VarData*));
+    if (func->_args == nullptr)
+      goto _NoMemory;
+    ::memset(func->_args, 0, func->getNumArgs() * sizeof(VarData*));
+  }
+
+  return func;
+
+_NoMemory:
+  setLastError(kErrorNoHeapMemory);
+  return nullptr;
+}
+
+X86FuncNode* X86Compiler::addFunc(const FuncPrototype& p) noexcept {
+  X86FuncNode* func = newFunc(p);
+
+  if (func == nullptr) {
+    setLastError(kErrorNoHeapMemory);
+    return nullptr;
+  }
+
+  return static_cast<X86FuncNode*>(addFunc(func));
+}
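+
+// NOTE (editor's sketch, not part of the original AsmJit sources): the usual
+// call sequence for the function API implemented here is roughly:
+//
+//   X86FuncNode* f = c.addFunc(FuncBuilder0<int>(kCallConvHost)); // newFunc + addNode
+//   /* ... emit the function body ... */
+//   c.endFunc(); // flushes the local const pool, marks the function finished
+//
+// `FuncBuilder0<int>` is just an illustrative prototype here; any
+// FuncBuilderN or FuncPrototype works.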
+
+HLSentinel* X86Compiler::endFunc() noexcept {
+  X86FuncNode* func = getFunc();
+  ASMJIT_ASSERT(func != nullptr);
+
+  // Add the local constant pool at the end of the function (if it exists).
+  setCursor(func->getExitNode());
+
+  if (_localConstPoolLabel.isInitialized()) {
+    embedConstPool(_localConstPoolLabel, _localConstPool);
+    _localConstPoolLabel.reset();
+    _localConstPool.reset();
+  }
+
+  // Finalize.
+  func->addFuncFlags(kFuncFlagIsFinished);
+  _func = nullptr;
+
+  setCursor(func->getEnd());
+  return func->getEnd();
+}
+
+// ============================================================================
+// [asmjit::X86Compiler - Ret]
+// ============================================================================
+
+HLRet* X86Compiler::newRet(const Operand& o0, const Operand& o1) noexcept {
+  HLRet* node = newNode<HLRet>(o0, o1);
+  if (node == nullptr)
+    goto _NoMemory;
+  return node;
+
+_NoMemory:
+  setLastError(kErrorNoHeapMemory);
+  return nullptr;
+}
+
+HLRet* X86Compiler::addRet(const Operand& o0, const Operand& o1) noexcept {
+  HLRet* node = newRet(o0, o1);
+  if (node == nullptr)
+    return node;
+  return static_cast<HLRet*>(addNode(node));
+}
+
+// ============================================================================
+// [asmjit::X86Compiler - Call]
+// ============================================================================
+
+X86CallNode* X86Compiler::newCall(const Operand& o0, const FuncPrototype& p) noexcept {
+  X86CallNode* node = newNode<X86CallNode>(o0);
+  Error error;
+  uint32_t nArgs;
+
+  if (node == nullptr)
+    goto _NoMemory;
+
+  if ((error = node->_x86Decl.setPrototype(p)) != kErrorOk) {
+    setLastError(error);
+    return nullptr;
+  }
+
+  // If there are no arguments, skip the allocation.
+  if ((nArgs = p.getNumArgs()) == 0)
+    return node;
+
+  node->_args = static_cast<Operand*>(_zoneAllocator.alloc(nArgs * sizeof(Operand)));
+  if (node->_args == nullptr)
+    goto _NoMemory;
+
+  ::memset(node->_args, 0, nArgs * sizeof(Operand));
+  return node;
+
+_NoMemory:
+  setLastError(kErrorNoHeapMemory);
+  return nullptr;
+}
+
+X86CallNode* X86Compiler::addCall(const Operand& o0, const FuncPrototype& p) noexcept {
+  X86CallNode* node = newCall(o0, p);
+  if (node == nullptr)
+    return nullptr;
+  return static_cast<X86CallNode*>(addNode(node));
+}
+
+// ============================================================================
+// [asmjit::X86Compiler - Vars]
+// ============================================================================
+
+Error X86Compiler::setArg(uint32_t argIndex, const Var& var) noexcept {
+  X86FuncNode* func = getFunc();
+
+  if (func == nullptr)
+    return kErrorInvalidArgument;
+
+  if (!isVarValid(var))
+    return kErrorInvalidState;
+
+  VarData* vd = getVd(var);
+  func->setArg(argIndex, vd);
+
+  return kErrorOk;
+}
+
+Error X86Compiler::_newVar(Var* var, uint32_t vType, const char* name) noexcept {
+  ASMJIT_ASSERT(vType < kX86VarTypeCount);
+  vType = _targetVarMapping[vType];
+  ASMJIT_ASSERT(vType != kInvalidVar);
+
+  // The assertion is compiled out of release builds, but we still want to
+  // reject an invalid type at runtime.
+  if (vType == kInvalidVar) {
+    static_cast<X86Var*>(var)->reset();
+    return kErrorInvalidArgument;
+  }
+
+  const VarInfo& vInfo = _x86VarInfo[vType];
+  VarData* vd = _newVd(vInfo, name);
+
+  if (vd == nullptr) {
+    static_cast<X86Var*>(var)->reset();
+    return getLastError();
+  }
+
+  var->_init_packed_op_sz_w0_id(Operand::kTypeVar, vInfo.getSize(), vInfo.getRegType() << 8, vd->getId());
+  var->_vreg.vType = vType;
+  return kErrorOk;
+}
+
+Error X86Compiler::_newVar(Var* var, uint32_t vType, const char* fmt, va_list ap) noexcept {
+  char name[64];
+
+  vsnprintf(name, ASMJIT_ARRAY_SIZE(name), fmt, ap);
+  name[ASMJIT_ARRAY_SIZE(name) - 1] = '\0';
+  return _newVar(var, vType, name);
+}
+
+// ============================================================================
+// [asmjit::X86Compiler - Stack]
+// ============================================================================
+
+Error X86Compiler::_newStack(BaseMem* mem, uint32_t size, uint32_t alignment, const char* name) noexcept {
+  if (size == 0)
+    return kErrorInvalidArgument;
+
+  if (alignment > 64)
+    alignment = 64;
+
+  VarInfo vi = { kInvalidVar, 0, kInvalidReg, kInvalidReg, 0, "" };
+  VarData* vd = _newVd(vi, name);
+
+  if (vd == nullptr) {
+    static_cast<X86Mem*>(mem)->reset();
+    return getLastError();
+  }
+
+  vd->_size = size;
+  vd->_isStack = true;
+  vd->_alignment = static_cast<uint8_t>(alignment);
+
+  static_cast<X86Mem*>(mem)->_init(kMemTypeStackIndex, vd->getId(), 0, 0);
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::X86Compiler - Const]
+// ============================================================================
+
+Error X86Compiler::_newConst(BaseMem* mem, uint32_t scope, const void* data, size_t size) noexcept {
+  Error error = kErrorOk;
+  size_t offset;
+
+  Label* dstLabel;
+  ConstPool* dstPool;
+
+  if (scope == kConstScopeLocal) {
+    dstLabel = &_localConstPoolLabel;
+    dstPool = &_localConstPool;
+  }
+  else if (scope == kConstScopeGlobal) {
+    dstLabel = &_globalConstPoolLabel;
+    dstPool = &_globalConstPool;
+  }
+  else {
+    error = kErrorInvalidArgument;
+    goto _OnError;
+  }
+
+  error = dstPool->add(data, size, offset);
+  if (error != kErrorOk)
+    goto _OnError;
+
+  if (dstLabel->getId() == kInvalidValue) {
+    *dstLabel = newLabel();
+    if (!dstLabel->isInitialized()) {
+      error = kErrorNoHeapMemory;
+      goto _OnError;
+    }
+  }
+
+  *static_cast<X86Mem*>(mem) = x86::ptr(*dstLabel, static_cast<int32_t>(offset), static_cast<uint32_t>(size));
+  return kErrorOk;
+
+_OnError:
+  return error;
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // !ASMJIT_DISABLE_COMPILER && (ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64)
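Editor's note: `_newStack` and `_newConst` above are the internal endpoints
behind the compiler's typed wrappers. A minimal consumer-side sketch, assuming
this snapshot exposes the usual public `newStack(size, alignment, name)`
wrapper over `_newStack`:

    X86GpVar p = c.newIntPtr("p");
    X86Mem scratch = c.newStack(256, 16);  // 256 bytes, 16-byte aligned
    c.lea(p, scratch);                     // materialize its address

Constants, by contrast, land in a label-addressed pool via the `_newConst`
path and are referenced through label-relative memory operands.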
diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86compiler.h b/DynamicHooks/thirdparty/AsmJit/x86/x86compiler.h
new file mode 100644
index 0000000..8bd390c
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/x86/x86compiler.h
@@ -0,0 +1,7496 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_X86_X86COMPILER_H
+#define _ASMJIT_X86_X86COMPILER_H
+
+#include "../build.h"
+#if !defined(ASMJIT_DISABLE_COMPILER)
+
+// [Dependencies]
+#include "../base/compiler.h"
+#include "../base/vectypes.h"
+#include "../x86/x86assembler.h"
+#include "../x86/x86compilerfunc.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [Forward Declarations]
+// ============================================================================
+
+class X86CallNode;
+class X86FuncNode;
+
+//! \addtogroup asmjit_x86
+//! \{
+
+//! \internal
+ASMJIT_VARAPI const VarInfo _x86VarInfo[];
+
+#if defined(ASMJIT_BUILD_X86)
+//! \internal
+//!
+//! Mapping of x86 variable types, including all abstract types, into their real types.
+//!
+//! This mapping translates the following:
+//! - `kVarTypeInt64` to `kInvalidVar`.
+//! - `kVarTypeUInt64` to `kInvalidVar`.
+//! - `kVarTypeIntPtr` to `kVarTypeInt32`.
+//! - `kVarTypeUIntPtr` to `kVarTypeUInt32`.
+ASMJIT_VARAPI const uint8_t _x86VarMapping[kX86VarTypeCount];
+#endif // ASMJIT_BUILD_X86
+
+#if defined(ASMJIT_BUILD_X64)
+//! \internal
+//!
+//! Mapping of x64 variable types, including all abstract types, into their real types.
+//!
+//! This mapping translates the following:
+//! - `kVarTypeIntPtr` to `kVarTypeInt64`.
+//! - `kVarTypeUIntPtr` to `kVarTypeUInt64`.
+ASMJIT_VARAPI const uint8_t _x64VarMapping[kX86VarTypeCount];
+#endif // ASMJIT_BUILD_X64
+
+// ============================================================================
+// [asmjit::X86FuncNode]
+// ============================================================================
+
+//! X86/X64 function node.
+class X86FuncNode : public HLFunc {
+ public:
+  ASMJIT_NO_COPY(X86FuncNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new `X86FuncNode` instance.
+  ASMJIT_INLINE X86FuncNode(Compiler* compiler) noexcept : HLFunc(compiler) {
+    _decl = &_x86Decl;
+    _saveRestoreRegs.reset();
+
+    _alignStackSize = 0;
+    _alignedMemStackSize = 0;
+    _pushPopStackSize = 0;
+    _moveStackSize = 0;
+    _extraStackSize = 0;
+
+    _stackFrameRegIndex = kInvalidReg;
+    _isStackFrameRegPreserved = false;
+
+    for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(_stackFrameCopyGpIndex); i++)
+      _stackFrameCopyGpIndex[i] = static_cast<uint8_t>(kInvalidReg);
+  }
+
+  //! Destroy the `X86FuncNode` instance.
+  ASMJIT_INLINE ~X86FuncNode() noexcept {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get function declaration as `X86FuncDecl`.
+  ASMJIT_INLINE X86FuncDecl* getDecl() const noexcept {
+    return const_cast<X86FuncDecl*>(&_x86Decl);
+  }
+
+  //! Get argument.
+  ASMJIT_INLINE VarData* getArg(uint32_t i) const noexcept {
+    ASMJIT_ASSERT(i < _x86Decl.getNumArgs());
+    return static_cast<VarData*>(_args[i]);
+  }
+
+  //! Get registers which have to be saved in prolog/epilog.
+  ASMJIT_INLINE uint32_t getSaveRestoreRegs(uint32_t rc) noexcept { return _saveRestoreRegs.get(rc); }
+
+  //! Get stack size needed to align the stack back to its natural alignment.
+  ASMJIT_INLINE uint32_t getAlignStackSize() const noexcept { return _alignStackSize; }
+  //! Set stack size needed to align the stack back to its natural alignment.
+  ASMJIT_INLINE void setAlignStackSize(uint32_t s) noexcept { _alignStackSize = s; }
+
+  //! Get aligned stack size used by variables and memory allocated on the stack.
+  ASMJIT_INLINE uint32_t getAlignedMemStackSize() const noexcept { return _alignedMemStackSize; }
+
+  //! Get stack size used by push/pop sequences in prolog/epilog.
+  ASMJIT_INLINE uint32_t getPushPopStackSize() const noexcept { return _pushPopStackSize; }
+  //! Set stack size used by push/pop sequences in prolog/epilog.
+  ASMJIT_INLINE void setPushPopStackSize(uint32_t s) noexcept { _pushPopStackSize = s; }
+
+  //! Get stack size used by mov sequences in prolog/epilog.
+  ASMJIT_INLINE uint32_t getMoveStackSize() const noexcept { return _moveStackSize; }
+  //! Set stack size used by mov sequences in prolog/epilog.
+  ASMJIT_INLINE void setMoveStackSize(uint32_t s) noexcept { _moveStackSize = s; }
+
+  //! Get extra stack size.
+  ASMJIT_INLINE uint32_t getExtraStackSize() const noexcept { return _extraStackSize; }
+  //! Set extra stack size.
+  ASMJIT_INLINE void setExtraStackSize(uint32_t s) noexcept { _extraStackSize = s; }
+
+  //! Get whether the function has a stack frame register (only when the stack is misaligned).
+  //!
+  //! NOTE: The stack frame register can serve two purposes - stack alignment
+  //! or generating a standard prolog/epilog sequence.
+  ASMJIT_INLINE bool hasStackFrameReg() const noexcept {
+    return _stackFrameRegIndex != kInvalidReg;
+  }
+
+  //! Get stack frame register index.
+  //!
+  //! NOTE: Used only when the stack is misaligned.
+  ASMJIT_INLINE uint32_t getStackFrameRegIndex() const noexcept {
+    return _stackFrameRegIndex;
+  }
+
+  //! Get whether the stack frame register is preserved.
+  //!
+  //! NOTE: Used only when the stack is misaligned.
+  ASMJIT_INLINE bool isStackFrameRegPreserved() const noexcept {
+    return static_cast<bool>(_isStackFrameRegPreserved);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! X86 function decl.
+  X86FuncDecl _x86Decl;
+  //! Registers which must be saved/restored in prolog/epilog.
+  X86RegMask _saveRestoreRegs;
+
+  //! Stack size needed to align the function back to its natural alignment.
+  uint32_t _alignStackSize;
+  //! Like `_memStackSize`, but aligned.
+  uint32_t _alignedMemStackSize;
+
+  //! Stack required for push/pop in prolog/epilog (X86/X64 specific).
+  uint32_t _pushPopStackSize;
+  //! Stack required for movs in prolog/epilog (X86/X64 specific).
+  uint32_t _moveStackSize;
+
+  //! Stack required to put extra data (for example function arguments
+  //! when manually aligning to the requested alignment).
+  uint32_t _extraStackSize;
+
+  //! Stack frame register.
+  uint8_t _stackFrameRegIndex;
+  //! Whether the stack frame register is preserved.
+  uint8_t _isStackFrameRegPreserved;
+  //! GP register indexes that can be used to copy function arguments
+  //! to a new location in case we are doing manual stack alignment.
+  uint8_t _stackFrameCopyGpIndex[6];
+};
+
+// ============================================================================
+// [asmjit::X86CallNode]
+// ============================================================================
+
+//! X86/X64 function-call node.
+class X86CallNode : public HLCall {
+ public:
+  ASMJIT_NO_COPY(X86CallNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+  //! Create a new `X86CallNode` instance.
+  ASMJIT_INLINE X86CallNode(Compiler* compiler, const Operand& target) noexcept : HLCall(compiler, target) {
+    _decl = &_x86Decl;
+    _usedArgs.reset();
+  }
+
+  //! Destroy the `X86CallNode` instance.
+  ASMJIT_INLINE ~X86CallNode() noexcept {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get the function prototype.
+  ASMJIT_INLINE X86FuncDecl* getDecl() const noexcept {
+    return const_cast<X86FuncDecl*>(&_x86Decl);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Prototype]
+  // --------------------------------------------------------------------------
+
+  //! Set function prototype.
+  ASMJIT_INLINE Error setPrototype(const FuncPrototype& p) noexcept {
+    return _x86Decl.setPrototype(p);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Arg / Ret]
+  // --------------------------------------------------------------------------
+
+  //! Set argument at `i` to `op`.
+  ASMJIT_API bool _setArg(uint32_t i, const Operand& op) noexcept;
+  //! Set return at `i` to `op`.
+  ASMJIT_API bool _setRet(uint32_t i, const Operand& op) noexcept;
+
+  //! Set argument at `i` to `var`.
+  ASMJIT_INLINE bool setArg(uint32_t i, const Var& var) noexcept { return _setArg(i, var); }
+  //! Set argument at `i` to `reg` (FP registers only).
+  ASMJIT_INLINE bool setArg(uint32_t i, const X86FpReg& reg) noexcept { return _setArg(i, reg); }
+  //! Set argument at `i` to `imm`.
+  ASMJIT_INLINE bool setArg(uint32_t i, const Imm& imm) noexcept { return _setArg(i, imm); }
+
+  //! Set return at `i` to `var`.
+  ASMJIT_INLINE bool setRet(uint32_t i, const Var& var) noexcept { return _setRet(i, var); }
+  //! Set return at `i` to `reg` (FP registers only).
+  ASMJIT_INLINE bool setRet(uint32_t i, const X86FpReg& reg) noexcept { return _setRet(i, reg); }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! X86 declaration.
+  X86FuncDecl _x86Decl;
+  //! Mask of all registers actually used to pass function arguments.
+  //!
+  //! NOTE: This bit-mask is not the same as `X86Func::_passed`. It contains
+  //! only registers actually used to do the call while `X86Func::_passed`
+  //! mask contains all registers for all function prototype combinations.
+  X86RegMask _usedArgs;
+};
+
+// ============================================================================
+// [asmjit::X86Compiler]
+// ============================================================================
+
+//! X86/X64 compiler.
+//!
+//! This class is used to store an instruction stream and allows modifying it
+//! on the fly. It uses a different concept than the `Assembler` class and in
+//! fact `Assembler` is only used as a backend. The Compiler never emits
+//! machine code directly; it stores instructions in a code-stream instead.
+//! This allows the code-stream to be modified later and various
+//! transformations to be applied to it.
+//!
+//! `X86Compiler` moves the code generation to a higher level. Higher level
+//! constructs allow writing more abstract and extensible code than is
+//! possible with a pure `X86Assembler` solution.
+//!
+//! The Story
+//! ---------
+//!
+//! The compiler was created as a solution to bring higher level concepts into
+//! very low-level code generation.
+
+// ============================================================================
+// [asmjit::X86Compiler]
+// ============================================================================
+
+//! X86/X64 compiler.
+//!
+//! This class is used to store an instruction stream and allows modifying it
+//! on the fly. It uses a different concept than the `Assembler` class; in
+//! fact, `Assembler` is only used as a backend. The compiler never emits
+//! machine code directly, it stores instructions in a code-stream instead.
+//! This allows the code-stream to be modified later and various
+//! transformations to be applied to it.
+//!
+//! `X86Compiler` moves the code generation to a higher level. Higher level
+//! constructs allow writing more abstract and extensible code than is
+//! possible with a pure `X86Assembler` solution.
+//!
+//! The Story
+//! ---------
+//!
+//! The compiler was created as a solution to bring higher level concepts into
+//! very low-level code generation. It started as an experiment to unify the
+//! code generator for the X86 and X64 architectures. These architectures are
+//! built on the same ground, but use some concepts that differ radically
+//! between them. The X64 architecture is basically a good evolution of X86,
+//! because it offers many more registers and adds support for relative
+//! addressing. The two architectures also use different ABIs, which means
+//! that function calling conventions are incompatible (not just between
+//! architectures, but also between OSes).
+//!
+//! This is a pain when it comes to low-level code generation. When AsmJit was
+//! first published the main author's plan was to use it for 2D pipeline
+//! generation. In that task the main use of AsmJit was to combine several
+//! code sections without worrying about "which register should contain
+//! what". This meant that a pure `X86Assembler` wouldn't do the job by
+//! itself. Instead of hacking `X86Assembler` to do more, the `X86Compiler`
+//! concept was introduced as a layer that consumes instructions the same way
+//! as `X86Assembler`, transforms them, and serializes them to `X86Assembler`.
+//!
+//! The compiler concept evolved rapidly after the initial version and was
+//! rewritten several times before it stabilized into the current form. It is
+//! still evolving and it used to be the biggest source of bugs in AsmJit
+//! (doing non-trivial transformations has its downsides).
+//!
+//! The compiler currently uses linear-scan register allocation and can look
+//! ahead to see which registers it should use. There are still many
+//! limitations, but if the resulting code doesn't use too many registers at
+//! the same time the output is pretty decent. However, please don't expect
+//! miracles; it cannot compete with the register allocators used in today's
+//! C++ compilers.
+//!
+//! Code Generation
+//! ---------------
+//!
+//! The `X86Compiler` uses `X86Assembler` as a backend. It integrates with it,
+//! which means that labels created by the Assembler can be used by the
+//! Compiler and vice-versa. The following code shows the preferred and
+//! simplest way of creating a compiler:
+//!
+//! ~~~
+//! JitRuntime runtime;
+//! X86Assembler a(&runtime);
+//! X86Compiler c(&a);
+//!
+//! // ... use the compiler `c` ...
+//!
+//! c.finalize();
+//! ~~~
+//!
+//! After `finalize()` is called the compiler is detached from the assembler
+//! and reset (it cannot be used after finalization). It can be reattached by
+//! using `c.attach(&a)`, but the compiler won't remember anything from the
+//! previous code-generation run - it will be like creating a new instance
+//! of `X86Compiler`.
+//!
+//! Functions
+//! ---------
+//!
+//! See \ref asmjit::Compiler::addFunc().
+//!
+//! Variables
+//! ---------
+//!
+//! The compiler has built-in support for variables and for assigning
+//! function arguments. Variables are created by using `newXXX()` methods. If
+//! the method name ends with `Var`, like `newXmmVar()`, it accepts a variable
+//! type as its first parameter. The variable type defines the layout and
+//! size of the variable. It matters most for general-purpose registers,
+//! where the variable type affects which instructions are generated when the
+//! variable is used as an operand. For example "mov eax, edx" is different
+//! from "mov rax, rdx", but it's still the same "mov" instruction. Since the
+//! variable types are verbose, alternative forms that create variables more
+//! easily were introduced.
+//!
+//! Instead of using `newGpVar(kVarTypeIntX, ...)`, forms like `newIntX(...)`
+//! or `newUIntX(...)` can be used. Variables can have a name, so the code
+//! that creates a variable usually looks like `newInt32("a")` or
+//! `newIntPtr("pInputBuffer")`, etc...
+//!
+//! Other register types like MMX or XMM also have alternative forms; for
+//! example `newMm("mmx")`, `newXmm("xmm")`, `newXmmPd("doubles")`, and other
+//! forms can be used to create SIMD variables.
+//!
+//! Function arguments are associated with variables by using `setArg()`,
+//! where the first parameter is the argument index and the second parameter
+//! is the variable instance. Function arguments can be a little tricky,
+//! because asmjit also allows defining 64-bit arguments on a 32-bit
+//! architecture, where the argument itself is split in two - a lower 32-bit
+//! and a higher 32-bit part. This also applies to the return value.
+//!
+//! The following snippet shows how to create a function and associate
+//! function arguments with variables:
+//!
+//! ~~~
+//! JitRuntime runtime;
+//! X86Assembler a(&runtime);
+//! X86Compiler c(&a);
+//!
+//! // Function prototype is "int function(int*, int*)" by using the host
+//! // calling convention, which should be __cdecl in our case (if not
+//! // configured to something else).
+//! c.addFunc(FuncBuilder2<int, int*, int*>(kCallConvHost));
+//!
+//! // Associate function arguments.
+//! X86GpVar pX = c.newIntPtr("pX");
+//! X86GpVar pY = c.newIntPtr("pY");
+//!
+//! c.setArg(0, pX);
+//! c.setArg(1, pY);
+//!
+//! // Do something useful :)
+//! X86GpVar x = c.newInt32("x");
+//! X86GpVar y = c.newInt32("y");
+//!
+//! c.mov(x, dword_ptr(pX));
+//! c.mov(y, dword_ptr(pY));
+//! c.add(x, y);
+//!
+//! // Return `x`.
+//! c.ret(x);
+//!
+//! // End of the function body.
+//! c.endFunc();
+//!
+//! // Finalize the compiler.
+//! c.finalize();
+//!
+//! // Use the `X86Assembler` to assemble and relocate the function. It returns
+//! // a pointer to the first byte of the code generated, which is the function
+//! // entry point in our case.
+//! typedef int (*MyFunc)(int*, int*);
+//! MyFunc func = asmjit_cast<MyFunc>(a.make());
+//! ~~~
+//!
+//! The snippet uses methods to create variables, to associate them with
+//! function arguments, and to use them to return a value from the generated
+//! function.
+//!
+//! When a variable is created, its initial state is `kVarStateNone`; when
+//! it's allocated to a register or spilled to memory its state changes to
+//! `kVarStateReg` or `kVarStateMem`, respectively. It's usual for a
+//! variable's state to change multiple times during its lifetime. To
+//! generate better code, you can control allocation and spilling explicitly:
+//!
+//! - `alloc()` - Explicitly allocate a variable into a register. It can be
+//!   used, for example, to force allocation of a variable before a loop.
+//!
+//! - `spill()` - Explicitly spill a variable. If the variable is in a
+//!   register and you call this method, it's moved to its home memory
+//!   location. If the variable is not in a register, no operation is
+//!   performed.
+//!
+//! - `unuse()` - Unuse a variable (you can use this to end the variable's
+//!   scope or sub-scope).
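+//!
+//! For example (a minimal sketch; `c` is assumed to be an attached
+//! `X86Compiler` and the loop body is elided):
+//!
+//! ~~~
+//! X86GpVar counter = c.newInt32("counter");
+//!
+//! c.alloc(counter); // Force `counter` into a register before the loop.
+//! // ... loop body that uses `counter` ...
+//! c.spill(counter); // Move it back to its home memory slot.
+//! c.unuse(counter); // End of `counter`'s scope.
+//! ~~~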
+//!
+//! List of X86/X64 variable types:
+//! - `kVarTypeInt8` - Signed 8-bit integer, mapped to GPD register (eax, ebx, ...).
+//! - `kVarTypeUInt8` - Unsigned 8-bit integer, mapped to GPD register (eax, ebx, ...).
+//! - `kVarTypeInt16` - Signed 16-bit integer, mapped to GPD register (eax, ebx, ...).
+//! - `kVarTypeUInt16` - Unsigned 16-bit integer, mapped to GPD register (eax, ebx, ...).
+//! - `kVarTypeInt32` - Signed 32-bit integer, mapped to GPD register (eax, ebx, ...).
+//! - `kVarTypeUInt32` - Unsigned 32-bit integer, mapped to GPD register (eax, ebx, ...).
+//! - `kVarTypeInt64` - Signed 64-bit integer, mapped to GPQ register (rax, rbx, ...).
+//! - `kVarTypeUInt64` - Unsigned 64-bit integer, mapped to GPQ register (rax, rbx, ...).
+//! - `kVarTypeIntPtr` - intptr_t, mapped to GPD/GPQ register; depends on target, not host!
+//! - `kVarTypeUIntPtr` - uintptr_t, mapped to GPD/GPQ register; depends on target, not host!
+//! - `kX86VarTypeMm` - 64-bit MMX register (MM0, MM1, ...).
+//! - `kX86VarTypeXmm` - 128-bit XMM register.
+//! - `kX86VarTypeXmmSs` - 128-bit XMM register that contains a scalar float.
+//! - `kX86VarTypeXmmSd` - 128-bit XMM register that contains a scalar double.
+//! - `kX86VarTypeXmmPs` - 128-bit XMM register that contains 4 packed floats.
+//! - `kX86VarTypeXmmPd` - 128-bit XMM register that contains 2 packed doubles.
+//! - `kX86VarTypeYmm` - 256-bit YMM register.
+//! - `kX86VarTypeYmmPs` - 256-bit YMM register that contains 8 packed floats.
+//! - `kX86VarTypeYmmPd` - 256-bit YMM register that contains 4 packed doubles.
+//! - `kX86VarTypeZmm` - 512-bit ZMM register.
+//! - `kX86VarTypeZmmPs` - 512-bit ZMM register that contains 16 packed floats.
+//! - `kX86VarTypeZmmPd` - 512-bit ZMM register that contains 8 packed doubles.
+//!
+//! List of X86/X64 variable states:
+//! - `kVarStateNone` - State assigned to newly created variables or to
+//!   variables that are not used (dereferenced to zero).
+//! - `kVarStateReg` - State that means the variable is currently allocated
+//!   in a register.
+//! - `kVarStateMem` - State that means the variable currently lives only in
+//!   its memory location.
+//!
+//! Memory Management
+//! -----------------
+//!
+//! Compiler memory management follows these rules:
+//!
+//! - Everything created by `X86Compiler` is always freed by `X86Compiler`.
+//! - To get decent performance, the compiler always uses large memory
+//!   buffers to allocate objects. When the compiler is destroyed, it
+//!   invalidates all objects that it created.
+//! - This type of memory management is called 'zone memory management'.
+//!
+//! In other words, anything that returns a pointer to something cannot be
+//! used after the compiler is destroyed. However, since the compiler
+//! integrates with the assembler, labels created by the Compiler can be used
+//! by the Assembler or by another Compiler attached to it.
+//!
+//! Control-Flow and State Management
+//! ---------------------------------
+//!
+//! The `X86Compiler` automatically manages the state of all variables when
+//! using control-flow instructions like jumps, conditional jumps and
+//! function calls.
+//!
+//! In general the internal state can change only when using a jump or a
+//! conditional jump. When using an unconditional jump the state change is
+//! embedded before the jump itself, so there is basically zero overhead.
+//! However, conditional jumps are more complicated and the compiler can in
+//! some cases generate a block at the end of the function that changes the
+//! state for one branch. Usually the "taken" branch is embedded directly
+//! before the jump, and the "not-taken" branch gets a separate code block.
+//!
+//! The next example shows the extra code block generated for a state change:
+//!
+//! ~~~
+//! JitRuntime runtime;
+//! X86Assembler a(&runtime);
+//! X86Compiler c(&a);
+//!
+//! c.addFunc(FuncBuilder0<Void>(kCallConvHost));
+//!
+//! Label L0 = c.newLabel();
+//! X86GpVar x = c.newInt32("x");
+//! X86GpVar y = c.newInt32("y");
+//!
+//! // After these two lines, `x` and `y` will always be stored in registers:
+//! // x - register.
+//! // y - register.
+//! c.xor_(x, x);
+//! c.xor_(y, y);
+//! c.cmp(x, y);
+//!
+//! // Manually spill `x` and `y`:
+//! // x - memory.
+//! // y - memory.
+//! c.spill(x);
+//! c.spill(y);
+//!
+//! // Conditional jump to L0. It will always be taken, but the compiler
+//! // assumes it is unlikely to be taken, so it will embed the state-change
+//! // code somewhere else.
+//! c.je(L0);
+//!
+//! // Do something. The variables `x` and `y` will be allocated again.
+//! // `x` - register.
+//! // `y` - register.
+//! c.add(x, 1);
+//! c.add(y, 2);
+//!
+//! // Bind a label here, the state is not changed.
+//! // `x` - register.
+//! // `y` - register.
+//! c.bind(L0);
+//!
+//! // Use `x` and `y`, because the compiler knows the life-time and can
+//! // eliminate the state change of dead variables.
+//! // `x` - register.
+//! // `y` - register.
+//! c.sub(x, y);
+//!
+//! c.endFunc();
+//! ~~~
+//!
+//! The output:
+//!
+//! ~~~
+//! xor eax, eax        ; xor x, x
+//! xor ecx, ecx        ; xor y, y
+//! cmp eax, ecx        ; cmp x, y
+//! mov [esp - 24], eax ; spill x
+//! mov [esp - 28], ecx ; spill y
+//! je L0_Switch0
+//! mov eax, [esp - 24] ; alloc x
+//! add eax, 1          ; add x, 1
+//! mov ecx, [esp - 28] ; alloc y
+//! add ecx, 2          ; add y, 2
+//! L0:
+//! sub eax, ecx        ; sub x, y
+//! ret
+//!
+//! ; state-switch begin
+//! L0_Switch0:
+//! mov eax, [esp - 24] ; alloc x
+//! mov ecx, [esp - 28] ; alloc y
+//! jmp short L0
+//! ; state-switch end
+//! ~~~
+//!
+//! As can be seen, the state-switch section was generated (L0_Switch0). The
+//! compiler was unable to restore the state immediately when emitting the
+//! forward jump (the code is generated from the first to the last
+//! instruction and the target state is simply not known at that time).
+//!
+//! To tell the compiler to embed the state-switch code before the jump,
+//! create a backward jump (which the processor also expects to be taken). A
+//! slightly modified example demonstrates the possibility to embed the
+//! state-switch before the jump:
+//!
+//! ~~~
+//! JitRuntime runtime;
+//! X86Assembler a(&runtime);
+//! X86Compiler c(&a);
+//!
+//! c.addFunc(FuncBuilder0<Void>(kCallConvHost));
+//!
+//! Label L0 = c.newLabel();
+//! X86GpVar x = c.newInt32("x");
+//! X86GpVar y = c.newInt32("y");
+//!
+//! // After these two lines, `x` and `y` will always be stored in registers.
+//! // `x` - register.
+//! // `y` - register.
+//! c.xor_(x, x);
+//! c.xor_(y, y);
+//!
+//! // Manually spill `x` and `y`.
+//! // `x` - memory.
+//! // `y` - memory.
+//! c.spill(x);
+//! c.spill(y);
+//!
+//! // Bind a label here, the state is not changed.
+//! // `x` - memory.
+//! // `y` - memory.
+//! c.bind(L0);
+//!
+//! // Do something, the variables will be allocated again.
+//! c.add(x, 1);
+//! c.add(y, 2);
+//! // State:
+//! // `x` - register.
+//! // `y` - register.
+//!
+//! // Backward conditional jump to L0. The default behavior is that it will
+//! // be taken, so the state-change code will be embedded here.
+//! c.je(L0);
+//!
+//! c.endFunc();
+//! ~~~
+//!
+//! The output:
+//!
+//! ~~~
+//! xor ecx, ecx        ; xor x, x
+//! xor edx, edx        ; xor y, y
+//! mov [esp - 24], ecx ; spill x
+//! mov [esp - 28], edx ; spill y
+//! L0:
+//! mov ecx, [esp - 24] ; alloc x
+//! add ecx, 1          ; add x, 1
+//! mov edx, [esp - 28] ; alloc y
+//! add edx, 2          ; add y, 2
+//!
+//! ; state-switch begin
+//! mov [esp - 24], ecx ; spill x
+//! mov [esp - 28], edx ; spill y
+//! ; state-switch end
+//!
+//! je short L0
+//! ret
+//! ~~~
+//!
+//! Please note where the state-switch sections are located in both examples.
+//! To inform the compiler which branch is likely to be taken, use the
+//! following options:
+//!
+//! - `kInstOptionTaken` - The conditional jump is likely to be taken.
+//! - `kInstOptionNotTaken` - The conditional jump is unlikely to be taken.
+//!
+//! Both options can be set simply by using `taken()` and/or `notTaken()`.
+//! The example above could be changed to `c.taken().je(L0)`, which would
+//! generate the following output:
+//!
+//! ~~~
+//! xor ecx, ecx        ; xor x, x
+//! xor edx, edx        ; xor y, y
+//! mov [esp - 24], ecx ; spill x
+//! mov [esp - 28], edx ; spill y
+//! L0:
+//! mov ecx, [esp - 24] ; alloc x
+//! add ecx, 1          ; add x, 1
+//! mov edx, [esp - 28] ; alloc y
+//! add edx, 2          ; add y, 2
+//! je L0_Switch
+//! ret
+//!
+//! ; state-switch begin
+//! L0_Switch:
+//! mov [esp - 24], ecx ; spill x
+//! mov [esp - 28], edx ; spill y
+//! jmp short L0
+//! ; state-switch end
+//! ~~~
+//!
+//! This section described how the state-change works. The behavior is
+//! deterministic and can be overridden manually if needed.
+//!
+//! Advanced Code Generation
+//! ------------------------
+//!
+//! This section describes an advanced method of code generation available in
+//! the assembler and the compiler. Every instruction supported by AsmJit has
+//! an ID, which can be used with the `emit()` method instead of the
+//! compiler's intrinsics. For example `mov(x, y)` is equivalent to
+//! `emit(kX86InstIdMov, x, y)`. The latter is, however, not type-safe, and
+//! the C++ compiler won't help you detect some bugs at compile time. On the
+//! other hand, the latter makes it possible to select the instruction to
+//! generate programmatically instead of hardcoding it.
+//!
+//! There are many use-cases where the unsafe API can be used, for example:
+//!
+//! ~~~
+//! uint32_t translateOp(const char* op) {
+//!   if (strcmp(op, "add") == 0) return kX86InstIdAddsd;
+//!   if (strcmp(op, "sub") == 0) return kX86InstIdSubsd;
+//!   if (strcmp(op, "mul") == 0) return kX86InstIdMulsd;
+//!   if (strcmp(op, "div") == 0) return kX86InstIdDivsd;
+//!
+//!   return kInstIdNone;
+//! }
+//!
+//! void emitArith(X86Compiler& c, const char* op, const X86XmmVar& a, const X86XmmVar& b) {
+//!   uint32_t instId = translateOp(op);
+//!   if (instId != kInstIdNone)
+//!     c.emit(instId, a, b);
+//! }
+//! ~~~
+//!
+//! Other use cases are waiting for you! Be sure that the instructions being
+//! emitted are correct and encodable, otherwise the Assembler will fail and
+//! set the error code to `kErrorUnknownInst`.
+class ASMJIT_VIRTAPI X86Compiler : public Compiler {
+ public:
+  ASMJIT_NO_COPY(X86Compiler)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create an `X86Compiler` instance.
+  ASMJIT_API X86Compiler(X86Assembler* assembler = nullptr) noexcept;
+  //! Destroy the `X86Compiler` instance.
+ ASMJIT_API ~X86Compiler() noexcept; + + // -------------------------------------------------------------------------- + // [Attach / Reset] + // -------------------------------------------------------------------------- + + //! \override + ASMJIT_API virtual Error attach(Assembler* assembler) noexcept; + //! \override + ASMJIT_API virtual void reset(bool releaseMemory) noexcept; + + // ------------------------------------------------------------------------- + // [Finalize] + // ------------------------------------------------------------------------- + + ASMJIT_API virtual Error finalize() noexcept; + + // -------------------------------------------------------------------------- + // [Assembler] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE X86Assembler* getAssembler() const noexcept { + return static_cast(_assembler); + } + + // -------------------------------------------------------------------------- + // [Arch] + // -------------------------------------------------------------------------- + + //! Get count of registers of the current architecture and mode. + ASMJIT_INLINE const X86RegCount& getRegCount() const noexcept { return _regCount; } + + //! Get GPD or GPQ register depending on the current architecture. + ASMJIT_INLINE X86GpReg gpz(uint32_t index) const noexcept { return X86GpReg(zax, index); } + + //! Create an architecture dependent intptr_t memory operand. + ASMJIT_INLINE X86Mem intptr_ptr(const X86GpReg& base, int32_t disp = 0) const noexcept { + return x86::ptr(base, disp, zax.getSize()); + } + //! \overload + ASMJIT_INLINE X86Mem intptr_ptr(const X86GpReg& base, const X86GpReg& index, uint32_t shift = 0, int32_t disp = 0) const noexcept { + return x86::ptr(base, index, shift, disp, zax.getSize()); + } + //! \overload + ASMJIT_INLINE X86Mem intptr_ptr(const Label& label, int32_t disp = 0) const noexcept { + return x86::ptr(label, disp, zax.getSize()); + } + //! \overload + ASMJIT_INLINE X86Mem intptr_ptr(const Label& label, const X86GpReg& index, uint32_t shift, int32_t disp = 0) const noexcept { + return x86::ptr(label, index, shift, disp, zax.getSize()); + } + //! \overload + ASMJIT_INLINE X86Mem intptr_ptr(const X86RipReg& rip, int32_t disp = 0) const noexcept { + return x86::ptr(rip, disp, zax.getSize()); + } + //! \overload + ASMJIT_INLINE X86Mem intptr_ptr_abs(Ptr pAbs, int32_t disp = 0) const noexcept { + return x86::ptr_abs(pAbs, disp, zax.getSize()); + } + //! \overload + ASMJIT_INLINE X86Mem intptr_ptr_abs(Ptr pAbs, const X86GpReg& index, uint32_t shift, int32_t disp = 0) const noexcept { + return x86::ptr_abs(pAbs, index, shift, disp, zax.getSize()); + } + + //! \overload + ASMJIT_INLINE X86Mem intptr_ptr(const X86GpVar& base, int32_t disp = 0) noexcept { + return x86::ptr(base, disp, zax.getSize()); + } + //! \overload + ASMJIT_INLINE X86Mem intptr_ptr(const X86GpVar& base, const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0) noexcept { + return x86::ptr(base, index, shift, disp, zax.getSize()); + } + //! \overload + ASMJIT_INLINE X86Mem intptr_ptr(const Label& label, const X86GpVar& index, uint32_t shift, int32_t disp = 0) noexcept { + return x86::ptr(label, index, shift, disp, zax.getSize()); + } + //! 
\overload + ASMJIT_INLINE X86Mem intptr_ptr_abs(Ptr pAbs, const X86GpVar& index, uint32_t shift, int32_t disp = 0) noexcept { + return x86::ptr_abs(pAbs, index, shift, disp, zax.getSize()); + } + + // -------------------------------------------------------------------------- + // [Inst / Emit] + // -------------------------------------------------------------------------- + + //! Create a new `HLInst`. + ASMJIT_API HLInst* newInst(uint32_t code) noexcept; + //! \overload + ASMJIT_API HLInst* newInst(uint32_t code, const Operand& o0) noexcept; + //! \overload + ASMJIT_API HLInst* newInst(uint32_t code, const Operand& o0, const Operand& o1) noexcept; + //! \overload + ASMJIT_API HLInst* newInst(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2) noexcept; + //! \overload + ASMJIT_API HLInst* newInst(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) noexcept; + //! \overload + ASMJIT_API HLInst* newInst(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3, const Operand& o4) noexcept; + + //! Add a new `HLInst`. + ASMJIT_API HLInst* emit(uint32_t code) noexcept; + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, const Operand& o0) noexcept; + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, const Operand& o0, const Operand& o1) noexcept; + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2) noexcept; + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) noexcept; + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3, const Operand& o4) noexcept; + + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, int o0) noexcept; + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, uint64_t o0) noexcept; + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, const Operand& o0, int o1) noexcept; + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, const Operand& o0, uint64_t o1) noexcept; + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, const Operand& o0, const Operand& o1, int o2) noexcept; + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, const Operand& o0, const Operand& o1, uint64_t o2) noexcept; + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, int o3) noexcept; + //! \overload + ASMJIT_API HLInst* emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, uint64_t o3) noexcept; + + // -------------------------------------------------------------------------- + // [Func] + // -------------------------------------------------------------------------- + + //! Create a new `X86FuncNode`. + ASMJIT_API X86FuncNode* newFunc(const FuncPrototype& p) noexcept; + + using Compiler::addFunc; + + //! Add a new function. + //! + //! \param p Function prototype. + //! + //! This method is usually used as a first step used to generate a dynamic + //! function. The prototype `p` contains a function calling convention, + //! return value, and parameters. There are some helper classes that simplify + //! function prototype building, see `FuncBuilder0<...>`, `FuncBuilder1<...>`, + //! `FuncBuilder2<...>`, etc... + //! + //! Templates with `FuncBuilder` prefix are used to generate a function + //! prototype based on real C++ types. See the next example that shows how + //! 
to generate a function with two 32-bit integer arguments.
+  //!
+  //! ~~~
+  //! JitRuntime runtime;
+  //! X86Assembler a(&runtime);
+  //! X86Compiler c(&a);
+  //!
+  //! // Add a function taking two 32-bit integer arguments.
+  //! c.addFunc(FuncBuilder2<Void, int, int>(kCallConvHost));
+  //!
+  //! // ... body ...
+  //!
+  //! // End of the function.
+  //! c.endFunc();
+  //! ~~~
+  //!
+  //! Building functions is really easy! The code snippet above can be used
+  //! to generate a function with two `int32_t` arguments. To assign a
+  //! variable to a function argument use `c.setArg(index, variable)`.
+  //!
+  //! ~~~
+  //! JitRuntime runtime;
+  //! X86Assembler a(&runtime);
+  //! X86Compiler c(&a);
+  //!
+  //! X86GpVar arg0 = c.newInt32("arg0");
+  //! X86GpVar arg1 = c.newInt32("arg1");
+  //!
+  //! // Add a function taking two 32-bit integer arguments.
+  //! c.addFunc(FuncBuilder2<Void, int, int>(kCallConvHost));
+  //!
+  //! c.setArg(0, arg0);
+  //! c.setArg(1, arg1);
+  //!
+  //! // ... do something ...
+  //! c.add(arg0, arg1);
+  //!
+  //! // End of the function.
+  //! c.endFunc();
+  //! ~~~
+  //!
+  //! Arguments behave like variables; how to manipulate variables is
+  //! documented in the Variables section of the `X86Compiler` documentation.
+  //!
+  //! NOTE: To get the current function use the `getFunc()` method.
+  //!
+  //! \sa \ref FuncBuilder0, \ref FuncBuilder1, \ref FuncBuilder2.
+  ASMJIT_API X86FuncNode* addFunc(const FuncPrototype& p) noexcept;
+
+  //! Emit a sentinel that marks the end of the current function.
+  ASMJIT_API HLSentinel* endFunc() noexcept;
+
+  //! Get the current function node cast to `X86FuncNode`.
+  //!
+  //! This method can be called between `addFunc()` and `endFunc()` to get the
+  //! current function you are working with. It's recommended to store the
+  //! `X86FuncNode` pointer returned by `addFunc()`, because this allows you
+  //! to implement function sections outside of the function itself in the
+  //! future.
+  ASMJIT_INLINE X86FuncNode* getFunc() const noexcept {
+    return static_cast<X86FuncNode*>(_func);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Ret]
+  // --------------------------------------------------------------------------
+
+  //! Create a new `HLRet`.
+  ASMJIT_API HLRet* newRet(const Operand& o0, const Operand& o1) noexcept;
+  //! Add a new `HLRet`.
+  ASMJIT_API HLRet* addRet(const Operand& o0, const Operand& o1) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Call]
+  // --------------------------------------------------------------------------
+
+  //! Create a new `X86CallNode`.
+  ASMJIT_API X86CallNode* newCall(const Operand& o0, const FuncPrototype& p) noexcept;
+  //! Add a new `X86CallNode`.
+  ASMJIT_API X86CallNode* addCall(const Operand& o0, const FuncPrototype& p) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Args]
+  // --------------------------------------------------------------------------
+
+  //! Set a function argument to `var`.
+  ASMJIT_API Error setArg(uint32_t argIndex, const Var& var) noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Vars]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API Error _newVar(Var* var, uint32_t vType, const char* name) noexcept;
+  ASMJIT_API Error _newVar(Var* var, uint32_t vType, const char* fmt, va_list ap) noexcept;
+
+#if !defined(ASMJIT_DISABLE_LOGGER)
+#define ASMJIT_NEW_VAR_TYPE_EX(func, type, typeFirst, typeLast) \
+  ASMJIT_NOINLINE type new##func(uint32_t vType, const char* name, ...)
{ \ + ASMJIT_ASSERT(vType < kX86VarTypeCount); \ + ASMJIT_ASSERT(Utils::inInterval(vType, typeFirst, typeLast)); \ + \ + type var(NoInit); \ + va_list ap; \ + va_start(ap, name); \ + \ + _newVar(&var, vType, name, ap); \ + \ + va_end(ap); \ + return var; \ + } +#define ASMJIT_NEW_VAR_AUTO_EX(func, type, typeId) \ + ASMJIT_NOINLINE type new##func(const char* name, ...) { \ + type var(NoInit); \ + va_list ap; \ + va_start(ap, name); \ + \ + _newVar(&var, typeId, name, ap); \ + \ + va_end(ap); \ + return var; \ + } +#else +#define ASMJIT_NEW_VAR_TYPE_EX(func, type, typeFirst, typeLast) \ + ASMJIT_NOINLINE type new##func(uint32_t vType, const char* name, ...) { \ + ASMJIT_ASSERT(vType < kX86VarTypeCount); \ + ASMJIT_ASSERT(Utils::inInterval(vType, typeFirst, typeLast)); \ + \ + type var(NoInit); \ + _newVar(&var, vType, nullptr); \ + return var; \ + } +#define ASMJIT_NEW_VAR_AUTO_EX(func, type, typeId) \ + ASMJIT_NOINLINE type new##func(const char* name, ...) { \ + type var(NoInit); \ + _newVar(&var, typeId, nullptr); \ + return var; \ + } +#endif + +#define ASMJIT_REGISTER_VAR_TYPE(func, type, typeFirst, typeLast) \ + ASMJIT_INLINE type get##func##ById(uint32_t vType, uint32_t id) { \ + ASMJIT_ASSERT(vType < kX86VarTypeCount); \ + ASMJIT_ASSERT(Utils::inInterval(vType, typeFirst, typeLast)); \ + \ + type var(NoInit); \ + \ + vType = _targetVarMapping[vType]; \ + const VarInfo& vInfo = _x86VarInfo[vType]; \ + \ + var._init_packed_op_sz_w0_id(Operand::kTypeVar, vInfo.getSize(), vInfo.getRegType() << 8, id); \ + var._vreg.vType = vType; \ + \ + return var; \ + } \ + \ + ASMJIT_INLINE type new##func(uint32_t vType) { \ + ASMJIT_ASSERT(vType < kX86VarTypeCount); \ + ASMJIT_ASSERT(Utils::inInterval(vType, typeFirst, typeLast)); \ + \ + type var(NoInit); \ + _newVar(&var, vType, nullptr); \ + return var; \ + } \ + \ + ASMJIT_NEW_VAR_TYPE_EX(func, type, typeFirst, typeLast) + +#define ASMJIT_REGISTER_VAR_AUTO(func, type, typeId) \ + ASMJIT_INLINE type get##func##ById(uint32_t id) { \ + type var(NoInit); \ + \ + uint32_t vType = _targetVarMapping[typeId]; \ + const VarInfo& vInfo = _x86VarInfo[vType]; \ + \ + var._init_packed_op_sz_w0_id(Operand::kTypeVar, vInfo.getSize(), vInfo.getRegType() << 8, id); \ + var._vreg.vType = vType; \ + \ + return var; \ + } \ + \ + ASMJIT_INLINE type new##func() { \ + type var(NoInit); \ + _newVar(&var, typeId, nullptr); \ + return var; \ + } \ + \ + ASMJIT_NEW_VAR_AUTO_EX(func, type, typeId) + + ASMJIT_REGISTER_VAR_TYPE(GpVar , X86GpVar , _kVarTypeIntStart , _kVarTypeIntEnd ) + ASMJIT_REGISTER_VAR_TYPE(MmVar , X86MmVar , _kX86VarTypeMmStart , _kX86VarTypeMmEnd ) + ASMJIT_REGISTER_VAR_TYPE(XmmVar , X86XmmVar, _kX86VarTypeXmmStart, _kX86VarTypeXmmEnd) + ASMJIT_REGISTER_VAR_TYPE(YmmVar , X86YmmVar, _kX86VarTypeYmmStart, _kX86VarTypeYmmEnd) + + ASMJIT_REGISTER_VAR_AUTO(Int8 , X86GpVar , kVarTypeInt8 ) + ASMJIT_REGISTER_VAR_AUTO(Int16 , X86GpVar , kVarTypeInt16 ) + ASMJIT_REGISTER_VAR_AUTO(Int32 , X86GpVar , kVarTypeInt32 ) + ASMJIT_REGISTER_VAR_AUTO(Int64 , X86GpVar , kVarTypeInt64 ) + ASMJIT_REGISTER_VAR_AUTO(IntPtr , X86GpVar , kVarTypeIntPtr ) + ASMJIT_REGISTER_VAR_AUTO(UInt8 , X86GpVar , kVarTypeUInt8 ) + ASMJIT_REGISTER_VAR_AUTO(UInt16 , X86GpVar , kVarTypeUInt16 ) + ASMJIT_REGISTER_VAR_AUTO(UInt32 , X86GpVar , kVarTypeUInt32 ) + ASMJIT_REGISTER_VAR_AUTO(UInt64 , X86GpVar , kVarTypeUInt64 ) + ASMJIT_REGISTER_VAR_AUTO(UIntPtr, X86GpVar , kVarTypeUIntPtr ) + ASMJIT_REGISTER_VAR_AUTO(Mm , X86MmVar , kX86VarTypeMm ) + ASMJIT_REGISTER_VAR_AUTO(Xmm , X86XmmVar, 
kX86VarTypeXmm ) + ASMJIT_REGISTER_VAR_AUTO(XmmSs , X86XmmVar, kX86VarTypeXmmSs) + ASMJIT_REGISTER_VAR_AUTO(XmmSd , X86XmmVar, kX86VarTypeXmmSd) + ASMJIT_REGISTER_VAR_AUTO(XmmPs , X86XmmVar, kX86VarTypeXmmPs) + ASMJIT_REGISTER_VAR_AUTO(XmmPd , X86XmmVar, kX86VarTypeXmmPd) + ASMJIT_REGISTER_VAR_AUTO(Ymm , X86YmmVar, kX86VarTypeYmm ) + ASMJIT_REGISTER_VAR_AUTO(YmmPs , X86YmmVar, kX86VarTypeYmmPs) + ASMJIT_REGISTER_VAR_AUTO(YmmPd , X86YmmVar, kX86VarTypeYmmPd) + +#undef ASMJIT_NEW_VAR_AUTO +#undef ASMJIT_NEW_VAR_TYPE + + // -------------------------------------------------------------------------- + // [Stack] + // -------------------------------------------------------------------------- + + ASMJIT_API virtual Error _newStack(BaseMem* mem, uint32_t size, uint32_t alignment, const char* name) noexcept; + + //! Create a new memory chunk allocated on the current function's stack. + ASMJIT_INLINE X86Mem newStack(uint32_t size, uint32_t alignment, const char* name = nullptr) noexcept { + X86Mem m(NoInit); + _newStack(&m, size, alignment, name); + return m; + } + + // -------------------------------------------------------------------------- + // [Const] + // -------------------------------------------------------------------------- + + ASMJIT_API virtual Error _newConst(BaseMem* mem, uint32_t scope, const void* data, size_t size) noexcept; + + //! Put data to a constant-pool and get a memory reference to it. + ASMJIT_INLINE X86Mem newConst(uint32_t scope, const void* data, size_t size) noexcept { + X86Mem m(NoInit); + _newConst(&m, scope, data, size); + return m; + } + + //! Put a BYTE `val` to a constant-pool. + ASMJIT_INLINE X86Mem newByteConst(uint32_t scope, uint8_t val) noexcept { return newConst(scope, &val, 1); } + //! Put a WORD `val` to a constant-pool. + ASMJIT_INLINE X86Mem newWordConst(uint32_t scope, uint16_t val) noexcept { return newConst(scope, &val, 2); } + //! Put a DWORD `val` to a constant-pool. + ASMJIT_INLINE X86Mem newDWordConst(uint32_t scope, uint32_t val) noexcept { return newConst(scope, &val, 4); } + //! Put a QWORD `val` to a constant-pool. + ASMJIT_INLINE X86Mem newQWordConst(uint32_t scope, uint64_t val) noexcept { return newConst(scope, &val, 8); } + + //! Put a WORD `val` to a constant-pool. + ASMJIT_INLINE X86Mem newInt16Const(uint32_t scope, int16_t val) noexcept { return newConst(scope, &val, 2); } + //! Put a WORD `val` to a constant-pool. + ASMJIT_INLINE X86Mem newUInt16Const(uint32_t scope, uint16_t val) noexcept { return newConst(scope, &val, 2); } + //! Put a DWORD `val` to a constant-pool. + ASMJIT_INLINE X86Mem newInt32Const(uint32_t scope, int32_t val) noexcept { return newConst(scope, &val, 4); } + //! Put a DWORD `val` to a constant-pool. + ASMJIT_INLINE X86Mem newUInt32Const(uint32_t scope, uint32_t val) noexcept { return newConst(scope, &val, 4); } + //! Put a QWORD `val` to a constant-pool. + ASMJIT_INLINE X86Mem newInt64Const(uint32_t scope, int64_t val) noexcept { return newConst(scope, &val, 8); } + //! Put a QWORD `val` to a constant-pool. + ASMJIT_INLINE X86Mem newUInt64Const(uint32_t scope, uint64_t val) noexcept { return newConst(scope, &val, 8); } + + //! Put a SP-FP `val` to a constant-pool. + ASMJIT_INLINE X86Mem newFloatConst(uint32_t scope, float val) noexcept { return newConst(scope, &val, 4); } + //! Put a DP-FP `val` to a constant-pool. + ASMJIT_INLINE X86Mem newDoubleConst(uint32_t scope, double val) noexcept { return newConst(scope, &val, 8); } + + //! Put a MMX `val` to a constant-pool. 
+  ASMJIT_INLINE X86Mem newMmConst(uint32_t scope, const Vec64& val) noexcept { return newConst(scope, &val, 8); }
+  //! Put a XMM `val` to a constant-pool.
+  ASMJIT_INLINE X86Mem newXmmConst(uint32_t scope, const Vec128& val) noexcept { return newConst(scope, &val, 16); }
+  //! Put a YMM `val` to a constant-pool.
+  ASMJIT_INLINE X86Mem newYmmConst(uint32_t scope, const Vec256& val) noexcept { return newConst(scope, &val, 32); }
+
+  // --------------------------------------------------------------------------
+  // [Embed]
+  // --------------------------------------------------------------------------
+
+  //! Add 8-bit integer data to the instruction stream.
+  ASMJIT_INLINE Error db(uint8_t x) noexcept { return embed(&x, 1); }
+  //! Add 16-bit integer data to the instruction stream.
+  ASMJIT_INLINE Error dw(uint16_t x) noexcept { return embed(&x, 2); }
+  //! Add 32-bit integer data to the instruction stream.
+  ASMJIT_INLINE Error dd(uint32_t x) noexcept { return embed(&x, 4); }
+  //! Add 64-bit integer data to the instruction stream.
+  ASMJIT_INLINE Error dq(uint64_t x) noexcept { return embed(&x, 8); }
+
+  //! Add 8-bit integer data to the instruction stream.
+  ASMJIT_INLINE Error dint8(int8_t x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(int8_t))); }
+  //! Add 8-bit integer data to the instruction stream.
+  ASMJIT_INLINE Error duint8(uint8_t x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(uint8_t))); }
+
+  //! Add 16-bit integer data to the instruction stream.
+  ASMJIT_INLINE Error dint16(int16_t x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(int16_t))); }
+  //! Add 16-bit integer data to the instruction stream.
+  ASMJIT_INLINE Error duint16(uint16_t x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(uint16_t))); }
+
+  //! Add 32-bit integer data to the instruction stream.
+  ASMJIT_INLINE Error dint32(int32_t x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(int32_t))); }
+  //! Add 32-bit integer data to the instruction stream.
+  ASMJIT_INLINE Error duint32(uint32_t x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(uint32_t))); }
+
+  //! Add 64-bit integer data to the instruction stream.
+  ASMJIT_INLINE Error dint64(int64_t x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(int64_t))); }
+  //! Add 64-bit integer data to the instruction stream.
+  ASMJIT_INLINE Error duint64(uint64_t x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(uint64_t))); }
+
+  //! Add float data to the instruction stream.
+  ASMJIT_INLINE Error dfloat(float x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(float))); }
+  //! Add double data to the instruction stream.
+  ASMJIT_INLINE Error ddouble(double x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(double))); }
+
+  //! Add MMX data to the instruction stream.
+  ASMJIT_INLINE Error dmm(const Vec64& x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(Vec64))); }
+  //! Add XMM data to the instruction stream.
+  ASMJIT_INLINE Error dxmm(const Vec128& x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(Vec128))); }
+  //! Add YMM data to the instruction stream.
+  ASMJIT_INLINE Error dymm(const Vec256& x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(Vec256))); }
+
+  //! Add data in a given structure instance to the instruction stream.
+  template<typename T>
+  ASMJIT_INLINE Error dstruct(const T& x) noexcept { return embed(&x, static_cast<uint32_t>(sizeof(T))); }
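+
+  // Illustrative sketch of the constant-pool and embed helpers above; it
+  // assumes an attached `X86Compiler c`, and the `kConstScopeLocal` scope
+  // constant and variable names are assumptions for the example:
+  //
+  //   X86GpVar bits = c.newUInt32("bits");
+  //   X86Mem magic = c.newUInt32Const(kConstScopeLocal, 0xC0FFEEu);
+  //   c.mov(bits, magic); // load the pooled constant through a memory operand
+  //   c.dd(0x90909090u);  // embed a raw DWORD directly into the stream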
+
+  // -------------------------------------------------------------------------
+  // [Instruction Options]
+  // -------------------------------------------------------------------------
+
+  ASMJIT_X86_EMIT_OPTIONS(X86Compiler)
+
+  //! Force the compiler not to follow the conditional or unconditional jump.
+  ASMJIT_INLINE X86Compiler& unfollow() noexcept {
+    _instOptions |= kInstOptionUnfollow;
+    return *this;
+  }
+
+  //! Tell the compiler that the destination variable will be overwritten.
+  ASMJIT_INLINE X86Compiler& overwrite() noexcept {
+    _instOptions |= kInstOptionOverwrite;
+    return *this;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Count of registers depending on the current architecture.
+  X86RegCount _regCount;
+
+  //! EAX or RAX register depending on the current architecture.
+  X86GpReg zax;
+  //! ECX or RCX register depending on the current architecture.
+  X86GpReg zcx;
+  //! EDX or RDX register depending on the current architecture.
+  X86GpReg zdx;
+  //! EBX or RBX register depending on the current architecture.
+  X86GpReg zbx;
+  //! ESP or RSP register depending on the current architecture.
+  X86GpReg zsp;
+  //! EBP or RBP register depending on the current architecture.
+  X86GpReg zbp;
+  //! ESI or RSI register depending on the current architecture.
+  X86GpReg zsi;
+  //! EDI or RDI register depending on the current architecture.
+  X86GpReg zdi;
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+#define INST_0x(_Inst_, _Code_) \
+  ASMJIT_INLINE HLInst* _Inst_() noexcept { \
+    return emit(_Code_); \
+  }
+
+#define INST_1x(_Inst_, _Code_, _Op0_) \
+  ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0) noexcept { \
+    return emit(_Code_, o0); \
+  }
+
+#define INST_1i(_Inst_, _Code_, _Op0_) \
+  ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0) noexcept { \
+    return emit(_Code_, o0); \
+  } \
+  /*! \overload */ \
+  ASMJIT_INLINE HLInst* _Inst_(int o0) noexcept { \
+    return emit(_Code_, o0); \
+  } \
+  /*! \overload */ \
+  ASMJIT_INLINE HLInst* _Inst_(unsigned int o0) noexcept { \
+    return emit(_Code_, static_cast<uint64_t>(o0)); \
+  } \
+  /*! \overload */ \
+  ASMJIT_INLINE HLInst* _Inst_(int64_t o0) noexcept { \
+    return emit(_Code_, static_cast<uint64_t>(o0)); \
+  } \
+  /*!
\overload */ \ + ASMJIT_INLINE HLInst* _Inst_(uint64_t o0) noexcept { \ + return emit(_Code_, o0); \ + } + +#define INST_1cc(_Inst_, _Code_, _Translate_, _Op0_) \ + ASMJIT_INLINE HLInst* _Inst_(uint32_t cc, const _Op0_& o0) noexcept { \ + return emit(_Translate_(cc), o0); \ + } \ + \ + ASMJIT_INLINE HLInst* _Inst_##a(const _Op0_& o0) noexcept { return emit(_Code_##a, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##ae(const _Op0_& o0) noexcept { return emit(_Code_##ae, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##b(const _Op0_& o0) noexcept { return emit(_Code_##b, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##be(const _Op0_& o0) noexcept { return emit(_Code_##be, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##c(const _Op0_& o0) noexcept { return emit(_Code_##c, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##e(const _Op0_& o0) noexcept { return emit(_Code_##e, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##g(const _Op0_& o0) noexcept { return emit(_Code_##g, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##ge(const _Op0_& o0) noexcept { return emit(_Code_##ge, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##l(const _Op0_& o0) noexcept { return emit(_Code_##l, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##le(const _Op0_& o0) noexcept { return emit(_Code_##le, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##na(const _Op0_& o0) noexcept { return emit(_Code_##na, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##nae(const _Op0_& o0) noexcept { return emit(_Code_##nae, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##nb(const _Op0_& o0) noexcept { return emit(_Code_##nb, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##nbe(const _Op0_& o0) noexcept { return emit(_Code_##nbe, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##nc(const _Op0_& o0) noexcept { return emit(_Code_##nc, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##ne(const _Op0_& o0) noexcept { return emit(_Code_##ne, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##ng(const _Op0_& o0) noexcept { return emit(_Code_##ng, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##nge(const _Op0_& o0) noexcept { return emit(_Code_##nge, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##nl(const _Op0_& o0) noexcept { return emit(_Code_##nl, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##nle(const _Op0_& o0) noexcept { return emit(_Code_##nle, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##no(const _Op0_& o0) noexcept { return emit(_Code_##no, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##np(const _Op0_& o0) noexcept { return emit(_Code_##np, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##ns(const _Op0_& o0) noexcept { return emit(_Code_##ns, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##nz(const _Op0_& o0) noexcept { return emit(_Code_##nz, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##o(const _Op0_& o0) noexcept { return emit(_Code_##o, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##p(const _Op0_& o0) noexcept { return emit(_Code_##p, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##pe(const _Op0_& o0) noexcept { return emit(_Code_##pe, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##po(const _Op0_& o0) noexcept { return emit(_Code_##po, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##s(const _Op0_& o0) noexcept { return emit(_Code_##s, o0); } \ + ASMJIT_INLINE HLInst* _Inst_##z(const _Op0_& o0) noexcept { return emit(_Code_##z, o0); } + +#define INST_2x(_Inst_, _Code_, _Op0_, _Op1_) \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1) noexcept { \ + return emit(_Code_, o0, o1); \ + } + +#define INST_2i(_Inst_, _Code_, _Op0_, _Op1_) \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1) noexcept { \ + return emit(_Code_, o0, o1); \ + } \ + /*! 
\overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, int o1) noexcept { \ + return emit(_Code_, o0, o1); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, unsigned int o1) noexcept { \ + return emit(_Code_, o0, static_cast(o1)); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, int64_t o1) noexcept { \ + return emit(_Code_, o0, static_cast(o1)); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, uint64_t o1) noexcept { \ + return emit(_Code_, o0, o1); \ + } + +#define INST_2cc(_Inst_, _Code_, _Translate_, _Op0_, _Op1_) \ + ASMJIT_INLINE HLInst* _Inst_(uint32_t cc, const _Op0_& o0, const _Op1_& o1) noexcept { \ + return emit(_Translate_(cc), o0, o1); \ + } \ + \ + ASMJIT_INLINE HLInst* _Inst_##a(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##a, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##ae(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##ae, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##b(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##b, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##be(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##be, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##c(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##c, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##e(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##e, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##g(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##g, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##ge(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##ge, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##l(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##l, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##le(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##le, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##na(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##na, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##nae(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##nae, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##nb(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##nb, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##nbe(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##nbe, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##nc(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##nc, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##ne(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##ne, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##ng(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##ng, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##nge(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##nge, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##nl(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##nl, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##nle(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##nle, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##no(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##no, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##np(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##np, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##ns(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##ns, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##nz(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##nz, o0, 
o1); } \ + ASMJIT_INLINE HLInst* _Inst_##o(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##o, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##p(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##p, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##pe(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##pe, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##po(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##po, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##s(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##s, o0, o1); } \ + ASMJIT_INLINE HLInst* _Inst_##z(const _Op0_& o0, const _Op1_& o1) noexcept { return emit(_Code_##z, o0, o1); } + +#define INST_3x(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) noexcept { \ + return emit(_Code_, o0, o1, o2); \ + } + +#define INST_3i(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) noexcept { \ + return emit(_Code_, o0, o1, o2); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, int o2) noexcept { \ + return emit(_Code_, o0, o1, o2); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, unsigned int o2) noexcept { \ + return emit(_Code_, o0, o1, static_cast(o2)); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, int64_t o2) noexcept { \ + return emit(_Code_, o0, o1, static_cast(o2)); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, uint64_t o2) noexcept { \ + return emit(_Code_, o0, o1, o2); \ + } + +#define INST_3ii(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) noexcept { \ + return emit(_Code_, o0, o1, o2); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, int o1, int o2) noexcept { \ + Imm o1Imm(o1); \ + return emit(_Code_, o0, o1Imm, o2); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, unsigned int o1, unsigned int o2) noexcept { \ + Imm o1Imm(o1); \ + return emit(_Code_, o0, o1Imm, static_cast(o2)); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, int64_t o1, int64_t o2) noexcept { \ + Imm o1Imm(o1); \ + return emit(_Code_, o0, o1Imm, static_cast(o2)); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, uint64_t o1, uint64_t o2) noexcept { \ + Imm o1Imm(o1); \ + return emit(_Code_, o0, o1Imm, o2); \ + } + +#define INST_4x(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) noexcept { \ + return emit(_Code_, o0, o1, o2, o3); \ + } + +#define INST_4i(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) noexcept { \ + return emit(_Code_, o0, o1, o2, o3); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, int o3) noexcept { \ + return emit(_Code_, o0, o1, o2, o3); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, unsigned int o3) noexcept { \ + return emit(_Code_, o0, o1, o2, static_cast(o3)); \ + } \ + /*! 
\overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, int64_t o3) noexcept { \ + return emit(_Code_, o0, o1, o2, static_cast(o3)); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, uint64_t o3) noexcept { \ + return emit(_Code_, o0, o1, o2, o3); \ + } + +#define INST_4ii(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) noexcept { \ + return emit(_Code_, o0, o1, o2, o3); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, int o2, int o3) noexcept { \ + Imm o2Imm(o2); \ + return emit(_Code_, o0, o1, o2Imm, o3); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, unsigned int o2, unsigned int o3) noexcept { \ + Imm o2Imm(o2); \ + return emit(_Code_, o0, o1, o2Imm, static_cast(o3)); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, int64_t o2, int64_t o3) noexcept { \ + Imm o2Imm(o2); \ + return emit(_Code_, o0, o1, o2Imm, static_cast(o3)); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE HLInst* _Inst_(const _Op0_& o0, const _Op1_& o1, uint64_t o2, uint64_t o3) noexcept { \ + Imm o2Imm(o2); \ + return emit(_Code_, o0, o1, o2Imm, o3); \ + } + + // -------------------------------------------------------------------------- + // [X86/X64] + // -------------------------------------------------------------------------- + + //! Add with carry. + INST_2x(adc, kX86InstIdAdc, X86GpVar, X86GpVar) + //! \overload + INST_2x(adc, kX86InstIdAdc, X86GpVar, X86Mem) + //! \overload + INST_2i(adc, kX86InstIdAdc, X86GpVar, Imm) + //! \overload + INST_2x(adc, kX86InstIdAdc, X86Mem, X86GpVar) + //! \overload + INST_2i(adc, kX86InstIdAdc, X86Mem, Imm) + + //! Add. + INST_2x(add, kX86InstIdAdd, X86GpVar, X86GpVar) + //! \overload + INST_2x(add, kX86InstIdAdd, X86GpVar, X86Mem) + //! \overload + INST_2i(add, kX86InstIdAdd, X86GpVar, Imm) + //! \overload + INST_2x(add, kX86InstIdAdd, X86Mem, X86GpVar) + //! \overload + INST_2i(add, kX86InstIdAdd, X86Mem, Imm) + + //! And. + INST_2x(and_, kX86InstIdAnd, X86GpVar, X86GpVar) + //! \overload + INST_2x(and_, kX86InstIdAnd, X86GpVar, X86Mem) + //! \overload + INST_2i(and_, kX86InstIdAnd, X86GpVar, Imm) + //! \overload + INST_2x(and_, kX86InstIdAnd, X86Mem, X86GpVar) + //! \overload + INST_2i(and_, kX86InstIdAnd, X86Mem, Imm) + + //! Bit scan forward. + INST_2x(bsf, kX86InstIdBsf, X86GpVar, X86GpVar) + //! \overload + INST_2x(bsf, kX86InstIdBsf, X86GpVar, X86Mem) + + //! Bit scan reverse. + INST_2x(bsr, kX86InstIdBsr, X86GpVar, X86GpVar) + //! \overload + INST_2x(bsr, kX86InstIdBsr, X86GpVar, X86Mem) + + //! Byte swap (32-bit or 64-bit registers only) (i486). + INST_1x(bswap, kX86InstIdBswap, X86GpVar) + + //! Bit test. + INST_2x(bt, kX86InstIdBt, X86GpVar, X86GpVar) + //! \overload + INST_2i(bt, kX86InstIdBt, X86GpVar, Imm) + //! \overload + INST_2x(bt, kX86InstIdBt, X86Mem, X86GpVar) + //! \overload + INST_2i(bt, kX86InstIdBt, X86Mem, Imm) + + //! Bit test and complement. + INST_2x(btc, kX86InstIdBtc, X86GpVar, X86GpVar) + //! \overload + INST_2i(btc, kX86InstIdBtc, X86GpVar, Imm) + //! \overload + INST_2x(btc, kX86InstIdBtc, X86Mem, X86GpVar) + //! \overload + INST_2i(btc, kX86InstIdBtc, X86Mem, Imm) + + //! Bit test and reset. + INST_2x(btr, kX86InstIdBtr, X86GpVar, X86GpVar) + //! \overload + INST_2i(btr, kX86InstIdBtr, X86GpVar, Imm) + //! 
\overload + INST_2x(btr, kX86InstIdBtr, X86Mem, X86GpVar) + //! \overload + INST_2i(btr, kX86InstIdBtr, X86Mem, Imm) + + //! Bit test and set. + INST_2x(bts, kX86InstIdBts, X86GpVar, X86GpVar) + //! \overload + INST_2i(bts, kX86InstIdBts, X86GpVar, Imm) + //! \overload + INST_2x(bts, kX86InstIdBts, X86Mem, X86GpVar) + //! \overload + INST_2i(bts, kX86InstIdBts, X86Mem, Imm) + + //! Call a function. + ASMJIT_INLINE X86CallNode* call(const X86GpVar& dst, const FuncPrototype& p) { + return addCall(dst, p); + } + //! \overload + ASMJIT_INLINE X86CallNode* call(const X86Mem& dst, const FuncPrototype& p) { + return addCall(dst, p); + } + //! \overload + ASMJIT_INLINE X86CallNode* call(const Label& label, const FuncPrototype& p) { + return addCall(label, p); + } + //! \overload + ASMJIT_INLINE X86CallNode* call(const Imm& dst, const FuncPrototype& p) { + return addCall(dst, p); + } + //! \overload + ASMJIT_INLINE X86CallNode* call(Ptr dst, const FuncPrototype& p) { + return addCall(Imm(dst), p); + } + + //! Clear carry flag + INST_0x(clc, kX86InstIdClc) + //! Clear direction flag + INST_0x(cld, kX86InstIdCld) + //! Complement carry Flag. + INST_0x(cmc, kX86InstIdCmc) + + //! Convert BYTE to WORD (AX <- Sign Extend AL). + INST_1x(cbw, kX86InstIdCbw, X86GpVar /* AL */) + //! Convert DWORD to QWORD (EDX:EAX <- Sign Extend EAX). + INST_2x(cdq, kX86InstIdCdq, X86GpVar /* EDX */, X86GpVar /* EAX */) + //! Convert DWORD to QWORD (RAX <- Sign Extend EAX) (X64 Only). + INST_1x(cdqe, kX86InstIdCdqe, X86GpVar /* EAX */) + //! Convert QWORD to DQWORD (RDX:RAX <- Sign Extend RAX) (X64 Only). + INST_2x(cqo, kX86InstIdCdq, X86GpVar /* RDX */, X86GpVar /* RAX */) + //! Convert WORD to DWORD (DX:AX <- Sign Extend AX). + INST_2x(cwd, kX86InstIdCwd, X86GpVar /* DX */, X86GpVar /* AX */) + //! Convert WORD to DWORD (EAX <- Sign Extend AX). + INST_1x(cwde, kX86InstIdCwde, X86GpVar /* EAX */) + + //! Conditional move. + INST_2cc(cmov, kX86InstIdCmov, X86Util::condToCmovcc, X86GpVar, X86GpVar) + //! Conditional move. + INST_2cc(cmov, kX86InstIdCmov, X86Util::condToCmovcc, X86GpVar, X86Mem) + + //! Compare two operands. + INST_2x(cmp, kX86InstIdCmp, X86GpVar, X86GpVar) + //! \overload + INST_2x(cmp, kX86InstIdCmp, X86GpVar, X86Mem) + //! \overload + INST_2i(cmp, kX86InstIdCmp, X86GpVar, Imm) + //! \overload + INST_2x(cmp, kX86InstIdCmp, X86Mem, X86GpVar) + //! \overload + INST_2i(cmp, kX86InstIdCmp, X86Mem, Imm) + + //! Compare BYTE in ES:`o0` and DS:`o1`. + INST_2x(cmpsb, kX86InstIdCmpsB, X86GpVar, X86GpVar) + //! Compare DWORD in ES:`o0` and DS:`o1`. + INST_2x(cmpsd, kX86InstIdCmpsD, X86GpVar, X86GpVar) + //! Compare QWORD in ES:`o0` and DS:`o1` (X64 Only). + INST_2x(cmpsq, kX86InstIdCmpsQ, X86GpVar, X86GpVar) + //! Compare WORD in ES:`o0` and DS:`o1`. + INST_2x(cmpsw, kX86InstIdCmpsW, X86GpVar, X86GpVar) + + //! Compare and exchange (i486). + INST_3x(cmpxchg, kX86InstIdCmpxchg, X86GpVar /* EAX */, X86GpVar, X86GpVar) + //! \overload + INST_3x(cmpxchg, kX86InstIdCmpxchg, X86GpVar /* EAX */, X86Mem, X86GpVar) + + //! Compare and exchange 128-bit value in RDX:RAX with `x_mem` (X64 Only). + ASMJIT_INLINE HLInst* cmpxchg16b( + const X86GpVar& r_edx, const X86GpVar& r_eax, + const X86GpVar& r_ecx, const X86GpVar& r_ebx, + const X86Mem& x_mem) { + + return emit(kX86InstIdCmpxchg16b, r_edx, r_eax, r_ecx, r_ebx, x_mem); + } + + //! Compare and exchange 64-bit value in EDX:EAX with `x_mem` (Pentium). 
+ ASMJIT_INLINE HLInst* cmpxchg8b( + const X86GpVar& r_edx, const X86GpVar& r_eax, + const X86GpVar& r_ecx, const X86GpVar& r_ebx, + const X86Mem& x_mem) { + + return emit(kX86InstIdCmpxchg8b, r_edx, r_eax, r_ecx, r_ebx, x_mem); + } + + //! CPU identification (i486). + ASMJIT_INLINE HLInst* cpuid(const X86GpVar& x_eax, const X86GpVar& w_ebx, const X86GpVar& x_ecx, const X86GpVar& w_edx) { + return emit(kX86InstIdCpuid, x_eax, w_ebx, x_ecx, w_edx); + } + + //! Decimal adjust AL after addition (X86 Only). + INST_1x(daa, kX86InstIdDaa, X86GpVar) + //! Decimal adjust AL after subtraction (X86 Only). + INST_1x(das, kX86InstIdDas, X86GpVar) + + //! Decrement by 1. + INST_1x(dec, kX86InstIdDec, X86GpVar) + //! \overload + INST_1x(dec, kX86InstIdDec, X86Mem) + + //! Unsigned divide (o0:o1 <- o0:o1 / o2). + //! + //! Remainder is stored in `o0`, quotient is stored in `o1`. + INST_3x(div, kX86InstIdDiv, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(div, kX86InstIdDiv, X86GpVar, X86GpVar, X86Mem) + + //! Signed divide (o0:o1 <- o0:o1 / o2). + //! + //! Remainder is stored in `o0`, quotient is stored in `o1`. + INST_3x(idiv, kX86InstIdIdiv, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(idiv, kX86InstIdIdiv, X86GpVar, X86GpVar, X86Mem) + + //! Signed multiply (o0:o1 <- o1 * o2). + //! + //! Hi value is stored in `o0`, lo value is stored in `o1`. + INST_3x(imul, kX86InstIdImul, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(imul, kX86InstIdImul, X86GpVar, X86GpVar, X86Mem) + + //! Signed multiply. + INST_2x(imul, kX86InstIdImul, X86GpVar, X86GpVar) + //! \overload + INST_2x(imul, kX86InstIdImul, X86GpVar, X86Mem) + //! \overload + INST_2i(imul, kX86InstIdImul, X86GpVar, Imm) + + //! Signed multiply. + INST_3i(imul, kX86InstIdImul, X86GpVar, X86GpVar, Imm) + //! \overload + INST_3i(imul, kX86InstIdImul, X86GpVar, X86Mem, Imm) + + //! Increment by 1. + INST_1x(inc, kX86InstIdInc, X86GpVar) + //! \overload + INST_1x(inc, kX86InstIdInc, X86Mem) + + //! Interrupt. + INST_1i(int_, kX86InstIdInt, Imm) + //! Interrupt 3 - trap to debugger. + ASMJIT_INLINE HLInst* int3() { return int_(3); } + + //! Jump to label `label` if condition `cc` is met. + INST_1cc(j, kX86InstIdJ, X86Util::condToJcc, Label) + + //! Short jump if CX/ECX/RCX is zero. + INST_2x(jecxz, kX86InstIdJecxz, X86GpVar, Label) + + //! Jump. + INST_1x(jmp, kX86InstIdJmp, X86GpVar) + //! \overload + INST_1x(jmp, kX86InstIdJmp, X86Mem) + //! \overload + INST_1x(jmp, kX86InstIdJmp, Label) + //! \overload + INST_1x(jmp, kX86InstIdJmp, Imm) + //! \overload + ASMJIT_INLINE HLInst* jmp(Ptr dst) { return jmp(Imm(dst)); } + + //! Load AH from flags. + INST_1x(lahf, kX86InstIdLahf, X86GpVar) + + //! Load effective address + INST_2x(lea, kX86InstIdLea, X86GpVar, X86Mem) + + //! Load BYTE from DS:`o1` to `o0`. + INST_2x(lodsb, kX86InstIdLodsB, X86GpVar, X86GpVar) + //! Load DWORD from DS:`o1` to `o0`. + INST_2x(lodsd, kX86InstIdLodsD, X86GpVar, X86GpVar) + //! Load QWORD from DS:`o1` to `o0` (X64 Only). + INST_2x(lodsq, kX86InstIdLodsQ, X86GpVar, X86GpVar) + //! Load WORD from DS:`o1` to `o0`. + INST_2x(lodsw, kX86InstIdLodsW, X86GpVar, X86GpVar) + + //! Move. + INST_2x(mov, kX86InstIdMov, X86GpVar, X86GpVar) + //! \overload + INST_2x(mov, kX86InstIdMov, X86GpVar, X86Mem) + //! \overload + INST_2i(mov, kX86InstIdMov, X86GpVar, Imm) + //! \overload + INST_2x(mov, kX86InstIdMov, X86Mem, X86GpVar) + //! \overload + INST_2i(mov, kX86InstIdMov, X86Mem, Imm) + + //! Move from segment register. 
+ INST_2x(mov, kX86InstIdMov, X86GpVar, X86SegReg) + //! \overload + INST_2x(mov, kX86InstIdMov, X86Mem, X86SegReg) + //! Move to segment register. + INST_2x(mov, kX86InstIdMov, X86SegReg, X86GpVar) + //! \overload + INST_2x(mov, kX86InstIdMov, X86SegReg, X86Mem) + + //! Move (AL|AX|EAX|RAX <- absolute address in immediate). + INST_2x(mov_ptr, kX86InstIdMovPtr, X86GpReg, Imm); + //! \overload + ASMJIT_INLINE HLInst* mov_ptr(const X86GpReg& o0, Ptr o1) { + ASMJIT_ASSERT(o0.getRegIndex() == 0); + return emit(kX86InstIdMovPtr, o0, Imm(o1)); + } + + //! Move (absolute address in immediate <- AL|AX|EAX|RAX). + INST_2x(mov_ptr, kX86InstIdMovPtr, Imm, X86GpReg); + //! \overload + ASMJIT_INLINE HLInst* mov_ptr(Ptr o0, const X86GpReg& o1) { + ASMJIT_ASSERT(o1.getRegIndex() == 0); + return emit(kX86InstIdMovPtr, Imm(o0), o1); + } + + //! Move data after swapping bytes (SSE3 - Atom). + INST_2x(movbe, kX86InstIdMovbe, X86GpVar, X86Mem); + //! \overload + INST_2x(movbe, kX86InstIdMovbe, X86Mem, X86GpVar); + + //! Load BYTE from DS:`o1` to ES:`o0`. + INST_2x(movsb, kX86InstIdMovsB, X86GpVar, X86GpVar) + //! Load DWORD from DS:`o1` to ES:`o0`. + INST_2x(movsd, kX86InstIdMovsD, X86GpVar, X86GpVar) + //! Load QWORD from DS:`o1` to ES:`o0` (X64 Only). + INST_2x(movsq, kX86InstIdMovsQ, X86GpVar, X86GpVar) + //! Load WORD from DS:`o1` to ES:`o0`. + INST_2x(movsw, kX86InstIdMovsW, X86GpVar, X86GpVar) + + //! Move with sign-extension. + INST_2x(movsx, kX86InstIdMovsx, X86GpVar, X86GpVar) + //! \overload + INST_2x(movsx, kX86InstIdMovsx, X86GpVar, X86Mem) + + //! Move DWORD to QWORD with sign-extension (X64 Only). + INST_2x(movsxd, kX86InstIdMovsxd, X86GpVar, X86GpVar) + //! \overload + INST_2x(movsxd, kX86InstIdMovsxd, X86GpVar, X86Mem) + + //! Move with zero-extension. + INST_2x(movzx, kX86InstIdMovzx, X86GpVar, X86GpVar) + //! \overload + INST_2x(movzx, kX86InstIdMovzx, X86GpVar, X86Mem) + + //! Unsigned multiply (o0:o1 <- o1 * o2). + INST_3x(mul, kX86InstIdMul, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(mul, kX86InstIdMul, X86GpVar, X86GpVar, X86Mem) + + //! Two's complement negation. + INST_1x(neg, kX86InstIdNeg, X86GpVar) + //! \overload + INST_1x(neg, kX86InstIdNeg, X86Mem) + + //! No operation. + INST_0x(nop, kX86InstIdNop) + + //! One's complement negation. + INST_1x(not_, kX86InstIdNot, X86GpVar) + //! \overload + INST_1x(not_, kX86InstIdNot, X86Mem) + + //! Or. + INST_2x(or_, kX86InstIdOr, X86GpVar, X86GpVar) + //! \overload + INST_2x(or_, kX86InstIdOr, X86GpVar, X86Mem) + //! \overload + INST_2i(or_, kX86InstIdOr, X86GpVar, Imm) + //! \overload + INST_2x(or_, kX86InstIdOr, X86Mem, X86GpVar) + //! \overload + INST_2i(or_, kX86InstIdOr, X86Mem, Imm) + + //! Pop a value from the stack. + INST_1x(pop, kX86InstIdPop, X86GpVar) + //! \overload + INST_1x(pop, kX86InstIdPop, X86Mem) + + //! Pop stack into EFLAGS Register (32-bit or 64-bit). + INST_0x(popf, kX86InstIdPopf) + + //! Push WORD or DWORD/QWORD on the stack. + INST_1x(push, kX86InstIdPush, X86GpVar) + //! Push WORD or DWORD/QWORD on the stack. + INST_1x(push, kX86InstIdPush, X86Mem) + //! Push segment register on the stack. + INST_1x(push, kX86InstIdPush, X86SegReg) + //! Push WORD or DWORD/QWORD on the stack. + INST_1i(push, kX86InstIdPush, Imm) + + //! Push EFLAGS register (32-bit or 64-bit) on the stack. + INST_0x(pushf, kX86InstIdPushf) + + //! Rotate bits left. + INST_2x(rcl, kX86InstIdRcl, X86GpVar, X86GpVar) + //! \overload + INST_2x(rcl, kX86InstIdRcl, X86Mem, X86GpVar) + //! Rotate bits left. 
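+ //! \note Unlike the plain `rol`/`ror` forms below, `rcl`/`rcr` rotate
+ //! through the carry flag: CF takes part in the rotation.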
+ INST_2i(rcl, kX86InstIdRcl, X86GpVar, Imm)
+ //! \overload
+ INST_2i(rcl, kX86InstIdRcl, X86Mem, Imm)
+
+ //! Rotate bits right.
+ INST_2x(rcr, kX86InstIdRcr, X86GpVar, X86GpVar)
+ //! \overload
+ INST_2x(rcr, kX86InstIdRcr, X86Mem, X86GpVar)
+ //! Rotate bits right.
+ INST_2i(rcr, kX86InstIdRcr, X86GpVar, Imm)
+ //! \overload
+ INST_2i(rcr, kX86InstIdRcr, X86Mem, Imm)
+
+ //! Read time-stamp counter (Pentium).
+ INST_2x(rdtsc, kX86InstIdRdtsc, X86GpVar, X86GpVar)
+ //! Read time-stamp counter and processor id (Pentium).
+ INST_3x(rdtscp, kX86InstIdRdtscp, X86GpVar, X86GpVar, X86GpVar)
+
+ //! Repeated load ECX/RCX BYTEs from DS:[ESI/RSI] to AL.
+ INST_3x(rep_lodsb, kX86InstIdRepLodsB, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated load ECX/RCX DWORDs from DS:[ESI/RSI] to EAX.
+ INST_3x(rep_lodsd, kX86InstIdRepLodsD, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated load ECX/RCX QWORDs from DS:[RSI] to RAX (X64 Only).
+ INST_3x(rep_lodsq, kX86InstIdRepLodsQ, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated load ECX/RCX WORDs from DS:[ESI/RSI] to AX.
+ INST_3x(rep_lodsw, kX86InstIdRepLodsW, X86GpVar, X86GpVar, X86GpVar)
+
+ //! Repeated move ECX/RCX BYTEs from DS:[ESI/RSI] to ES:[EDI/RDI].
+ INST_3x(rep_movsb, kX86InstIdRepMovsB, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated move ECX/RCX DWORDs from DS:[ESI/RSI] to ES:[EDI/RDI].
+ INST_3x(rep_movsd, kX86InstIdRepMovsD, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated move ECX/RCX QWORDs from DS:[RSI] to ES:[RDI] (X64 Only).
+ INST_3x(rep_movsq, kX86InstIdRepMovsQ, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated move ECX/RCX WORDs from DS:[ESI/RSI] to ES:[EDI/RDI].
+ INST_3x(rep_movsw, kX86InstIdRepMovsW, X86GpVar, X86GpVar, X86GpVar)
+
+ //! Repeated fill ECX/RCX BYTEs at ES:[EDI/RDI] with AL.
+ INST_3x(rep_stosb, kX86InstIdRepStosB, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated fill ECX/RCX DWORDs at ES:[EDI/RDI] with EAX.
+ INST_3x(rep_stosd, kX86InstIdRepStosD, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated fill ECX/RCX QWORDs at ES:[RDI] with RAX (X64 Only).
+ INST_3x(rep_stosq, kX86InstIdRepStosQ, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated fill ECX/RCX WORDs at ES:[EDI/RDI] with AX.
+ INST_3x(rep_stosw, kX86InstIdRepStosW, X86GpVar, X86GpVar, X86GpVar)
+
+ //! Repeated find non-AL BYTEs in ES:[EDI/RDI] and DS:[ESI/RSI].
+ INST_3x(repe_cmpsb, kX86InstIdRepeCmpsB, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated find non-EAX DWORDs in ES:[EDI/RDI] and DS:[ESI/RSI].
+ INST_3x(repe_cmpsd, kX86InstIdRepeCmpsD, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated find non-RAX QWORDs in ES:[RDI] and DS:[RSI] (X64 Only).
+ INST_3x(repe_cmpsq, kX86InstIdRepeCmpsQ, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated find non-AX WORDs in ES:[EDI/RDI] and DS:[ESI/RSI].
+ INST_3x(repe_cmpsw, kX86InstIdRepeCmpsW, X86GpVar, X86GpVar, X86GpVar)
+
+ //! Repeated find non-AL BYTE starting at ES:[EDI/RDI].
+ INST_3x(repe_scasb, kX86InstIdRepeScasB, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated find non-EAX DWORD starting at ES:[EDI/RDI].
+ INST_3x(repe_scasd, kX86InstIdRepeScasD, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated find non-RAX QWORD starting at ES:[RDI] (X64 Only).
+ INST_3x(repe_scasq, kX86InstIdRepeScasQ, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated find non-AX WORD starting at ES:[EDI/RDI].
+ INST_3x(repe_scasw, kX86InstIdRepeScasW, X86GpVar, X86GpVar, X86GpVar)
+
+ //! Repeated find AL BYTEs in [RDI] and [RSI].
+ INST_3x(repne_cmpsb, kX86InstIdRepneCmpsB, X86GpVar, X86GpVar, X86GpVar)
+ //! Repeated find EAX DWORDs in [RDI] and [RSI].
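+ //!
+ //! All rep-prefixed string ops here take the implicit registers (counter,
+ //! destination, source) as explicit operands so the register allocator can
+ //! pin them. A copy sketch (operand order and var factories assumed, not
+ //! confirmed by this diff; `c` is an X86Compiler):
+ //!
+ //! \code
+ //! X86GpVar dst = c.newIntPtr("dst");
+ //! X86GpVar src = c.newIntPtr("src");
+ //! X86GpVar cnt = c.newIntPtr("cnt");
+ //! c.rep_movsb(dst, src, cnt); // memcpy-like: copies `cnt` BYTEs.
+ //! \endcode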
+ INST_3x(repne_cmpsd, kX86InstIdRepneCmpsD, X86GpVar, X86GpVar, X86GpVar) + //! Repeated find RAX QWORDs in [RDI] and [RSI] (X64 Only). + INST_3x(repne_cmpsq, kX86InstIdRepneCmpsQ, X86GpVar, X86GpVar, X86GpVar) + //! Repeated find AX WORDs in [RDI] and [RSI]. + INST_3x(repne_cmpsw, kX86InstIdRepneCmpsW, X86GpVar, X86GpVar, X86GpVar) + + //! Repeated Find AL BYTEs, starting at ES:[EDI/RDI]. + INST_3x(repne_scasb, kX86InstIdRepneScasB, X86GpVar, X86GpVar, X86GpVar) + //! Repeated find EAX DWORDs, starting at ES:[EDI/RDI]. + INST_3x(repne_scasd, kX86InstIdRepneScasD, X86GpVar, X86GpVar, X86GpVar) + //! Repeated find RAX QWORDs, starting at ES:[RDI] (X64 Only). + INST_3x(repne_scasq, kX86InstIdRepneScasQ, X86GpVar, X86GpVar, X86GpVar) + //! Repeated find AX WORDs, starting at ES:[EDI/RDI]. + INST_3x(repne_scasw, kX86InstIdRepneScasW, X86GpVar, X86GpVar, X86GpVar) + + //! Return. + ASMJIT_INLINE HLRet* ret() { return addRet(noOperand, noOperand); } + //! \overload + ASMJIT_INLINE HLRet* ret(const X86GpVar& o0) { return addRet(o0, noOperand); } + //! \overload + ASMJIT_INLINE HLRet* ret(const X86GpVar& o0, const X86GpVar& o1) { return addRet(o0, o1); } + //! \overload + ASMJIT_INLINE HLRet* ret(const X86XmmVar& o0) { return addRet(o0, noOperand); } + //! \overload + ASMJIT_INLINE HLRet* ret(const X86XmmVar& o0, const X86XmmVar& o1) { return addRet(o0, o1); } + + //! Rotate bits left. + INST_2x(rol, kX86InstIdRol, X86GpVar, X86GpVar) + //! \overload + INST_2x(rol, kX86InstIdRol, X86Mem, X86GpVar) + //! Rotate bits left. + INST_2i(rol, kX86InstIdRol, X86GpVar, Imm) + //! \overload + INST_2i(rol, kX86InstIdRol, X86Mem, Imm) + + //! Rotate bits right. + INST_2x(ror, kX86InstIdRor, X86GpVar, X86GpVar) + //! \overload + INST_2x(ror, kX86InstIdRor, X86Mem, X86GpVar) + //! Rotate bits right. + INST_2i(ror, kX86InstIdRor, X86GpVar, Imm) + //! \overload + INST_2i(ror, kX86InstIdRor, X86Mem, Imm) + + //! Store `a` (allocated in AH/AX/EAX/RAX) into Flags. + INST_1x(sahf, kX86InstIdSahf, X86GpVar) + + //! Integer subtraction with borrow. + INST_2x(sbb, kX86InstIdSbb, X86GpVar, X86GpVar) + //! \overload + INST_2x(sbb, kX86InstIdSbb, X86GpVar, X86Mem) + //! \overload + INST_2i(sbb, kX86InstIdSbb, X86GpVar, Imm) + //! \overload + INST_2x(sbb, kX86InstIdSbb, X86Mem, X86GpVar) + //! \overload + INST_2i(sbb, kX86InstIdSbb, X86Mem, Imm) + + //! Shift bits left. + INST_2x(sal, kX86InstIdSal, X86GpVar, X86GpVar) + //! \overload + INST_2x(sal, kX86InstIdSal, X86Mem, X86GpVar) + //! Shift bits left. + INST_2i(sal, kX86InstIdSal, X86GpVar, Imm) + //! \overload + INST_2i(sal, kX86InstIdSal, X86Mem, Imm) + + //! Shift bits right. + INST_2x(sar, kX86InstIdSar, X86GpVar, X86GpVar) + //! \overload + INST_2x(sar, kX86InstIdSar, X86Mem, X86GpVar) + //! Shift bits right. + INST_2i(sar, kX86InstIdSar, X86GpVar, Imm) + //! \overload + INST_2i(sar, kX86InstIdSar, X86Mem, Imm) + + //! Find non `o1` BYTE starting at ES:`o0`. + INST_2x(scasb, kX86InstIdScasB, X86GpVar, X86GpVar) + //! Find non `o1` DWORD starting at ES:`o0`. + INST_2x(scasd, kX86InstIdScasD, X86GpVar, X86GpVar) + //! Find non `o1` QWORD starting at ES:`o0` (X64 Only). + INST_2x(scasq, kX86InstIdScasQ, X86GpVar, X86GpVar) + //! Find non `o1` WORD starting at ES:`o0`. + INST_2x(scasw, kX86InstIdScasW, X86GpVar, X86GpVar) + + //! Set byte on condition. + INST_1cc(set, kX86InstIdSet, X86Util::condToSetcc, X86GpVar) + //! Set byte on condition. + INST_1cc(set, kX86InstIdSet, X86Util::condToSetcc, X86Mem) + + //! Shift bits left. 
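+ //! \note `shl` and `sal` are the same machine instruction; both shift
+ //! zeros in from the right.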
+ INST_2x(shl, kX86InstIdShl, X86GpVar, X86GpVar) + //! \overload + INST_2x(shl, kX86InstIdShl, X86Mem, X86GpVar) + //! Shift bits left. + INST_2i(shl, kX86InstIdShl, X86GpVar, Imm) + //! \overload + INST_2i(shl, kX86InstIdShl, X86Mem, Imm) + + //! Shift bits right. + INST_2x(shr, kX86InstIdShr, X86GpVar, X86GpVar) + //! \overload + INST_2x(shr, kX86InstIdShr, X86Mem, X86GpVar) + //! Shift bits right. + INST_2i(shr, kX86InstIdShr, X86GpVar, Imm) + //! \overload + INST_2i(shr, kX86InstIdShr, X86Mem, Imm) + + //! Double precision shift left. + INST_3x(shld, kX86InstIdShld, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(shld, kX86InstIdShld, X86Mem, X86GpVar, X86GpVar) + //! Double precision shift left. + INST_3i(shld, kX86InstIdShld, X86GpVar, X86GpVar, Imm) + //! \overload + INST_3i(shld, kX86InstIdShld, X86Mem, X86GpVar, Imm) + + //! Double precision shift right. + INST_3x(shrd, kX86InstIdShrd, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(shrd, kX86InstIdShrd, X86Mem, X86GpVar, X86GpVar) + //! Double precision shift right. + INST_3i(shrd, kX86InstIdShrd, X86GpVar, X86GpVar, Imm) + //! \overload + INST_3i(shrd, kX86InstIdShrd, X86Mem, X86GpVar, Imm) + + //! Set carry flag to 1. + INST_0x(stc, kX86InstIdStc) + //! Set direction flag to 1. + INST_0x(std, kX86InstIdStd) + + //! Fill BYTE at ES:`o0` with `o1`. + INST_2x(stosb, kX86InstIdStosB, X86GpVar, X86GpVar) + //! Fill DWORD at ES:`o0` with `o1`. + INST_2x(stosd, kX86InstIdStosD, X86GpVar, X86GpVar) + //! Fill QWORD at ES:`o0` with `o1` (X64 Only). + INST_2x(stosq, kX86InstIdStosQ, X86GpVar, X86GpVar) + //! Fill WORD at ES:`o0` with `o1`. + INST_2x(stosw, kX86InstIdStosW, X86GpVar, X86GpVar) + + //! Subtract. + INST_2x(sub, kX86InstIdSub, X86GpVar, X86GpVar) + //! \overload + INST_2x(sub, kX86InstIdSub, X86GpVar, X86Mem) + //! \overload + INST_2i(sub, kX86InstIdSub, X86GpVar, Imm) + //! \overload + INST_2x(sub, kX86InstIdSub, X86Mem, X86GpVar) + //! \overload + INST_2i(sub, kX86InstIdSub, X86Mem, Imm) + + //! Logical compare. + INST_2x(test, kX86InstIdTest, X86GpVar, X86GpVar) + //! \overload + INST_2i(test, kX86InstIdTest, X86GpVar, Imm) + //! \overload + INST_2x(test, kX86InstIdTest, X86Mem, X86GpVar) + //! \overload + INST_2i(test, kX86InstIdTest, X86Mem, Imm) + + //! Undefined instruction - Raise #UD exception. + INST_0x(ud2, kX86InstIdUd2) + + //! Exchange and add. + INST_2x(xadd, kX86InstIdXadd, X86GpVar, X86GpVar) + //! \overload + INST_2x(xadd, kX86InstIdXadd, X86Mem, X86GpVar) + + //! Exchange register/memory with register. + INST_2x(xchg, kX86InstIdXchg, X86GpVar, X86GpVar) + //! \overload + INST_2x(xchg, kX86InstIdXchg, X86Mem, X86GpVar) + //! \overload + INST_2x(xchg, kX86InstIdXchg, X86GpVar, X86Mem) + + //! Xor. + INST_2x(xor_, kX86InstIdXor, X86GpVar, X86GpVar) + //! \overload + INST_2x(xor_, kX86InstIdXor, X86GpVar, X86Mem) + //! \overload + INST_2i(xor_, kX86InstIdXor, X86GpVar, Imm) + //! \overload + INST_2x(xor_, kX86InstIdXor, X86Mem, X86GpVar) + //! \overload + INST_2i(xor_, kX86InstIdXor, X86Mem, Imm) + + // -------------------------------------------------------------------------- + // [FPU] + // -------------------------------------------------------------------------- + + //! Compute `2^x - 1` - `fp0 = POW(2, fp0) - 1` (FPU). + INST_0x(f2xm1, kX86InstIdF2xm1) + //! Abs `fp0 = ABS(fp0)` (FPU). + INST_0x(fabs, kX86InstIdFabs) + + //! Add `o0 = o0 + o1` (one operand has to be `fp0`) (FPU). + INST_2x(fadd, kX86InstIdFadd, X86FpReg, X86FpReg) + //! Add `fp0 = fp0 + float_or_double[o0]` (FPU). 
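+ //!
+ //! The x87 ops below operate on a register stack. A sum sketch (the
+ //! `x86::dword_ptr` helper and var factories are assumed; `p` points at
+ //! two packed floats):
+ //!
+ //! \code
+ //! X86GpVar p = c.newIntPtr("p");
+ //! c.fld(x86::dword_ptr(p));     // push p[0]
+ //! c.fadd(x86::dword_ptr(p, 4)); // fp0 += p[1]
+ //! c.fstp(x86::dword_ptr(p));    // p[0] = fp0, then pop
+ //! \endcode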
+ INST_1x(fadd, kX86InstIdFadd, X86Mem) + //! Add `o0 = o0 + fp0` and POP (FPU). + INST_1x(faddp, kX86InstIdFaddp, X86FpReg) + //! Add `fp1 = fp1 + fp0` and POP (FPU). + INST_0x(faddp, kX86InstIdFaddp) + + //! Load BCD from `[o0]` and PUSH (FPU). + INST_1x(fbld, kX86InstIdFbld, X86Mem) + //! Store BCD-Integer to `[o0]` and POP (FPU). + INST_1x(fbstp, kX86InstIdFbstp, X86Mem) + + //! Complement Sign `fp0 = -fp0` (FPU). + INST_0x(fchs, kX86InstIdFchs) + + //! Clear exceptions (FPU). + INST_0x(fclex, kX86InstIdFclex) + + //! Conditional move `if (CF=1) fp0 = o0` (FPU). + INST_1x(fcmovb, kX86InstIdFcmovb, X86FpReg) + //! Conditional move `if (CF|ZF=1) fp0 = o0` (FPU). + INST_1x(fcmovbe, kX86InstIdFcmovbe, X86FpReg) + //! Conditional move `if (ZF=1) fp0 = o0` (FPU). + INST_1x(fcmove, kX86InstIdFcmove, X86FpReg) + //! Conditional move `if (CF=0) fp0 = o0` (FPU). + INST_1x(fcmovnb, kX86InstIdFcmovnb, X86FpReg) + //! Conditional move `if (CF|ZF=0) fp0 = o0` (FPU). + INST_1x(fcmovnbe, kX86InstIdFcmovnbe, X86FpReg) + //! Conditional move `if (ZF=0) fp0 = o0` (FPU). + INST_1x(fcmovne, kX86InstIdFcmovne, X86FpReg) + //! Conditional move `if (PF=0) fp0 = o0` (FPU). + INST_1x(fcmovnu, kX86InstIdFcmovnu, X86FpReg) + //! Conditional move `if (PF=1) fp0 = o0` (FPU). + INST_1x(fcmovu, kX86InstIdFcmovu, X86FpReg) + + //! Compare `fp0` with `o0` (FPU). + INST_1x(fcom, kX86InstIdFcom, X86FpReg) + //! Compare `fp0` with `fp1` (FPU). + INST_0x(fcom, kX86InstIdFcom) + //! Compare `fp0` with `float_or_double[o0]` (FPU). + INST_1x(fcom, kX86InstIdFcom, X86Mem) + //! Compare `fp0` with `o0` and POP (FPU). + INST_1x(fcomp, kX86InstIdFcomp, X86FpReg) + //! Compare `fp0` with `fp1` and POP (FPU). + INST_0x(fcomp, kX86InstIdFcomp) + //! Compare `fp0` with `float_or_double[o0]` and POP (FPU). + INST_1x(fcomp, kX86InstIdFcomp, X86Mem) + //! Compare `fp0` with `fp1` and POP twice (FPU). + INST_0x(fcompp, kX86InstIdFcompp) + //! Compare `fp0` with `o0` and set EFLAGS (FPU). + INST_1x(fcomi, kX86InstIdFcomi, X86FpReg) + //! Compare `fp0` with `o0` and set EFLAGS and POP (FPU). + INST_1x(fcomip, kX86InstIdFcomip, X86FpReg) + + //! Cos `fp0 = cos(fp0)` (FPU). + INST_0x(fcos, kX86InstIdFcos) + + //! Decrement FPU stack pointer (FPU). + INST_0x(fdecstp, kX86InstIdFdecstp) + + //! Divide `o0 = o0 / o1` (one has to be `fp0`) (FPU). + INST_2x(fdiv, kX86InstIdFdiv, X86FpReg, X86FpReg) + //! Divide `fp0 = fp0 / float_or_double[o0]` (FPU). + INST_1x(fdiv, kX86InstIdFdiv, X86Mem) + //! Divide `o0 = o0 / fp0` and POP (FPU). + INST_1x(fdivp, kX86InstIdFdivp, X86FpReg) + //! Divide `fp1 = fp1 / fp0` and POP (FPU). + INST_0x(fdivp, kX86InstIdFdivp) + + //! Reverse divide `o0 = o1 / o0` (one has to be `fp0`) (FPU). + INST_2x(fdivr, kX86InstIdFdivr, X86FpReg, X86FpReg) + //! Reverse divide `fp0 = float_or_double[o0] / fp0` (FPU). + INST_1x(fdivr, kX86InstIdFdivr, X86Mem) + //! Reverse divide `o0 = fp0 / o0` and POP (FPU). + INST_1x(fdivrp, kX86InstIdFdivrp, X86FpReg) + //! Reverse divide `fp1 = fp0 / fp1` and POP (FPU). + INST_0x(fdivrp, kX86InstIdFdivrp) + + //! Free FP register (FPU). + INST_1x(ffree, kX86InstIdFfree, X86FpReg) + + //! Add `fp0 = fp0 + short_or_int[o0]` (FPU). + INST_1x(fiadd, kX86InstIdFiadd, X86Mem) + //! Compare `fp0` with `short_or_int[o0]` (FPU). + INST_1x(ficom, kX86InstIdFicom, X86Mem) + //! Compare `fp0` with `short_or_int[o0]` and POP (FPU). + INST_1x(ficomp, kX86InstIdFicomp, X86Mem) + //! Divide `fp0 = fp0 / short_or_int[o0]` (FPU). + INST_1x(fidiv, kX86InstIdFidiv, X86Mem) + //! 
Reverse divide `fp0 = short_or_int[o0] / fp0` (FPU).
+ INST_1x(fidivr, kX86InstIdFidivr, X86Mem)
+
+ //! Load `short_or_int_or_long[o0]` and PUSH (FPU).
+ INST_1x(fild, kX86InstIdFild, X86Mem)
+ //! Multiply `fp0 *= short_or_int[o0]` (FPU).
+ INST_1x(fimul, kX86InstIdFimul, X86Mem)
+
+ //! Increment FPU stack pointer (FPU).
+ INST_0x(fincstp, kX86InstIdFincstp)
+ //! Initialize FPU (FPU).
+ INST_0x(finit, kX86InstIdFinit)
+
+ //! Subtract `fp0 = fp0 - short_or_int[o0]` (FPU).
+ INST_1x(fisub, kX86InstIdFisub, X86Mem)
+ //! Reverse subtract `fp0 = short_or_int[o0] - fp0` (FPU).
+ INST_1x(fisubr, kX86InstIdFisubr, X86Mem)
+
+ //! Initialize FPU without checking for pending unmasked exceptions (FPU).
+ INST_0x(fninit, kX86InstIdFninit)
+
+ //! Store `fp0` as `short_or_int[o0]` (FPU).
+ INST_1x(fist, kX86InstIdFist, X86Mem)
+ //! Store `fp0` as `short_or_int_or_long[o0]` and POP (FPU).
+ INST_1x(fistp, kX86InstIdFistp, X86Mem)
+
+ //! Load `float_or_double_or_extended[o0]` and PUSH (FPU).
+ INST_1x(fld, kX86InstIdFld, X86Mem)
+ //! PUSH `o0` (FPU).
+ INST_1x(fld, kX86InstIdFld, X86FpReg)
+
+ //! PUSH `1.0` (FPU).
+ INST_0x(fld1, kX86InstIdFld1)
+ //! PUSH `log2(10)` (FPU).
+ INST_0x(fldl2t, kX86InstIdFldl2t)
+ //! PUSH `log2(e)` (FPU).
+ INST_0x(fldl2e, kX86InstIdFldl2e)
+ //! PUSH `pi` (FPU).
+ INST_0x(fldpi, kX86InstIdFldpi)
+ //! PUSH `log10(2)` (FPU).
+ INST_0x(fldlg2, kX86InstIdFldlg2)
+ //! PUSH `ln(2)` (FPU).
+ INST_0x(fldln2, kX86InstIdFldln2)
+ //! PUSH `+0.0` (FPU).
+ INST_0x(fldz, kX86InstIdFldz)
+
+ //! Load x87 FPU control word from `word_ptr[o0]` (FPU).
+ INST_1x(fldcw, kX86InstIdFldcw, X86Mem)
+ //! Load x87 FPU environment (14 or 28 bytes) from `[o0]` (FPU).
+ INST_1x(fldenv, kX86InstIdFldenv, X86Mem)
+
+ //! Multiply `o0 = o0 * o1` (one has to be `fp0`) (FPU).
+ INST_2x(fmul, kX86InstIdFmul, X86FpReg, X86FpReg)
+ //! Multiply `fp0 = fp0 * float_or_double[o0]` (FPU).
+ INST_1x(fmul, kX86InstIdFmul, X86Mem)
+ //! Multiply `o0 = o0 * fp0` and POP (FPU).
+ INST_1x(fmulp, kX86InstIdFmulp, X86FpReg)
+ //! Multiply `fp1 = fp1 * fp0` and POP (FPU).
+ INST_0x(fmulp, kX86InstIdFmulp)
+
+ //! Clear exceptions (FPU).
+ INST_0x(fnclex, kX86InstIdFnclex)
+ //! No operation (FPU).
+ INST_0x(fnop, kX86InstIdFnop)
+ //! Save FPU state to `[o0]` (FPU).
+ INST_1x(fnsave, kX86InstIdFnsave, X86Mem)
+ //! Store x87 FPU environment to `[o0]` (FPU).
+ INST_1x(fnstenv, kX86InstIdFnstenv, X86Mem)
+ //! Store x87 FPU control word to `[o0]` (FPU).
+ INST_1x(fnstcw, kX86InstIdFnstcw, X86Mem)
+
+ //! Store x87 FPU status word to `o0` (AX) (FPU).
+ INST_1x(fnstsw, kX86InstIdFnstsw, X86GpVar)
+ //! Store x87 FPU status word to `word_ptr[o0]` (FPU).
+ INST_1x(fnstsw, kX86InstIdFnstsw, X86Mem)
+
+ //! Partial Arctan `fp1 = atan2(fp1, fp0)` and POP (FPU).
+ INST_0x(fpatan, kX86InstIdFpatan)
+ //! Partial Remainder[Trunc] `fp0 = fp0 % fp1` (FPU).
+ INST_0x(fprem, kX86InstIdFprem)
+ //! Partial Remainder[Round] `fp0 = fp0 % fp1` (FPU).
+ INST_0x(fprem1, kX86InstIdFprem1)
+ //! Partial Tan `fp0 = tan(fp0)` and PUSH `1.0` (FPU).
+ INST_0x(fptan, kX86InstIdFptan)
+ //! Round `fp0 = round(fp0)` (FPU).
+ INST_0x(frndint, kX86InstIdFrndint)
+
+ //! Restore FPU state from `[o0]` (94 or 108 bytes) (FPU).
+ INST_1x(frstor, kX86InstIdFrstor, X86Mem)
+ //! Save FPU state to `[o0]` (94 or 108 bytes) (FPU).
+ INST_1x(fsave, kX86InstIdFsave, X86Mem)
+
+ //! Scale `fp0 = fp0 * pow(2, RoundTowardsZero(fp1))` (FPU).
+ INST_0x(fscale, kX86InstIdFscale)
+ //! Sin `fp0 = sin(fp0)` (FPU).
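+ //!
+ //! Sketch (helpers assumed as in the `fadd` example above; `p` is a
+ //! hypothetical pointer var): compute the sine of a double in place:
+ //!
+ //! \code
+ //! c.fld(x86::qword_ptr(p));  // push the value
+ //! c.fsin();                  // fp0 = sin(fp0), angle in radians
+ //! c.fstp(x86::qword_ptr(p)); // store and pop
+ //! \endcode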
+ INST_0x(fsin, kX86InstIdFsin)
+ //! Sincos `fp0 = sin(fp0)` and PUSH `cos(fp0)` (FPU).
+ INST_0x(fsincos, kX86InstIdFsincos)
+ //! Square root `fp0 = sqrt(fp0)` (FPU).
+ INST_0x(fsqrt, kX86InstIdFsqrt)
+
+ //! Store floating point value to `float_or_double[o0]` (FPU).
+ INST_1x(fst, kX86InstIdFst, X86Mem)
+ //! Copy `o0 = fp0` (FPU).
+ INST_1x(fst, kX86InstIdFst, X86FpReg)
+ //! Store floating point value to `float_or_double_or_extended[o0]` and POP (FPU).
+ INST_1x(fstp, kX86InstIdFstp, X86Mem)
+ //! Copy `o0 = fp0` and POP (FPU).
+ INST_1x(fstp, kX86InstIdFstp, X86FpReg)
+
+ //! Store x87 FPU control word to `word_ptr[o0]` (FPU).
+ INST_1x(fstcw, kX86InstIdFstcw, X86Mem)
+ //! Store x87 FPU environment to `[o0]` (14 or 28 bytes) (FPU).
+ INST_1x(fstenv, kX86InstIdFstenv, X86Mem)
+ //! Store x87 FPU status word to `o0` (AX) (FPU).
+ INST_1x(fstsw, kX86InstIdFstsw, X86GpVar)
+ //! Store x87 FPU status word to `word_ptr[o0]` (FPU).
+ INST_1x(fstsw, kX86InstIdFstsw, X86Mem)
+
+ //! Subtract `o0 = o0 - o1` (one has to be `fp0`) (FPU).
+ INST_2x(fsub, kX86InstIdFsub, X86FpReg, X86FpReg)
+ //! Subtract `fp0 = fp0 - float_or_double[o0]` (FPU).
+ INST_1x(fsub, kX86InstIdFsub, X86Mem)
+ //! Subtract `o0 = o0 - fp0` and POP (FPU).
+ INST_1x(fsubp, kX86InstIdFsubp, X86FpReg)
+ //! Subtract `fp1 = fp1 - fp0` and POP (FPU).
+ INST_0x(fsubp, kX86InstIdFsubp)
+
+ //! Reverse subtract `o0 = o1 - o0` (one has to be `fp0`) (FPU).
+ INST_2x(fsubr, kX86InstIdFsubr, X86FpReg, X86FpReg)
+ //! Reverse subtract `fp0 = float_or_double[o0] - fp0` (FPU).
+ INST_1x(fsubr, kX86InstIdFsubr, X86Mem)
+ //! Reverse subtract `o0 = fp0 - o0` and POP (FPU).
+ INST_1x(fsubrp, kX86InstIdFsubrp, X86FpReg)
+ //! Reverse subtract `fp1 = fp0 - fp1` and POP (FPU).
+ INST_0x(fsubrp, kX86InstIdFsubrp)
+
+ //! Compare `fp0` with `0.0` (FPU).
+ INST_0x(ftst, kX86InstIdFtst)
+
+ //! Unordered compare `fp0` with `o0` (FPU).
+ INST_1x(fucom, kX86InstIdFucom, X86FpReg)
+ //! Unordered compare `fp0` with `fp1` (FPU).
+ INST_0x(fucom, kX86InstIdFucom)
+ //! Unordered compare `fp0` with `o0`, check for ordered values and set EFLAGS (FPU).
+ INST_1x(fucomi, kX86InstIdFucomi, X86FpReg)
+ //! Unordered compare `fp0` with `o0`, check for ordered values and set EFLAGS and POP (FPU).
+ INST_1x(fucomip, kX86InstIdFucomip, X86FpReg)
+ //! Unordered compare `fp0` with `o0` and POP (FPU).
+ INST_1x(fucomp, kX86InstIdFucomp, X86FpReg)
+ //! Unordered compare `fp0` with `fp1` and POP (FPU).
+ INST_0x(fucomp, kX86InstIdFucomp)
+ //! Unordered compare `fp0` with `fp1` and POP twice (FPU).
+ INST_0x(fucompp, kX86InstIdFucompp)
+
+ //! Wait until pending unmasked FPU exceptions have been handled (FPU).
+ INST_0x(fwait, kX86InstIdFwait)
+
+ //! Examine fp0 (FPU).
+ INST_0x(fxam, kX86InstIdFxam)
+ //! Exchange `fp0` with `o0` (FPU).
+ INST_1x(fxch, kX86InstIdFxch, X86FpReg)
+
+ //! Extract `fp0 = exponent(fp0)` and PUSH `significant(fp0)` (FPU).
+ INST_0x(fxtract, kX86InstIdFxtract)
+
+ //! Compute `fp1 = fp1 * log2(fp0)` and POP (FPU).
+ INST_0x(fyl2x, kX86InstIdFyl2x)
+ //! Compute `fp1 = fp1 * log2(fp0 + 1)` and POP (FPU).
+ INST_0x(fyl2xp1, kX86InstIdFyl2xp1)
+
+ // --------------------------------------------------------------------------
+ // [FXSR]
+ // --------------------------------------------------------------------------
+
+ //! Restore FP/MMX/SIMD extension states from `o0` (512 bytes) (FXSR).
+ INST_1x(fxrstor, kX86InstIdFxrstor, X86Mem)
+ //! Restore FP/MMX/SIMD extension states from `o0` (512 bytes) (FXSR & X64).
+ INST_1x(fxrstor64, kX86InstIdFxrstor64, X86Mem)
+ //! 
Store FP/MMX/SIMD extension states to `o0` (512 bytes) (FXSR). + INST_1x(fxsave, kX86InstIdFxsave, X86Mem) + //! Store FP/MMX/SIMD extension states to `o0` (512 bytes) (FXSR & X64). + INST_1x(fxsave64, kX86InstIdFxsave64, X86Mem) + + // -------------------------------------------------------------------------- + // [XSAVE] + // -------------------------------------------------------------------------- + + //! Restore Processor Extended States specified by `o1:o2` (XSAVE). + INST_3x(xrstor, kX86InstIdXrstor, X86Mem, X86GpVar, X86GpVar) + //! Restore Processor Extended States specified by `o1:o2` (XSAVE & X64). + INST_3x(xrstor64, kX86InstIdXrstor64, X86Mem, X86GpVar, X86GpVar) + + //! Save Processor Extended States specified by `o1:o2` (XSAVE). + INST_3x(xsave, kX86InstIdXsave, X86Mem, X86GpVar, X86GpVar) + //! Save Processor Extended States specified by `o1:o2` (XSAVE & X64). + INST_3x(xsave64, kX86InstIdXsave64, X86Mem, X86GpVar, X86GpVar) + + //! Save Processor Extended States specified by `o1:o2` (Optimized) (XSAVEOPT). + INST_3x(xsaveopt, kX86InstIdXsaveopt, X86Mem, X86GpVar, X86GpVar) + //! Save Processor Extended States specified by `o1:o2` (Optimized) (XSAVEOPT & X64). + INST_3x(xsaveopt64, kX86InstIdXsaveopt64, X86Mem, X86GpVar, X86GpVar) + + //! Get XCR - `o1:o2 <- XCR[o0]` (`EDX:EAX <- XCR[ECX]`) (XSAVE). + INST_3x(xgetbv, kX86InstIdXgetbv, X86GpVar, X86GpVar, X86GpVar) + //! Set XCR - `XCR[o0] <- o1:o2` (`XCR[ECX] <- EDX:EAX`) (XSAVE). + INST_3x(xsetbv, kX86InstIdXsetbv, X86GpVar, X86GpVar, X86GpVar) + + // -------------------------------------------------------------------------- + // [POPCNT] + // -------------------------------------------------------------------------- + + //! Return the count of number of bits set to 1 (POPCNT). + INST_2x(popcnt, kX86InstIdPopcnt, X86GpVar, X86GpVar) + //! \overload + INST_2x(popcnt, kX86InstIdPopcnt, X86GpVar, X86Mem) + + // -------------------------------------------------------------------------- + // [LZCNT] + // -------------------------------------------------------------------------- + + //! Count the number of leading zero bits (LZCNT). + INST_2x(lzcnt, kX86InstIdLzcnt, X86GpVar, X86GpVar) + //! \overload + INST_2x(lzcnt, kX86InstIdLzcnt, X86GpVar, X86Mem) + + // -------------------------------------------------------------------------- + // [BMI] + // -------------------------------------------------------------------------- + + //! Bitwise and-not (BMI). + INST_3x(andn, kX86InstIdAndn, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(andn, kX86InstIdAndn, X86GpVar, X86GpVar, X86Mem) + + //! Bit field extract (BMI). + INST_3x(bextr, kX86InstIdBextr, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(bextr, kX86InstIdBextr, X86GpVar, X86Mem, X86GpVar) + + //! Extract lower set isolated bit (BMI). + INST_2x(blsi, kX86InstIdBlsi, X86GpVar, X86GpVar) + //! \overload + INST_2x(blsi, kX86InstIdBlsi, X86GpVar, X86Mem) + + //! Get mask up to lowest set bit (BMI). + INST_2x(blsmsk, kX86InstIdBlsmsk, X86GpVar, X86GpVar) + //! \overload + INST_2x(blsmsk, kX86InstIdBlsmsk, X86GpVar, X86Mem) + + //! Reset lowest set bit (BMI). + INST_2x(blsr, kX86InstIdBlsr, X86GpVar, X86GpVar) + //! \overload + INST_2x(blsr, kX86InstIdBlsr, X86GpVar, X86Mem) + + //! Count the number of trailing zero bits (BMI). + INST_2x(tzcnt, kX86InstIdTzcnt, X86GpVar, X86GpVar) + //! 
\overload + INST_2x(tzcnt, kX86InstIdTzcnt, X86GpVar, X86Mem) + + // -------------------------------------------------------------------------- + // [BMI2] + // -------------------------------------------------------------------------- + + //! Zero high bits starting with specified bit position (BMI2). + INST_3x(bzhi, kX86InstIdBzhi, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(bzhi, kX86InstIdBzhi, X86GpVar, X86Mem, X86GpVar) + + //! Unsigned multiply without affecting flags (BMI2). + INST_3x(mulx, kX86InstIdMulx, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(mulx, kX86InstIdMulx, X86GpVar, X86GpVar, X86Mem) + + //! Parallel bits deposit (BMI2). + INST_3x(pdep, kX86InstIdPdep, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(pdep, kX86InstIdPdep, X86GpVar, X86GpVar, X86Mem) + + //! Parallel bits extract (BMI2). + INST_3x(pext, kX86InstIdPext, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(pext, kX86InstIdPext, X86GpVar, X86GpVar, X86Mem) + + //! Rotate right without affecting flags (BMI2). + INST_3i(rorx, kX86InstIdRorx, X86GpVar, X86GpVar, Imm) + //! \overload + INST_3i(rorx, kX86InstIdRorx, X86GpVar, X86Mem, Imm) + + //! Shift arithmetic right without affecting flags (BMI2). + INST_3x(sarx, kX86InstIdSarx, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(sarx, kX86InstIdSarx, X86GpVar, X86Mem, X86GpVar) + + //! Shift logical left without affecting flags (BMI2). + INST_3x(shlx, kX86InstIdShlx, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(shlx, kX86InstIdShlx, X86GpVar, X86Mem, X86GpVar) + + //! Shift logical right without affecting flags (BMI2). + INST_3x(shrx, kX86InstIdShrx, X86GpVar, X86GpVar, X86GpVar) + //! \overload + INST_3x(shrx, kX86InstIdShrx, X86GpVar, X86Mem, X86GpVar) + + // -------------------------------------------------------------------------- + // [ADX] + // -------------------------------------------------------------------------- + + //! Unsigned integer addition of two operands with carry flag (ADX). + INST_2x(adcx, kX86InstIdAdcx, X86GpVar, X86GpVar) + //! \overload + INST_2x(adcx, kX86InstIdAdcx, X86GpVar, X86Mem) + + //! Unsigned integer addition of two operands with overflow flag (ADX). + INST_2x(adox, kX86InstIdAdox, X86GpVar, X86GpVar) + //! \overload + INST_2x(adox, kX86InstIdAdox, X86GpVar, X86Mem) + + // -------------------------------------------------------------------------- + // [TBM] + // -------------------------------------------------------------------------- + + //! Fill from lowest clear bit (TBM). + INST_2x(blcfill, kX86InstIdBlcfill, X86GpVar, X86GpVar) + //! \overload + INST_2x(blcfill, kX86InstIdBlcfill, X86GpVar, X86Mem) + + //! Isolate lowest clear bit (TBM). + INST_2x(blci, kX86InstIdBlci, X86GpVar, X86GpVar) + //! \overload + INST_2x(blci, kX86InstIdBlci, X86GpVar, X86Mem) + + //! Isolate lowest clear bit and complement (TBM). + INST_2x(blcic, kX86InstIdBlcic, X86GpVar, X86GpVar) + //! \overload + INST_2x(blcic, kX86InstIdBlcic, X86GpVar, X86Mem) + + //! Mask from lowest clear bit (TBM). + INST_2x(blcmsk, kX86InstIdBlcmsk, X86GpVar, X86GpVar) + //! \overload + INST_2x(blcmsk, kX86InstIdBlcmsk, X86GpVar, X86Mem) + + //! Set lowest clear bit (TBM). + INST_2x(blcs, kX86InstIdBlcs, X86GpVar, X86GpVar) + //! \overload + INST_2x(blcs, kX86InstIdBlcs, X86GpVar, X86Mem) + + //! Fill from lowest set bit (TBM). + INST_2x(blsfill, kX86InstIdBlsfill, X86GpVar, X86GpVar) + //! \overload + INST_2x(blsfill, kX86InstIdBlsfill, X86GpVar, X86Mem) + + //! Isolate lowest set bit and complement (TBM). 
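+ //!
+ //! Sketch combining the bit ops above (var factories assumed): peel off
+ //! the lowest set bit and get its index:
+ //!
+ //! \code
+ //! X86GpVar x = c.newUInt32("x");
+ //! X86GpVar i = c.newUInt32("i");
+ //! c.tzcnt(i, x); // i = index of the lowest set bit
+ //! c.blsr(x, x);  // clear that bit
+ //! \endcode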
+ INST_2x(blsic, kX86InstIdBlsic, X86GpVar, X86GpVar) + //! \overload + INST_2x(blsic, kX86InstIdBlsic, X86GpVar, X86Mem) + + //! Inverse mask from trailing ones (TBM) + INST_2x(t1mskc, kX86InstIdT1mskc, X86GpVar, X86GpVar) + //! \overload + INST_2x(t1mskc, kX86InstIdT1mskc, X86GpVar, X86Mem) + + //! Mask from trailing zeros (TBM) + INST_2x(tzmsk, kX86InstIdTzmsk, X86GpVar, X86GpVar) + //! \overload + INST_2x(tzmsk, kX86InstIdTzmsk, X86GpVar, X86Mem) + + // -------------------------------------------------------------------------- + // [CLFLUSH / CLFLUSH_OPT] + // -------------------------------------------------------------------------- + + //! Flush cache line (CLFLUSH). + INST_1x(clflush, kX86InstIdClflush, X86Mem) + + //! Flush cache line (CLFLUSH_OPT). + INST_1x(clflushopt, kX86InstIdClflushopt, X86Mem) + + // -------------------------------------------------------------------------- + // [PREFETCHW / PREFETCHW1] + // -------------------------------------------------------------------------- + + //! Prefetch data into caches in anticipation of a write (3DNOW / PREFETCHW). + INST_1x(prefetchw, kX86InstIdPrefetchw, X86Mem) + + //! Prefetch vector data into caches with intent to write and T1 hint (PREFETCHWT1). + INST_1x(prefetchwt1, kX86InstIdPrefetchwt1, X86Mem) + + // -------------------------------------------------------------------------- + // [RDRAND / RDSEED] + // -------------------------------------------------------------------------- + + //! Store a pseudo-random number in destination register (crypto-unsafe) (RDRAND). + INST_1x(rdrand, kX86InstIdRdrand, X86GpVar) + + //! Store a random seed in destination register (crypto-unsafe) (RDSEED). + INST_1x(rdseed, kX86InstIdRdseed, X86GpVar) + + // -------------------------------------------------------------------------- + // [FSGSBASE] + // -------------------------------------------------------------------------- + + INST_1x(rdfsbase, kX86InstIdRdfsbase, X86GpVar) + INST_1x(rdgsbase, kX86InstIdRdgsbase, X86GpVar) + INST_1x(wrfsbase, kX86InstIdWrfsbase, X86GpVar) + INST_1x(wrgsbase, kX86InstIdWrgsbase, X86GpVar) + + // -------------------------------------------------------------------------- + // [MMX] + // -------------------------------------------------------------------------- + + //! Move DWORD (MMX). + INST_2x(movd, kX86InstIdMovd, X86Mem, X86MmVar) + //! \overload + INST_2x(movd, kX86InstIdMovd, X86GpVar, X86MmVar) + //! \overload + INST_2x(movd, kX86InstIdMovd, X86MmVar, X86Mem) + //! \overload + INST_2x(movd, kX86InstIdMovd, X86MmVar, X86GpVar) + + //! Move QWORD (MMX). + INST_2x(movq, kX86InstIdMovq, X86MmVar, X86MmVar) + //! \overload + INST_2x(movq, kX86InstIdMovq, X86Mem, X86MmVar) + //! \overload + INST_2x(movq, kX86InstIdMovq, X86MmVar, X86Mem) + + //! Move QWORD (X64 Only). + INST_2x(movq, kX86InstIdMovq, X86GpVar, X86MmVar) + //! \overload + INST_2x(movq, kX86InstIdMovq, X86MmVar, X86GpVar) + + //! Pack DWORDs to WORDs with signed saturation (MMX). + INST_2x(packssdw, kX86InstIdPackssdw, X86MmVar, X86MmVar) + //! \overload + INST_2x(packssdw, kX86InstIdPackssdw, X86MmVar, X86Mem) + + //! Pack WORDs to BYTEs with signed saturation (MMX). + INST_2x(packsswb, kX86InstIdPacksswb, X86MmVar, X86MmVar) + //! \overload + INST_2x(packsswb, kX86InstIdPacksswb, X86MmVar, X86Mem) + + //! Pack WORDs to BYTEs with unsigned saturation (MMX). + INST_2x(packuswb, kX86InstIdPackuswb, X86MmVar, X86MmVar) + //! \overload + INST_2x(packuswb, kX86InstIdPackuswb, X86MmVar, X86Mem) + + //! Packed BYTE add (MMX). 
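+ //!
+ //! MMX sketch (var factories assumed; end MMX sections with `emms` before
+ //! running x87 code, since the registers are aliased):
+ //!
+ //! \code
+ //! X86MmVar a = c.newMm("a");
+ //! X86MmVar b = c.newMm("b");
+ //! c.paddb(a, b); // eight BYTE-wise adds at once
+ //! c.emms();
+ //! \endcode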
+ INST_2x(paddb, kX86InstIdPaddb, X86MmVar, X86MmVar) + //! \overload + INST_2x(paddb, kX86InstIdPaddb, X86MmVar, X86Mem) + + //! Packed DWORD add (MMX). + INST_2x(paddd, kX86InstIdPaddd, X86MmVar, X86MmVar) + //! \overload + INST_2x(paddd, kX86InstIdPaddd, X86MmVar, X86Mem) + + //! Packed BYTE add with saturation (MMX). + INST_2x(paddsb, kX86InstIdPaddsb, X86MmVar, X86MmVar) + //! \overload + INST_2x(paddsb, kX86InstIdPaddsb, X86MmVar, X86Mem) + + //! Packed WORD add with saturation (MMX). + INST_2x(paddsw, kX86InstIdPaddsw, X86MmVar, X86MmVar) + //! \overload + INST_2x(paddsw, kX86InstIdPaddsw, X86MmVar, X86Mem) + + //! Packed BYTE add with unsigned saturation (MMX). + INST_2x(paddusb, kX86InstIdPaddusb, X86MmVar, X86MmVar) + //! \overload + INST_2x(paddusb, kX86InstIdPaddusb, X86MmVar, X86Mem) + + //! Packed WORD add with unsigned saturation (MMX). + INST_2x(paddusw, kX86InstIdPaddusw, X86MmVar, X86MmVar) + //! \overload + INST_2x(paddusw, kX86InstIdPaddusw, X86MmVar, X86Mem) + + //! Packed WORD add (MMX). + INST_2x(paddw, kX86InstIdPaddw, X86MmVar, X86MmVar) + //! \overload + INST_2x(paddw, kX86InstIdPaddw, X86MmVar, X86Mem) + + //! Packed and (MMX). + INST_2x(pand, kX86InstIdPand, X86MmVar, X86MmVar) + //! \overload + INST_2x(pand, kX86InstIdPand, X86MmVar, X86Mem) + + //! Packed and-not (MMX). + INST_2x(pandn, kX86InstIdPandn, X86MmVar, X86MmVar) + //! \overload + INST_2x(pandn, kX86InstIdPandn, X86MmVar, X86Mem) + + //! Packed BYTEs compare for equality (MMX). + INST_2x(pcmpeqb, kX86InstIdPcmpeqb, X86MmVar, X86MmVar) + //! \overload + INST_2x(pcmpeqb, kX86InstIdPcmpeqb, X86MmVar, X86Mem) + + //! Packed DWORDs compare for equality (MMX). + INST_2x(pcmpeqd, kX86InstIdPcmpeqd, X86MmVar, X86MmVar) + //! \overload + INST_2x(pcmpeqd, kX86InstIdPcmpeqd, X86MmVar, X86Mem) + + //! Packed WORDs compare for equality (MMX). + INST_2x(pcmpeqw, kX86InstIdPcmpeqw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pcmpeqw, kX86InstIdPcmpeqw, X86MmVar, X86Mem) + + //! Packed BYTEs compare if greater than (MMX). + INST_2x(pcmpgtb, kX86InstIdPcmpgtb, X86MmVar, X86MmVar) + //! \overload + INST_2x(pcmpgtb, kX86InstIdPcmpgtb, X86MmVar, X86Mem) + + //! Packed DWORDs compare if greater than (MMX). + INST_2x(pcmpgtd, kX86InstIdPcmpgtd, X86MmVar, X86MmVar) + //! \overload + INST_2x(pcmpgtd, kX86InstIdPcmpgtd, X86MmVar, X86Mem) + + //! Packed WORDs compare if greater than (MMX). + INST_2x(pcmpgtw, kX86InstIdPcmpgtw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pcmpgtw, kX86InstIdPcmpgtw, X86MmVar, X86Mem) + + //! Packed WORD multiply high (MMX). + INST_2x(pmulhw, kX86InstIdPmulhw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pmulhw, kX86InstIdPmulhw, X86MmVar, X86Mem) + + //! Packed WORD multiply low (MMX). + INST_2x(pmullw, kX86InstIdPmullw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pmullw, kX86InstIdPmullw, X86MmVar, X86Mem) + + //! Packed bitwise or (MMX). + INST_2x(por, kX86InstIdPor, X86MmVar, X86MmVar) + //! \overload + INST_2x(por, kX86InstIdPor, X86MmVar, X86Mem) + + //! Packed WORD multiply and add to packed DWORD (MMX). + INST_2x(pmaddwd, kX86InstIdPmaddwd, X86MmVar, X86MmVar) + //! \overload + INST_2x(pmaddwd, kX86InstIdPmaddwd, X86MmVar, X86Mem) + + //! Packed DWORD shift left logical (MMX). + INST_2x(pslld, kX86InstIdPslld, X86MmVar, X86MmVar) + //! \overload + INST_2x(pslld, kX86InstIdPslld, X86MmVar, X86Mem) + //! \overload + INST_2i(pslld, kX86InstIdPslld, X86MmVar, Imm) + + //! Packed QWORD shift left logical (MMX). + INST_2x(psllq, kX86InstIdPsllq, X86MmVar, X86MmVar) + //! 
\overload + INST_2x(psllq, kX86InstIdPsllq, X86MmVar, X86Mem) + //! \overload + INST_2i(psllq, kX86InstIdPsllq, X86MmVar, Imm) + + //! Packed WORD shift left logical (MMX). + INST_2x(psllw, kX86InstIdPsllw, X86MmVar, X86MmVar) + //! \overload + INST_2x(psllw, kX86InstIdPsllw, X86MmVar, X86Mem) + //! \overload + INST_2i(psllw, kX86InstIdPsllw, X86MmVar, Imm) + + //! Packed DWORD shift right arithmetic (MMX). + INST_2x(psrad, kX86InstIdPsrad, X86MmVar, X86MmVar) + //! \overload + INST_2x(psrad, kX86InstIdPsrad, X86MmVar, X86Mem) + //! \overload + INST_2i(psrad, kX86InstIdPsrad, X86MmVar, Imm) + + //! Packed WORD shift right arithmetic (MMX). + INST_2x(psraw, kX86InstIdPsraw, X86MmVar, X86MmVar) + //! \overload + INST_2x(psraw, kX86InstIdPsraw, X86MmVar, X86Mem) + //! \overload + INST_2i(psraw, kX86InstIdPsraw, X86MmVar, Imm) + + //! Packed DWORD shift right logical (MMX). + INST_2x(psrld, kX86InstIdPsrld, X86MmVar, X86MmVar) + //! \overload + INST_2x(psrld, kX86InstIdPsrld, X86MmVar, X86Mem) + //! \overload + INST_2i(psrld, kX86InstIdPsrld, X86MmVar, Imm) + + //! Packed QWORD shift right logical (MMX). + INST_2x(psrlq, kX86InstIdPsrlq, X86MmVar, X86MmVar) + //! \overload + INST_2x(psrlq, kX86InstIdPsrlq, X86MmVar, X86Mem) + //! \overload + INST_2i(psrlq, kX86InstIdPsrlq, X86MmVar, Imm) + + //! Packed WORD shift right logical (MMX). + INST_2x(psrlw, kX86InstIdPsrlw, X86MmVar, X86MmVar) + //! \overload + INST_2x(psrlw, kX86InstIdPsrlw, X86MmVar, X86Mem) + //! \overload + INST_2i(psrlw, kX86InstIdPsrlw, X86MmVar, Imm) + + //! Packed BYTE subtract (MMX). + INST_2x(psubb, kX86InstIdPsubb, X86MmVar, X86MmVar) + //! \overload + INST_2x(psubb, kX86InstIdPsubb, X86MmVar, X86Mem) + + //! Packed DWORD subtract (MMX). + INST_2x(psubd, kX86InstIdPsubd, X86MmVar, X86MmVar) + //! \overload + INST_2x(psubd, kX86InstIdPsubd, X86MmVar, X86Mem) + + //! Packed BYTE subtract with saturation (MMX). + INST_2x(psubsb, kX86InstIdPsubsb, X86MmVar, X86MmVar) + //! \overload + INST_2x(psubsb, kX86InstIdPsubsb, X86MmVar, X86Mem) + + //! Packed WORD subtract with saturation (MMX). + INST_2x(psubsw, kX86InstIdPsubsw, X86MmVar, X86MmVar) + //! \overload + INST_2x(psubsw, kX86InstIdPsubsw, X86MmVar, X86Mem) + + //! Packed BYTE subtract with unsigned saturation (MMX). + INST_2x(psubusb, kX86InstIdPsubusb, X86MmVar, X86MmVar) + //! \overload + INST_2x(psubusb, kX86InstIdPsubusb, X86MmVar, X86Mem) + + //! Packed WORD subtract with unsigned saturation (MMX). + INST_2x(psubusw, kX86InstIdPsubusw, X86MmVar, X86MmVar) + //! \overload + INST_2x(psubusw, kX86InstIdPsubusw, X86MmVar, X86Mem) + + //! Packed WORD subtract (MMX). + INST_2x(psubw, kX86InstIdPsubw, X86MmVar, X86MmVar) + //! \overload + INST_2x(psubw, kX86InstIdPsubw, X86MmVar, X86Mem) + + //! Unpack high packed BYTEs to WORDs (MMX). + INST_2x(punpckhbw, kX86InstIdPunpckhbw, X86MmVar, X86MmVar) + //! \overload + INST_2x(punpckhbw, kX86InstIdPunpckhbw, X86MmVar, X86Mem) + + //! Unpack high packed DWORDs to QWORDs (MMX). + INST_2x(punpckhdq, kX86InstIdPunpckhdq, X86MmVar, X86MmVar) + //! \overload + INST_2x(punpckhdq, kX86InstIdPunpckhdq, X86MmVar, X86Mem) + + //! Unpack high packed WORDs to DWORDs (MMX). + INST_2x(punpckhwd, kX86InstIdPunpckhwd, X86MmVar, X86MmVar) + //! \overload + INST_2x(punpckhwd, kX86InstIdPunpckhwd, X86MmVar, X86Mem) + + //! Unpack low packed BYTEs to WORDs (MMX). + INST_2x(punpcklbw, kX86InstIdPunpcklbw, X86MmVar, X86MmVar) + //! \overload + INST_2x(punpcklbw, kX86InstIdPunpcklbw, X86MmVar, X86Mem) + + //! Unpack low packed DWORDs to QWORDs (MMX). 
+ INST_2x(punpckldq, kX86InstIdPunpckldq, X86MmVar, X86MmVar) + //! \overload + INST_2x(punpckldq, kX86InstIdPunpckldq, X86MmVar, X86Mem) + + //! Unpack low packed WORDs to DWORDs (MMX). + INST_2x(punpcklwd, kX86InstIdPunpcklwd, X86MmVar, X86MmVar) + //! \overload + INST_2x(punpcklwd, kX86InstIdPunpcklwd, X86MmVar, X86Mem) + + //! Packed bitwise xor (MMX). + INST_2x(pxor, kX86InstIdPxor, X86MmVar, X86MmVar) + //! \overload + INST_2x(pxor, kX86InstIdPxor, X86MmVar, X86Mem) + + //! Empty MMX state. + INST_0x(emms, kX86InstIdEmms) + + // -------------------------------------------------------------------------- + // [3DNOW] + // -------------------------------------------------------------------------- + + //! Packed unsigned BYTE average (3DNOW). + INST_2x(pavgusb, kX86InstIdPavgusb, X86MmVar, X86MmVar) + //! \overload + INST_2x(pavgusb, kX86InstIdPavgusb, X86MmVar, X86Mem) + + //! Packed SP-FP to DWORD convert (3DNOW). + INST_2x(pf2id, kX86InstIdPf2id, X86MmVar, X86MmVar) + //! \overload + INST_2x(pf2id, kX86InstIdPf2id, X86MmVar, X86Mem) + + //! Packed SP-FP to WORD convert (3DNOW). + INST_2x(pf2iw, kX86InstIdPf2iw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pf2iw, kX86InstIdPf2iw, X86MmVar, X86Mem) + + //! Packed SP-FP accumulate (3DNOW). + INST_2x(pfacc, kX86InstIdPfacc, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfacc, kX86InstIdPfacc, X86MmVar, X86Mem) + + //! Packed SP-FP addition (3DNOW). + INST_2x(pfadd, kX86InstIdPfadd, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfadd, kX86InstIdPfadd, X86MmVar, X86Mem) + + //! Packed SP-FP compare - dst == src (3DNOW). + INST_2x(pfcmpeq, kX86InstIdPfcmpeq, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfcmpeq, kX86InstIdPfcmpeq, X86MmVar, X86Mem) + + //! Packed SP-FP compare - dst >= src (3DNOW). + INST_2x(pfcmpge, kX86InstIdPfcmpge, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfcmpge, kX86InstIdPfcmpge, X86MmVar, X86Mem) + + //! Packed SP-FP compare - dst > src (3DNOW). + INST_2x(pfcmpgt, kX86InstIdPfcmpgt, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfcmpgt, kX86InstIdPfcmpgt, X86MmVar, X86Mem) + + //! Packed SP-FP maximum (3DNOW). + INST_2x(pfmax, kX86InstIdPfmax, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfmax, kX86InstIdPfmax, X86MmVar, X86Mem) + + //! Packed SP-FP minimum (3DNOW). + INST_2x(pfmin, kX86InstIdPfmin, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfmin, kX86InstIdPfmin, X86MmVar, X86Mem) + + //! Packed SP-FP multiply (3DNOW). + INST_2x(pfmul, kX86InstIdPfmul, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfmul, kX86InstIdPfmul, X86MmVar, X86Mem) + + //! Packed SP-FP negative accumulate (3DNOW). + INST_2x(pfnacc, kX86InstIdPfnacc, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfnacc, kX86InstIdPfnacc, X86MmVar, X86Mem) + + //! Packed SP-FP mixed accumulate (3DNOW). + INST_2x(pfpnacc, kX86InstIdPfpnacc, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfpnacc, kX86InstIdPfpnacc, X86MmVar, X86Mem) + + //! Packed SP-FP reciprocal approximation (3DNOW). + INST_2x(pfrcp, kX86InstIdPfrcp, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfrcp, kX86InstIdPfrcp, X86MmVar, X86Mem) + + //! Packed SP-FP reciprocal, first iteration step (3DNOW). + INST_2x(pfrcpit1, kX86InstIdPfrcpit1, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfrcpit1, kX86InstIdPfrcpit1, X86MmVar, X86Mem) + + //! Packed SP-FP reciprocal, second iteration step (3DNOW). + INST_2x(pfrcpit2, kX86InstIdPfrcpit2, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfrcpit2, kX86InstIdPfrcpit2, X86MmVar, X86Mem) + + //! 
Packed SP-FP reciprocal square root, first iteration step (3DNOW). + INST_2x(pfrsqit1, kX86InstIdPfrsqit1, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfrsqit1, kX86InstIdPfrsqit1, X86MmVar, X86Mem) + + //! Packed SP-FP reciprocal square root approximation (3DNOW). + INST_2x(pfrsqrt, kX86InstIdPfrsqrt, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfrsqrt, kX86InstIdPfrsqrt, X86MmVar, X86Mem) + + //! Packed SP-FP subtract (3DNOW). + INST_2x(pfsub, kX86InstIdPfsub, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfsub, kX86InstIdPfsub, X86MmVar, X86Mem) + + //! Packed SP-FP reverse subtract (3DNOW). + INST_2x(pfsubr, kX86InstIdPfsubr, X86MmVar, X86MmVar) + //! \overload + INST_2x(pfsubr, kX86InstIdPfsubr, X86MmVar, X86Mem) + + //! Packed DWORDs to SP-FP (3DNOW). + INST_2x(pi2fd, kX86InstIdPi2fd, X86MmVar, X86MmVar) + //! \overload + INST_2x(pi2fd, kX86InstIdPi2fd, X86MmVar, X86Mem) + + //! Packed WORDs to SP-FP (3DNOW). + INST_2x(pi2fw, kX86InstIdPi2fw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pi2fw, kX86InstIdPi2fw, X86MmVar, X86Mem) + + //! Packed multiply WORD with rounding (3DNOW). + INST_2x(pmulhrw, kX86InstIdPmulhrw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pmulhrw, kX86InstIdPmulhrw, X86MmVar, X86Mem) + + //! Packed swap DWORDs (3DNOW). + INST_2x(pswapd, kX86InstIdPswapd, X86MmVar, X86MmVar) + //! \overload + INST_2x(pswapd, kX86InstIdPswapd, X86MmVar, X86Mem) + + //! Prefetch (3DNOW). + INST_1x(prefetch3dnow, kX86InstIdPrefetch3dNow, X86Mem) + + //! Faster EMMS (3DNOW). + INST_0x(femms, kX86InstIdFemms) + + // -------------------------------------------------------------------------- + // [SSE] + // -------------------------------------------------------------------------- + + //! Packed SP-FP add (SSE). + INST_2x(addps, kX86InstIdAddps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(addps, kX86InstIdAddps, X86XmmVar, X86Mem) + + //! Scalar SP-FP add (SSE). + INST_2x(addss, kX86InstIdAddss, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(addss, kX86InstIdAddss, X86XmmVar, X86Mem) + + //! Packed SP-FP bitwise and-not (SSE). + INST_2x(andnps, kX86InstIdAndnps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(andnps, kX86InstIdAndnps, X86XmmVar, X86Mem) + + //! Packed SP-FP bitwise and (SSE). + INST_2x(andps, kX86InstIdAndps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(andps, kX86InstIdAndps, X86XmmVar, X86Mem) + + //! Packed SP-FP compare (SSE). + INST_3i(cmpps, kX86InstIdCmpps, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(cmpps, kX86InstIdCmpps, X86XmmVar, X86Mem, Imm) + + //! Compare scalar SP-FP Values (SSE). + INST_3i(cmpss, kX86InstIdCmpss, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(cmpss, kX86InstIdCmpss, X86XmmVar, X86Mem, Imm) + + //! Scalar ordered SP-FP compare and set EFLAGS (SSE). + INST_2x(comiss, kX86InstIdComiss, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(comiss, kX86InstIdComiss, X86XmmVar, X86Mem) + + //! Packed signed INT32 to packed SP-FP conversion (SSE). + INST_2x(cvtpi2ps, kX86InstIdCvtpi2ps, X86XmmVar, X86MmVar) + //! \overload + INST_2x(cvtpi2ps, kX86InstIdCvtpi2ps, X86XmmVar, X86Mem) + + //! Packed SP-FP to packed INT32 conversion (SSE). + INST_2x(cvtps2pi, kX86InstIdCvtps2pi, X86MmVar, X86XmmVar) + //! \overload + INST_2x(cvtps2pi, kX86InstIdCvtps2pi, X86MmVar, X86Mem) + + //! Convert scalar INT32 to SP-FP (SSE). + INST_2x(cvtsi2ss, kX86InstIdCvtsi2ss, X86XmmVar, X86GpVar) + //! \overload + INST_2x(cvtsi2ss, kX86InstIdCvtsi2ss, X86XmmVar, X86Mem) + + //! Convert scalar SP-FP to INT32 (SSE). 
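+ //!
+ //! Conversion sketch (`newXmmSs`/`newInt32` factories assumed):
+ //!
+ //! \code
+ //! X86XmmVar f = c.newXmmSs("f");
+ //! X86GpVar n = c.newInt32("n");
+ //! c.cvtss2si(n, f); // n = (int)f, rounded per the current MXCSR mode
+ //! \endcode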
+ INST_2x(cvtss2si, kX86InstIdCvtss2si, X86GpVar, X86XmmVar) + //! \overload + INST_2x(cvtss2si, kX86InstIdCvtss2si, X86GpVar, X86Mem) + + //! Convert with truncation packed SP-FP to packed INT32 (SSE). + INST_2x(cvttps2pi, kX86InstIdCvttps2pi, X86MmVar, X86XmmVar) + //! \overload + INST_2x(cvttps2pi, kX86InstIdCvttps2pi, X86MmVar, X86Mem) + + //! Convert with truncation scalar SP-FP to INT32 (SSE). + INST_2x(cvttss2si, kX86InstIdCvttss2si, X86GpVar, X86XmmVar) + //! \overload + INST_2x(cvttss2si, kX86InstIdCvttss2si, X86GpVar, X86Mem) + + //! Packed SP-FP divide (SSE). + INST_2x(divps, kX86InstIdDivps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(divps, kX86InstIdDivps, X86XmmVar, X86Mem) + + //! Scalar SP-FP divide (SSE). + INST_2x(divss, kX86InstIdDivss, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(divss, kX86InstIdDivss, X86XmmVar, X86Mem) + + //! Load streaming SIMD extension control/status (SSE). + INST_1x(ldmxcsr, kX86InstIdLdmxcsr, X86Mem) + + //! Byte mask write (SSE). + INST_3x(maskmovq, kX86InstIdMaskmovq, X86GpVar /* ZDI */, X86MmVar, X86MmVar) + + //! Packed SP-FP maximum (SSE). + INST_2x(maxps, kX86InstIdMaxps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(maxps, kX86InstIdMaxps, X86XmmVar, X86Mem) + + //! Scalar SP-FP maximum (SSE). + INST_2x(maxss, kX86InstIdMaxss, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(maxss, kX86InstIdMaxss, X86XmmVar, X86Mem) + + //! Packed SP-FP minimum (SSE). + INST_2x(minps, kX86InstIdMinps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(minps, kX86InstIdMinps, X86XmmVar, X86Mem) + + //! Scalar SP-FP minimum (SSE). + INST_2x(minss, kX86InstIdMinss, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(minss, kX86InstIdMinss, X86XmmVar, X86Mem) + + //! Move aligned packed SP-FP (SSE). + INST_2x(movaps, kX86InstIdMovaps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(movaps, kX86InstIdMovaps, X86XmmVar, X86Mem) + //! Move aligned packed SP-FP (SSE). + INST_2x(movaps, kX86InstIdMovaps, X86Mem, X86XmmVar) + + //! Move DWORD. + INST_2x(movd, kX86InstIdMovd, X86Mem, X86XmmVar) + //! \overload + INST_2x(movd, kX86InstIdMovd, X86GpVar, X86XmmVar) + //! \overload + INST_2x(movd, kX86InstIdMovd, X86XmmVar, X86Mem) + //! \overload + INST_2x(movd, kX86InstIdMovd, X86XmmVar, X86GpVar) + + //! Move QWORD (SSE). + INST_2x(movq, kX86InstIdMovq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(movq, kX86InstIdMovq, X86Mem, X86XmmVar) + //! \overload + INST_2x(movq, kX86InstIdMovq, X86XmmVar, X86Mem) + + //! Move QWORD (X64 Only). + INST_2x(movq, kX86InstIdMovq, X86GpVar, X86XmmVar) + //! \overload + INST_2x(movq, kX86InstIdMovq, X86XmmVar, X86GpVar) + + //! Move QWORD using NT hint (SSE). + INST_2x(movntq, kX86InstIdMovntq, X86Mem, X86MmVar) + + //! Move high to low packed SP-FP (SSE). + INST_2x(movhlps, kX86InstIdMovhlps, X86XmmVar, X86XmmVar) + + //! Move high packed SP-FP (SSE). + INST_2x(movhps, kX86InstIdMovhps, X86XmmVar, X86Mem) + //! Move high packed SP-FP (SSE). + INST_2x(movhps, kX86InstIdMovhps, X86Mem, X86XmmVar) + + //! Move low to high packed SP-FP (SSE). + INST_2x(movlhps, kX86InstIdMovlhps, X86XmmVar, X86XmmVar) + + //! Move low packed SP-FP (SSE). + INST_2x(movlps, kX86InstIdMovlps, X86XmmVar, X86Mem) + //! Move low packed SP-FP (SSE). + INST_2x(movlps, kX86InstIdMovlps, X86Mem, X86XmmVar) + + //! Move aligned packed SP-FP using NT hint (SSE). + INST_2x(movntps, kX86InstIdMovntps, X86Mem, X86XmmVar) + + //! Move scalar SP-FP (SSE). + INST_2x(movss, kX86InstIdMovss, X86XmmVar, X86XmmVar) + //! 
\overload + INST_2x(movss, kX86InstIdMovss, X86XmmVar, X86Mem) + //! \overload + INST_2x(movss, kX86InstIdMovss, X86Mem, X86XmmVar) + + //! Move unaligned packed SP-FP (SSE). + INST_2x(movups, kX86InstIdMovups, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(movups, kX86InstIdMovups, X86XmmVar, X86Mem) + //! \overload + INST_2x(movups, kX86InstIdMovups, X86Mem, X86XmmVar) + + //! Packed SP-FP multiply (SSE). + INST_2x(mulps, kX86InstIdMulps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(mulps, kX86InstIdMulps, X86XmmVar, X86Mem) + + //! Scalar SP-FP multiply (SSE). + INST_2x(mulss, kX86InstIdMulss, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(mulss, kX86InstIdMulss, X86XmmVar, X86Mem) + + //! Packed SP-FP bitwise or (SSE). + INST_2x(orps, kX86InstIdOrps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(orps, kX86InstIdOrps, X86XmmVar, X86Mem) + + //! Packed BYTE average (SSE). + INST_2x(pavgb, kX86InstIdPavgb, X86MmVar, X86MmVar) + //! \overload + INST_2x(pavgb, kX86InstIdPavgb, X86MmVar, X86Mem) + + //! Packed WORD average (SSE). + INST_2x(pavgw, kX86InstIdPavgw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pavgw, kX86InstIdPavgw, X86MmVar, X86Mem) + + //! Extract WORD based on selector (SSE). + INST_3i(pextrw, kX86InstIdPextrw, X86GpVar, X86MmVar, Imm) + + //! Insert WORD based on selector (SSE). + INST_3i(pinsrw, kX86InstIdPinsrw, X86MmVar, X86GpVar, Imm) + //! \overload + INST_3i(pinsrw, kX86InstIdPinsrw, X86MmVar, X86Mem, Imm) + + //! Packed WORD maximum (SSE). + INST_2x(pmaxsw, kX86InstIdPmaxsw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pmaxsw, kX86InstIdPmaxsw, X86MmVar, X86Mem) + + //! Packed BYTE unsigned maximum (SSE). + INST_2x(pmaxub, kX86InstIdPmaxub, X86MmVar, X86MmVar) + //! \overload + INST_2x(pmaxub, kX86InstIdPmaxub, X86MmVar, X86Mem) + + //! Packed WORD minimum (SSE). + INST_2x(pminsw, kX86InstIdPminsw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pminsw, kX86InstIdPminsw, X86MmVar, X86Mem) + + //! Packed BYTE unsigned minimum (SSE). + INST_2x(pminub, kX86InstIdPminub, X86MmVar, X86MmVar) + //! \overload + INST_2x(pminub, kX86InstIdPminub, X86MmVar, X86Mem) + + //! Move byte mask to integer (SSE). + INST_2x(pmovmskb, kX86InstIdPmovmskb, X86GpVar, X86MmVar) + + //! Packed WORD unsigned multiply high (SSE). + INST_2x(pmulhuw, kX86InstIdPmulhuw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pmulhuw, kX86InstIdPmulhuw, X86MmVar, X86Mem) + + //! Packed WORD sum of absolute differences (SSE). + INST_2x(psadbw, kX86InstIdPsadbw, X86MmVar, X86MmVar) + //! \overload + INST_2x(psadbw, kX86InstIdPsadbw, X86MmVar, X86Mem) + + //! Packed WORD shuffle (SSE). + INST_3i(pshufw, kX86InstIdPshufw, X86MmVar, X86MmVar, Imm) + //! \overload + INST_3i(pshufw, kX86InstIdPshufw, X86MmVar, X86Mem, Imm) + + //! Packed SP-FP reciprocal (SSE). + INST_2x(rcpps, kX86InstIdRcpps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(rcpps, kX86InstIdRcpps, X86XmmVar, X86Mem) + + //! Scalar SP-FP reciprocal (SSE). + INST_2x(rcpss, kX86InstIdRcpss, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(rcpss, kX86InstIdRcpss, X86XmmVar, X86Mem) + + //! Prefetch (SSE). + INST_2i(prefetch, kX86InstIdPrefetch, X86Mem, Imm) + + //! Packed WORD sum of absolute differences (SSE). + INST_2x(psadbw, kX86InstIdPsadbw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(psadbw, kX86InstIdPsadbw, X86XmmVar, X86Mem) + + //! Packed SP-FP Square root reciprocal (SSE). + INST_2x(rsqrtps, kX86InstIdRsqrtps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(rsqrtps, kX86InstIdRsqrtps, X86XmmVar, X86Mem) + + //! 
Scalar SP-FP Square root reciprocal (SSE).
+  INST_2x(rsqrtss, kX86InstIdRsqrtss, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(rsqrtss, kX86InstIdRsqrtss, X86XmmVar, X86Mem)
+
+  //! Store fence (SSE).
+  INST_0x(sfence, kX86InstIdSfence)
+
+  //! Shuffle SP-FP (SSE).
+  INST_3i(shufps, kX86InstIdShufps, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_3i(shufps, kX86InstIdShufps, X86XmmVar, X86Mem, Imm)
+
+  //! Packed SP-FP square root (SSE).
+  INST_2x(sqrtps, kX86InstIdSqrtps, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(sqrtps, kX86InstIdSqrtps, X86XmmVar, X86Mem)
+
+  //! Scalar SP-FP square root (SSE).
+  INST_2x(sqrtss, kX86InstIdSqrtss, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(sqrtss, kX86InstIdSqrtss, X86XmmVar, X86Mem)
+
+  //! Store streaming SIMD extension control/status (SSE).
+  INST_1x(stmxcsr, kX86InstIdStmxcsr, X86Mem)
+
+  //! Packed SP-FP subtract (SSE).
+  INST_2x(subps, kX86InstIdSubps, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(subps, kX86InstIdSubps, X86XmmVar, X86Mem)
+
+  //! Scalar SP-FP subtract (SSE).
+  INST_2x(subss, kX86InstIdSubss, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(subss, kX86InstIdSubss, X86XmmVar, X86Mem)
+
+  //! Unordered scalar SP-FP compare and set EFLAGS (SSE).
+  INST_2x(ucomiss, kX86InstIdUcomiss, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(ucomiss, kX86InstIdUcomiss, X86XmmVar, X86Mem)
+
+  //! Unpack high packed SP-FP data (SSE).
+  INST_2x(unpckhps, kX86InstIdUnpckhps, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(unpckhps, kX86InstIdUnpckhps, X86XmmVar, X86Mem)
+
+  //! Unpack low packed SP-FP data (SSE).
+  INST_2x(unpcklps, kX86InstIdUnpcklps, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(unpcklps, kX86InstIdUnpcklps, X86XmmVar, X86Mem)
+
+  //! Packed SP-FP bitwise xor (SSE).
+  INST_2x(xorps, kX86InstIdXorps, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(xorps, kX86InstIdXorps, X86XmmVar, X86Mem)
+
+  // --------------------------------------------------------------------------
+  // [SSE2]
+  // --------------------------------------------------------------------------
+
+  //! Packed DP-FP add (SSE2).
+  INST_2x(addpd, kX86InstIdAddpd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(addpd, kX86InstIdAddpd, X86XmmVar, X86Mem)
+
+  //! Scalar DP-FP add (SSE2).
+  INST_2x(addsd, kX86InstIdAddsd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(addsd, kX86InstIdAddsd, X86XmmVar, X86Mem)
+
+  //! Packed DP-FP bitwise and-not (SSE2).
+  INST_2x(andnpd, kX86InstIdAndnpd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(andnpd, kX86InstIdAndnpd, X86XmmVar, X86Mem)
+
+  //! Packed DP-FP bitwise and (SSE2).
+  INST_2x(andpd, kX86InstIdAndpd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(andpd, kX86InstIdAndpd, X86XmmVar, X86Mem)
+
+  //! Packed DP-FP compare (SSE2).
+  INST_3i(cmppd, kX86InstIdCmppd, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_3i(cmppd, kX86InstIdCmppd, X86XmmVar, X86Mem, Imm)
+
+  //! Scalar DP-FP compare (SSE2).
+  INST_3i(cmpsd, kX86InstIdCmpsd, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_3i(cmpsd, kX86InstIdCmpsd, X86XmmVar, X86Mem, Imm)
+
+  //! Scalar ordered DP-FP compare and set EFLAGS (SSE2).
+  INST_2x(comisd, kX86InstIdComisd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(comisd, kX86InstIdComisd, X86XmmVar, X86Mem)
+
+  //! Convert packed DWORD integers to packed DP-FP (SSE2).
+  INST_2x(cvtdq2pd, kX86InstIdCvtdq2pd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvtdq2pd, kX86InstIdCvtdq2pd, X86XmmVar, X86Mem)
+
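+  // NOTE (editor's sketch, not part of the vendored header): every INST_*
+  // wrapper above becomes an X86Compiler member function. Assuming a compiler
+  // `c` and XMM variables `x0`/`x1` (illustrative names) and asmjit's imm()
+  // helper, code generation looks like:
+  //   c.cmppd(x0, x1, imm(0));  // predicate 0 = compare-equal
+  //   c.cvtdq2pd(x0, x1);       // packed DWORDs -> packed DP-FP
+
+  //! Convert packed DWORD integers to packed SP-FP (SSE2).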
+  INST_2x(cvtdq2ps, kX86InstIdCvtdq2ps, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvtdq2ps, kX86InstIdCvtdq2ps, X86XmmVar, X86Mem)
+
+  //! Convert packed DP-FP to packed DWORDs (SSE2).
+  INST_2x(cvtpd2dq, kX86InstIdCvtpd2dq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvtpd2dq, kX86InstIdCvtpd2dq, X86XmmVar, X86Mem)
+
+  //! Convert packed DP-FP to packed DWORDs (SSE2).
+  INST_2x(cvtpd2pi, kX86InstIdCvtpd2pi, X86MmVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvtpd2pi, kX86InstIdCvtpd2pi, X86MmVar, X86Mem)
+
+  //! Convert packed DP-FP to packed SP-FP (SSE2).
+  INST_2x(cvtpd2ps, kX86InstIdCvtpd2ps, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvtpd2ps, kX86InstIdCvtpd2ps, X86XmmVar, X86Mem)
+
+  //! Convert packed DWORDs to packed DP-FP (SSE2).
+  INST_2x(cvtpi2pd, kX86InstIdCvtpi2pd, X86XmmVar, X86MmVar)
+  //! \overload
+  INST_2x(cvtpi2pd, kX86InstIdCvtpi2pd, X86XmmVar, X86Mem)
+
+  //! Convert packed SP-FP to packed DWORDs (SSE2).
+  INST_2x(cvtps2dq, kX86InstIdCvtps2dq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvtps2dq, kX86InstIdCvtps2dq, X86XmmVar, X86Mem)
+
+  //! Convert packed SP-FP to packed DP-FP (SSE2).
+  INST_2x(cvtps2pd, kX86InstIdCvtps2pd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvtps2pd, kX86InstIdCvtps2pd, X86XmmVar, X86Mem)
+
+  //! Convert scalar DP-FP to DWORD (SSE2).
+  INST_2x(cvtsd2si, kX86InstIdCvtsd2si, X86GpVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvtsd2si, kX86InstIdCvtsd2si, X86GpVar, X86Mem)
+
+  //! Convert scalar DP-FP to scalar SP-FP (SSE2).
+  INST_2x(cvtsd2ss, kX86InstIdCvtsd2ss, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvtsd2ss, kX86InstIdCvtsd2ss, X86XmmVar, X86Mem)
+
+  //! Convert DWORD to scalar DP-FP (SSE2).
+  INST_2x(cvtsi2sd, kX86InstIdCvtsi2sd, X86XmmVar, X86GpVar)
+  //! \overload
+  INST_2x(cvtsi2sd, kX86InstIdCvtsi2sd, X86XmmVar, X86Mem)
+
+  //! Convert scalar SP-FP to DP-FP (SSE2).
+  INST_2x(cvtss2sd, kX86InstIdCvtss2sd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvtss2sd, kX86InstIdCvtss2sd, X86XmmVar, X86Mem)
+
+  //! Convert with truncation packed DP-FP to packed DWORDs (SSE2).
+  INST_2x(cvttpd2pi, kX86InstIdCvttpd2pi, X86MmVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvttpd2pi, kX86InstIdCvttpd2pi, X86MmVar, X86Mem)
+
+  //! Convert with truncation packed DP-FP to packed DWORDs (SSE2).
+  INST_2x(cvttpd2dq, kX86InstIdCvttpd2dq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvttpd2dq, kX86InstIdCvttpd2dq, X86XmmVar, X86Mem)
+
+  //! Convert with truncation packed SP-FP to packed DWORDs (SSE2).
+  INST_2x(cvttps2dq, kX86InstIdCvttps2dq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvttps2dq, kX86InstIdCvttps2dq, X86XmmVar, X86Mem)
+
+  //! Convert with truncation scalar DP-FP to DWORD (SSE2).
+  INST_2x(cvttsd2si, kX86InstIdCvttsd2si, X86GpVar, X86XmmVar)
+  //! \overload
+  INST_2x(cvttsd2si, kX86InstIdCvttsd2si, X86GpVar, X86Mem)
+
+  //! Packed DP-FP divide (SSE2).
+  INST_2x(divpd, kX86InstIdDivpd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(divpd, kX86InstIdDivpd, X86XmmVar, X86Mem)
+
+  //! Scalar DP-FP divide (SSE2).
+  INST_2x(divsd, kX86InstIdDivsd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(divsd, kX86InstIdDivsd, X86XmmVar, X86Mem)
+
+  //! Load fence (SSE2).
+  INST_0x(lfence, kX86InstIdLfence)
+
+  //! Store selected bytes of DQWORD (SSE2).
+  INST_3x(maskmovdqu, kX86InstIdMaskmovdqu, X86GpVar /* ZDI */, X86XmmVar, X86XmmVar)
+
+  //! Packed DP-FP maximum (SSE2).
+  INST_2x(maxpd, kX86InstIdMaxpd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(maxpd, kX86InstIdMaxpd, X86XmmVar, X86Mem)
+
+  //!
Scalar DP-FP maximum (SSE2). + INST_2x(maxsd, kX86InstIdMaxsd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(maxsd, kX86InstIdMaxsd, X86XmmVar, X86Mem) + + //! Memory fence (SSE2). + INST_0x(mfence, kX86InstIdMfence) + + //! Packed DP-FP minimum (SSE2). + INST_2x(minpd, kX86InstIdMinpd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(minpd, kX86InstIdMinpd, X86XmmVar, X86Mem) + + //! Scalar DP-FP minimum (SSE2). + INST_2x(minsd, kX86InstIdMinsd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(minsd, kX86InstIdMinsd, X86XmmVar, X86Mem) + + //! Move aligned DQWORD (SSE2). + INST_2x(movdqa, kX86InstIdMovdqa, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(movdqa, kX86InstIdMovdqa, X86XmmVar, X86Mem) + //! \overload + INST_2x(movdqa, kX86InstIdMovdqa, X86Mem, X86XmmVar) + + //! Move unaligned DQWORD (SSE2). + INST_2x(movdqu, kX86InstIdMovdqu, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(movdqu, kX86InstIdMovdqu, X86XmmVar, X86Mem) + //! \overload + INST_2x(movdqu, kX86InstIdMovdqu, X86Mem, X86XmmVar) + + //! Extract packed SP-FP sign mask (SSE2). + INST_2x(movmskps, kX86InstIdMovmskps, X86GpVar, X86XmmVar) + + //! Extract packed DP-FP sign mask (SSE2). + INST_2x(movmskpd, kX86InstIdMovmskpd, X86GpVar, X86XmmVar) + + //! Move scalar DP-FP (SSE2). + INST_2x(movsd, kX86InstIdMovsd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(movsd, kX86InstIdMovsd, X86XmmVar, X86Mem) + //! \overload + INST_2x(movsd, kX86InstIdMovsd, X86Mem, X86XmmVar) + + //! Move aligned packed DP-FP (SSE2). + INST_2x(movapd, kX86InstIdMovapd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(movapd, kX86InstIdMovapd, X86XmmVar, X86Mem) + //! \overload + INST_2x(movapd, kX86InstIdMovapd, X86Mem, X86XmmVar) + + //! Move QWORD from XMM to MMX register (SSE2). + INST_2x(movdq2q, kX86InstIdMovdq2q, X86MmVar, X86XmmVar) + + //! Move QWORD from MMX to XMM register (SSE2). + INST_2x(movq2dq, kX86InstIdMovq2dq, X86XmmVar, X86MmVar) + + //! Move high packed DP-FP (SSE2). + INST_2x(movhpd, kX86InstIdMovhpd, X86XmmVar, X86Mem) + //! \overload + INST_2x(movhpd, kX86InstIdMovhpd, X86Mem, X86XmmVar) + + //! Move low packed DP-FP (SSE2). + INST_2x(movlpd, kX86InstIdMovlpd, X86XmmVar, X86Mem) + //! \overload + INST_2x(movlpd, kX86InstIdMovlpd, X86Mem, X86XmmVar) + + //! Store OWORD using NT hint (SSE2). + INST_2x(movntdq, kX86InstIdMovntdq, X86Mem, X86XmmVar) + + //! Store DWORD using NT hint (SSE2). + INST_2x(movnti, kX86InstIdMovnti, X86Mem, X86GpVar) + + //! Store packed DP-FP using NT hint (SSE2). + INST_2x(movntpd, kX86InstIdMovntpd, X86Mem, X86XmmVar) + + //! Move unaligned packed DP-FP (SSE2). + INST_2x(movupd, kX86InstIdMovupd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(movupd, kX86InstIdMovupd, X86XmmVar, X86Mem) + //! \overload + INST_2x(movupd, kX86InstIdMovupd, X86Mem, X86XmmVar) + + //! Packed DP-FP multiply (SSE2). + INST_2x(mulpd, kX86InstIdMulpd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(mulpd, kX86InstIdMulpd, X86XmmVar, X86Mem) + + //! Scalar DP-FP multiply (SSE2). + INST_2x(mulsd, kX86InstIdMulsd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(mulsd, kX86InstIdMulsd, X86XmmVar, X86Mem) + + //! Packed DP-FP bitwise or (SSE2). + INST_2x(orpd, kX86InstIdOrpd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(orpd, kX86InstIdOrpd, X86XmmVar, X86Mem) + + //! Pack WORDs to BYTEs with signed saturation (SSE2). + INST_2x(packsswb, kX86InstIdPacksswb, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(packsswb, kX86InstIdPacksswb, X86XmmVar, X86Mem) + + //! Pack DWORDs to WORDs with signed saturation (SSE2). 
+  INST_2x(packssdw, kX86InstIdPackssdw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(packssdw, kX86InstIdPackssdw, X86XmmVar, X86Mem)
+
+  //! Pack WORDs to BYTEs with unsigned saturation (SSE2).
+  INST_2x(packuswb, kX86InstIdPackuswb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(packuswb, kX86InstIdPackuswb, X86XmmVar, X86Mem)
+
+  //! Packed BYTE add (SSE2).
+  INST_2x(paddb, kX86InstIdPaddb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(paddb, kX86InstIdPaddb, X86XmmVar, X86Mem)
+
+  //! Packed WORD add (SSE2).
+  INST_2x(paddw, kX86InstIdPaddw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(paddw, kX86InstIdPaddw, X86XmmVar, X86Mem)
+
+  //! Packed DWORD add (SSE2).
+  INST_2x(paddd, kX86InstIdPaddd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(paddd, kX86InstIdPaddd, X86XmmVar, X86Mem)
+
+  //! Packed QWORD add (SSE2).
+  INST_2x(paddq, kX86InstIdPaddq, X86MmVar, X86MmVar)
+  //! \overload
+  INST_2x(paddq, kX86InstIdPaddq, X86MmVar, X86Mem)
+
+  //! Packed QWORD add (SSE2).
+  INST_2x(paddq, kX86InstIdPaddq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(paddq, kX86InstIdPaddq, X86XmmVar, X86Mem)
+
+  //! Packed BYTE add with saturation (SSE2).
+  INST_2x(paddsb, kX86InstIdPaddsb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(paddsb, kX86InstIdPaddsb, X86XmmVar, X86Mem)
+
+  //! Packed WORD add with saturation (SSE2).
+  INST_2x(paddsw, kX86InstIdPaddsw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(paddsw, kX86InstIdPaddsw, X86XmmVar, X86Mem)
+
+  //! Packed BYTE add with unsigned saturation (SSE2).
+  INST_2x(paddusb, kX86InstIdPaddusb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(paddusb, kX86InstIdPaddusb, X86XmmVar, X86Mem)
+
+  //! Packed WORD add with unsigned saturation (SSE2).
+  INST_2x(paddusw, kX86InstIdPaddusw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(paddusw, kX86InstIdPaddusw, X86XmmVar, X86Mem)
+
+  //! Packed bitwise and (SSE2).
+  INST_2x(pand, kX86InstIdPand, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pand, kX86InstIdPand, X86XmmVar, X86Mem)
+
+  //! Packed bitwise and-not (SSE2).
+  INST_2x(pandn, kX86InstIdPandn, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pandn, kX86InstIdPandn, X86XmmVar, X86Mem)
+
+  //! Spin loop hint (SSE2).
+  INST_0x(pause, kX86InstIdPause)
+
+  //! Packed BYTE average (SSE2).
+  INST_2x(pavgb, kX86InstIdPavgb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pavgb, kX86InstIdPavgb, X86XmmVar, X86Mem)
+
+  //! Packed WORD average (SSE2).
+  INST_2x(pavgw, kX86InstIdPavgw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pavgw, kX86InstIdPavgw, X86XmmVar, X86Mem)
+
+  //! Packed BYTE compare for equality (SSE2).
+  INST_2x(pcmpeqb, kX86InstIdPcmpeqb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pcmpeqb, kX86InstIdPcmpeqb, X86XmmVar, X86Mem)
+
+  //! Packed WORD compare for equality (SSE2).
+  INST_2x(pcmpeqw, kX86InstIdPcmpeqw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pcmpeqw, kX86InstIdPcmpeqw, X86XmmVar, X86Mem)
+
+  //! Packed DWORD compare for equality (SSE2).
+  INST_2x(pcmpeqd, kX86InstIdPcmpeqd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pcmpeqd, kX86InstIdPcmpeqd, X86XmmVar, X86Mem)
+
+  //! Packed BYTE compare if greater than (SSE2).
+  INST_2x(pcmpgtb, kX86InstIdPcmpgtb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pcmpgtb, kX86InstIdPcmpgtb, X86XmmVar, X86Mem)
+
+  //! Packed WORD compare if greater than (SSE2).
+  INST_2x(pcmpgtw, kX86InstIdPcmpgtw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pcmpgtw, kX86InstIdPcmpgtw, X86XmmVar, X86Mem)
+
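+  // NOTE (editor's sketch, not part of the vendored header): the packed
+  // integer forms are emitted the same way; e.g., with XMM variables
+  // `x0`/`x1` (illustrative names):
+  //   c.paddd(x0, x1);    // packed DWORD add
+  //   c.pcmpeqd(x0, x1);  // packed DWORD compare for equality
+
+  //! Packed DWORD compare if greater than (SSE2).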
+ INST_2x(pcmpgtd, kX86InstIdPcmpgtd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pcmpgtd, kX86InstIdPcmpgtd, X86XmmVar, X86Mem) + + //! Extract WORD based on selector (SSE2). + INST_3i(pextrw, kX86InstIdPextrw, X86GpVar, X86XmmVar, Imm) + + //! Insert WORD based on selector (SSE2). + INST_3i(pinsrw, kX86InstIdPinsrw, X86XmmVar, X86GpVar, Imm) + //! \overload + INST_3i(pinsrw, kX86InstIdPinsrw, X86XmmVar, X86Mem, Imm) + + //! Packed WORD maximum (SSE2). + INST_2x(pmaxsw, kX86InstIdPmaxsw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pmaxsw, kX86InstIdPmaxsw, X86XmmVar, X86Mem) + + //! Packed BYTE unsigned maximum (SSE2). + INST_2x(pmaxub, kX86InstIdPmaxub, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pmaxub, kX86InstIdPmaxub, X86XmmVar, X86Mem) + + //! Packed WORD minimum (SSE2). + INST_2x(pminsw, kX86InstIdPminsw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pminsw, kX86InstIdPminsw, X86XmmVar, X86Mem) + + //! Packed BYTE unsigned minimum (SSE2). + INST_2x(pminub, kX86InstIdPminub, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pminub, kX86InstIdPminub, X86XmmVar, X86Mem) + + //! Move BYTE mask (SSE2). + INST_2x(pmovmskb, kX86InstIdPmovmskb, X86GpVar, X86XmmVar) + + //! Packed WORD multiply high (SSE2). + INST_2x(pmulhw, kX86InstIdPmulhw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pmulhw, kX86InstIdPmulhw, X86XmmVar, X86Mem) + + //! Packed WORD unsigned multiply high (SSE2). + INST_2x(pmulhuw, kX86InstIdPmulhuw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pmulhuw, kX86InstIdPmulhuw, X86XmmVar, X86Mem) + + //! Packed WORD multiply low (SSE2). + INST_2x(pmullw, kX86InstIdPmullw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pmullw, kX86InstIdPmullw, X86XmmVar, X86Mem) + + //! Packed DWORD multiply to QWORD (SSE2). + INST_2x(pmuludq, kX86InstIdPmuludq, X86MmVar, X86MmVar) + //! \overload + INST_2x(pmuludq, kX86InstIdPmuludq, X86MmVar, X86Mem) + + //! Packed DWORD multiply to QWORD (SSE2). + INST_2x(pmuludq, kX86InstIdPmuludq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pmuludq, kX86InstIdPmuludq, X86XmmVar, X86Mem) + + //! Packed bitwise or (SSE2). + INST_2x(por, kX86InstIdPor, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(por, kX86InstIdPor, X86XmmVar, X86Mem) + + //! Packed DWORD shift left logical (SSE2). + INST_2x(pslld, kX86InstIdPslld, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pslld, kX86InstIdPslld, X86XmmVar, X86Mem) + //! \overload + INST_2i(pslld, kX86InstIdPslld, X86XmmVar, Imm) + + //! Packed QWORD shift left logical (SSE2). + INST_2x(psllq, kX86InstIdPsllq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(psllq, kX86InstIdPsllq, X86XmmVar, X86Mem) + //! \overload + INST_2i(psllq, kX86InstIdPsllq, X86XmmVar, Imm) + + //! Packed WORD shift left logical (SSE2). + INST_2x(psllw, kX86InstIdPsllw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(psllw, kX86InstIdPsllw, X86XmmVar, X86Mem) + //! \overload + INST_2i(psllw, kX86InstIdPsllw, X86XmmVar, Imm) + + //! Packed DQWORD shift left logical (SSE2). + INST_2i(pslldq, kX86InstIdPslldq, X86XmmVar, Imm) + + //! Packed DWORD shift right arithmetic (SSE2). + INST_2x(psrad, kX86InstIdPsrad, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(psrad, kX86InstIdPsrad, X86XmmVar, X86Mem) + //! \overload + INST_2i(psrad, kX86InstIdPsrad, X86XmmVar, Imm) + + //! Packed WORD shift right arithmetic (SSE2). + INST_2x(psraw, kX86InstIdPsraw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(psraw, kX86InstIdPsraw, X86XmmVar, X86Mem) + //! \overload + INST_2i(psraw, kX86InstIdPsraw, X86XmmVar, Imm) + + //! 
Packed BYTE subtract (SSE2).
+  INST_2x(psubb, kX86InstIdPsubb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psubb, kX86InstIdPsubb, X86XmmVar, X86Mem)
+
+  //! Packed DWORD subtract (SSE2).
+  INST_2x(psubd, kX86InstIdPsubd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psubd, kX86InstIdPsubd, X86XmmVar, X86Mem)
+
+  //! Packed QWORD subtract (SSE2).
+  INST_2x(psubq, kX86InstIdPsubq, X86MmVar, X86MmVar)
+  //! \overload
+  INST_2x(psubq, kX86InstIdPsubq, X86MmVar, X86Mem)
+
+  //! Packed QWORD subtract (SSE2).
+  INST_2x(psubq, kX86InstIdPsubq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psubq, kX86InstIdPsubq, X86XmmVar, X86Mem)
+
+  //! Packed WORD subtract (SSE2).
+  INST_2x(psubw, kX86InstIdPsubw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psubw, kX86InstIdPsubw, X86XmmVar, X86Mem)
+
+  //! Packed WORD to DWORD multiply and add (SSE2).
+  INST_2x(pmaddwd, kX86InstIdPmaddwd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmaddwd, kX86InstIdPmaddwd, X86XmmVar, X86Mem)
+
+  //! Packed DWORD shuffle (SSE2).
+  INST_3i(pshufd, kX86InstIdPshufd, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_3i(pshufd, kX86InstIdPshufd, X86XmmVar, X86Mem, Imm)
+
+  //! Packed WORD shuffle high (SSE2).
+  INST_3i(pshufhw, kX86InstIdPshufhw, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_3i(pshufhw, kX86InstIdPshufhw, X86XmmVar, X86Mem, Imm)
+
+  //! Packed WORD shuffle low (SSE2).
+  INST_3i(pshuflw, kX86InstIdPshuflw, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_3i(pshuflw, kX86InstIdPshuflw, X86XmmVar, X86Mem, Imm)
+
+  //! Packed DWORD shift right logical (SSE2).
+  INST_2x(psrld, kX86InstIdPsrld, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psrld, kX86InstIdPsrld, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2i(psrld, kX86InstIdPsrld, X86XmmVar, Imm)
+
+  //! Packed QWORD shift right logical (SSE2).
+  INST_2x(psrlq, kX86InstIdPsrlq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psrlq, kX86InstIdPsrlq, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2i(psrlq, kX86InstIdPsrlq, X86XmmVar, Imm)
+
+  //! Packed DQWORD shift right logical (SSE2).
+  INST_2i(psrldq, kX86InstIdPsrldq, X86XmmVar, Imm)
+
+  //! Packed WORD shift right logical (SSE2).
+  INST_2x(psrlw, kX86InstIdPsrlw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psrlw, kX86InstIdPsrlw, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2i(psrlw, kX86InstIdPsrlw, X86XmmVar, Imm)
+
+  //! Packed BYTE subtract with saturation (SSE2).
+  INST_2x(psubsb, kX86InstIdPsubsb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psubsb, kX86InstIdPsubsb, X86XmmVar, X86Mem)
+
+  //! Packed WORD subtract with saturation (SSE2).
+  INST_2x(psubsw, kX86InstIdPsubsw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psubsw, kX86InstIdPsubsw, X86XmmVar, X86Mem)
+
+  //! Packed BYTE subtract with unsigned saturation (SSE2).
+  INST_2x(psubusb, kX86InstIdPsubusb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psubusb, kX86InstIdPsubusb, X86XmmVar, X86Mem)
+
+  //! Packed WORD subtract with unsigned saturation (SSE2).
+  INST_2x(psubusw, kX86InstIdPsubusw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psubusw, kX86InstIdPsubusw, X86XmmVar, X86Mem)
+
+  //! Unpack high packed BYTEs to WORDs (SSE2).
+  INST_2x(punpckhbw, kX86InstIdPunpckhbw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(punpckhbw, kX86InstIdPunpckhbw, X86XmmVar, X86Mem)
+
+  //! Unpack high packed DWORDs to QWORDs (SSE2).
+  INST_2x(punpckhdq, kX86InstIdPunpckhdq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(punpckhdq, kX86InstIdPunpckhdq, X86XmmVar, X86Mem)
+
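+  // NOTE (editor's sketch, not part of the vendored header): shuffles and
+  // immediate shifts take an Imm operand; e.g., with an XMM variable `x0`
+  // (illustrative) and asmjit's imm() helper:
+  //   c.pshufd(x0, x0, imm(0x1B));  // 0x1B reverses the four DWORDs
+  //   c.psrld(x0, imm(4));          // shift each DWORD right by 4
+
+  //! Unpack high packed QWORDs to DQWORD (SSE2).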
+ INST_2x(punpckhqdq, kX86InstIdPunpckhqdq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(punpckhqdq, kX86InstIdPunpckhqdq, X86XmmVar, X86Mem) + + //! Unpack high packed WORDs to DWORDs (SSE2). + INST_2x(punpckhwd, kX86InstIdPunpckhwd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(punpckhwd, kX86InstIdPunpckhwd, X86XmmVar, X86Mem) + + //! Unpack low packed BYTEs to WORDs (SSE2). + INST_2x(punpcklbw, kX86InstIdPunpcklbw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(punpcklbw, kX86InstIdPunpcklbw, X86XmmVar, X86Mem) + + //! Unpack low packed DWORDs to QWORDs (SSE2). + INST_2x(punpckldq, kX86InstIdPunpckldq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(punpckldq, kX86InstIdPunpckldq, X86XmmVar, X86Mem) + + //! Unpack low packed QWORDs to DQWORD (SSE2). + INST_2x(punpcklqdq, kX86InstIdPunpcklqdq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(punpcklqdq, kX86InstIdPunpcklqdq, X86XmmVar, X86Mem) + + //! Unpack low packed WORDs to DWORDs (SSE2). + INST_2x(punpcklwd, kX86InstIdPunpcklwd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(punpcklwd, kX86InstIdPunpcklwd, X86XmmVar, X86Mem) + + //! Packed bitwise xor (SSE2). + INST_2x(pxor, kX86InstIdPxor, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pxor, kX86InstIdPxor, X86XmmVar, X86Mem) + + //! Shuffle DP-FP (SSE2). + INST_3i(shufpd, kX86InstIdShufpd, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(shufpd, kX86InstIdShufpd, X86XmmVar, X86Mem, Imm) + + //! Packed DP-FP square root (SSE2). + INST_2x(sqrtpd, kX86InstIdSqrtpd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(sqrtpd, kX86InstIdSqrtpd, X86XmmVar, X86Mem) + + //! Scalar DP-FP square root (SSE2). + INST_2x(sqrtsd, kX86InstIdSqrtsd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(sqrtsd, kX86InstIdSqrtsd, X86XmmVar, X86Mem) + + //! Packed DP-FP subtract (SSE2). + INST_2x(subpd, kX86InstIdSubpd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(subpd, kX86InstIdSubpd, X86XmmVar, X86Mem) + + //! Scalar DP-FP subtract (SSE2). + INST_2x(subsd, kX86InstIdSubsd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(subsd, kX86InstIdSubsd, X86XmmVar, X86Mem) + + //! Scalar DP-FP unordered compare and set EFLAGS (SSE2). + INST_2x(ucomisd, kX86InstIdUcomisd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(ucomisd, kX86InstIdUcomisd, X86XmmVar, X86Mem) + + //! Unpack and interleave high packed DP-FP (SSE2). + INST_2x(unpckhpd, kX86InstIdUnpckhpd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(unpckhpd, kX86InstIdUnpckhpd, X86XmmVar, X86Mem) + + //! Unpack and interleave low packed DP-FP (SSE2). + INST_2x(unpcklpd, kX86InstIdUnpcklpd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(unpcklpd, kX86InstIdUnpcklpd, X86XmmVar, X86Mem) + + //! Packed DP-FP bitwise xor (SSE2). + INST_2x(xorpd, kX86InstIdXorpd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(xorpd, kX86InstIdXorpd, X86XmmVar, X86Mem) + + // -------------------------------------------------------------------------- + // [SSE3] + // -------------------------------------------------------------------------- + + //! Packed DP-FP add/subtract (SSE3). + INST_2x(addsubpd, kX86InstIdAddsubpd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(addsubpd, kX86InstIdAddsubpd, X86XmmVar, X86Mem) + + //! Packed SP-FP add/subtract (SSE3). + INST_2x(addsubps, kX86InstIdAddsubps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(addsubps, kX86InstIdAddsubps, X86XmmVar, X86Mem) + + //! Store truncated `fp0` to `short_or_int_or_long[o0]` and POP (FPU & SSE3). + INST_1x(fisttp, kX86InstIdFisttp, X86Mem) + + //! Packed DP-FP horizontal add (SSE3). 
+  INST_2x(haddpd, kX86InstIdHaddpd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(haddpd, kX86InstIdHaddpd, X86XmmVar, X86Mem)
+
+  //! Packed SP-FP horizontal add (SSE3).
+  INST_2x(haddps, kX86InstIdHaddps, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(haddps, kX86InstIdHaddps, X86XmmVar, X86Mem)
+
+  //! Packed DP-FP horizontal subtract (SSE3).
+  INST_2x(hsubpd, kX86InstIdHsubpd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(hsubpd, kX86InstIdHsubpd, X86XmmVar, X86Mem)
+
+  //! Packed SP-FP horizontal subtract (SSE3).
+  INST_2x(hsubps, kX86InstIdHsubps, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(hsubps, kX86InstIdHsubps, X86XmmVar, X86Mem)
+
+  //! Load 128-bits unaligned (SSE3).
+  INST_2x(lddqu, kX86InstIdLddqu, X86XmmVar, X86Mem)
+
+  // //! Setup monitor address (SSE3).
+  // INST_0x(monitor, kX86InstIdMonitor)
+
+  //! Move one DP-FP and duplicate (SSE3).
+  INST_2x(movddup, kX86InstIdMovddup, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(movddup, kX86InstIdMovddup, X86XmmVar, X86Mem)
+
+  //! Move packed SP-FP high and duplicate (SSE3).
+  INST_2x(movshdup, kX86InstIdMovshdup, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(movshdup, kX86InstIdMovshdup, X86XmmVar, X86Mem)
+
+  //! Move packed SP-FP low and duplicate (SSE3).
+  INST_2x(movsldup, kX86InstIdMovsldup, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(movsldup, kX86InstIdMovsldup, X86XmmVar, X86Mem)
+
+  // //! Monitor wait (SSE3).
+  // INST_0x(mwait, kX86InstIdMwait)
+
+  // --------------------------------------------------------------------------
+  // [SSSE3]
+  // --------------------------------------------------------------------------
+
+  //! Packed BYTE sign (SSSE3).
+  INST_2x(psignb, kX86InstIdPsignb, X86MmVar, X86MmVar)
+  //! \overload
+  INST_2x(psignb, kX86InstIdPsignb, X86MmVar, X86Mem)
+
+  //! Packed BYTE sign (SSSE3).
+  INST_2x(psignb, kX86InstIdPsignb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psignb, kX86InstIdPsignb, X86XmmVar, X86Mem)
+
+  //! Packed DWORD sign (SSSE3).
+  INST_2x(psignd, kX86InstIdPsignd, X86MmVar, X86MmVar)
+  //! \overload
+  INST_2x(psignd, kX86InstIdPsignd, X86MmVar, X86Mem)
+
+  //! Packed DWORD sign (SSSE3).
+  INST_2x(psignd, kX86InstIdPsignd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psignd, kX86InstIdPsignd, X86XmmVar, X86Mem)
+
+  //! Packed WORD sign (SSSE3).
+  INST_2x(psignw, kX86InstIdPsignw, X86MmVar, X86MmVar)
+  //! \overload
+  INST_2x(psignw, kX86InstIdPsignw, X86MmVar, X86Mem)
+
+  //! Packed WORD sign (SSSE3).
+  INST_2x(psignw, kX86InstIdPsignw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(psignw, kX86InstIdPsignw, X86XmmVar, X86Mem)
+
+  //! Packed DWORD horizontal add (SSSE3).
+  INST_2x(phaddd, kX86InstIdPhaddd, X86MmVar, X86MmVar)
+  //! \overload
+  INST_2x(phaddd, kX86InstIdPhaddd, X86MmVar, X86Mem)
+
+  //! Packed DWORD horizontal add (SSSE3).
+  INST_2x(phaddd, kX86InstIdPhaddd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(phaddd, kX86InstIdPhaddd, X86XmmVar, X86Mem)
+
+  //! Packed WORD horizontal add with saturation (SSSE3).
+  INST_2x(phaddsw, kX86InstIdPhaddsw, X86MmVar, X86MmVar)
+  //! \overload
+  INST_2x(phaddsw, kX86InstIdPhaddsw, X86MmVar, X86Mem)
+
+  //! Packed WORD horizontal add with saturation (SSSE3).
+  INST_2x(phaddsw, kX86InstIdPhaddsw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(phaddsw, kX86InstIdPhaddsw, X86XmmVar, X86Mem)
+
+  //! Packed WORD horizontal add (SSSE3).
+  INST_2x(phaddw, kX86InstIdPhaddw, X86MmVar, X86MmVar)
+  //! \overload
+  INST_2x(phaddw, kX86InstIdPhaddw, X86MmVar, X86Mem)
+
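+  // NOTE (editor's sketch, not part of the vendored header): the horizontal
+  // forms add adjacent element pairs; e.g., with XMM variables `x0`/`x1`:
+  //   c.phaddd(x0, x1);  // low half from pairs of x0, high half from pairs of x1
+
+  //! Packed WORD horizontal add (SSSE3).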
+ INST_2x(phaddw, kX86InstIdPhaddw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(phaddw, kX86InstIdPhaddw, X86XmmVar, X86Mem) + + //! Packed DWORD horizontal subtract (SSSE3). + INST_2x(phsubd, kX86InstIdPhsubd, X86MmVar, X86MmVar) + //! \overload + INST_2x(phsubd, kX86InstIdPhsubd, X86MmVar, X86Mem) + + //! Packed DWORD horizontal subtract (SSSE3). + INST_2x(phsubd, kX86InstIdPhsubd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(phsubd, kX86InstIdPhsubd, X86XmmVar, X86Mem) + + //! Packed WORD horizontal subtract with saturation (SSSE3). + INST_2x(phsubsw, kX86InstIdPhsubsw, X86MmVar, X86MmVar) + //! \overload + INST_2x(phsubsw, kX86InstIdPhsubsw, X86MmVar, X86Mem) + + //! Packed WORD horizontal subtract with saturation (SSSE3). + INST_2x(phsubsw, kX86InstIdPhsubsw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(phsubsw, kX86InstIdPhsubsw, X86XmmVar, X86Mem) + + //! Packed WORD horizontal subtract (SSSE3). + INST_2x(phsubw, kX86InstIdPhsubw, X86MmVar, X86MmVar) + //! \overload + INST_2x(phsubw, kX86InstIdPhsubw, X86MmVar, X86Mem) + + //! Packed WORD horizontal subtract (SSSE3). + INST_2x(phsubw, kX86InstIdPhsubw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(phsubw, kX86InstIdPhsubw, X86XmmVar, X86Mem) + + //! Packed multiply and add signed and unsigned bytes (SSSE3). + INST_2x(pmaddubsw, kX86InstIdPmaddubsw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pmaddubsw, kX86InstIdPmaddubsw, X86MmVar, X86Mem) + + //! Packed multiply and add signed and unsigned bytes (SSSE3). + INST_2x(pmaddubsw, kX86InstIdPmaddubsw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pmaddubsw, kX86InstIdPmaddubsw, X86XmmVar, X86Mem) + + //! Packed BYTE absolute value (SSSE3). + INST_2x(pabsb, kX86InstIdPabsb, X86MmVar, X86MmVar) + //! \overload + INST_2x(pabsb, kX86InstIdPabsb, X86MmVar, X86Mem) + + //! Packed BYTE absolute value (SSSE3). + INST_2x(pabsb, kX86InstIdPabsb, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pabsb, kX86InstIdPabsb, X86XmmVar, X86Mem) + + //! Packed DWORD absolute value (SSSE3). + INST_2x(pabsd, kX86InstIdPabsd, X86MmVar, X86MmVar) + //! \overload + INST_2x(pabsd, kX86InstIdPabsd, X86MmVar, X86Mem) + + //! Packed DWORD absolute value (SSSE3). + INST_2x(pabsd, kX86InstIdPabsd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pabsd, kX86InstIdPabsd, X86XmmVar, X86Mem) + + //! Packed WORD absolute value (SSSE3). + INST_2x(pabsw, kX86InstIdPabsw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pabsw, kX86InstIdPabsw, X86MmVar, X86Mem) + + //! Packed WORD absolute value (SSSE3). + INST_2x(pabsw, kX86InstIdPabsw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pabsw, kX86InstIdPabsw, X86XmmVar, X86Mem) + + //! Packed WORD multiply high, round and scale (SSSE3). + INST_2x(pmulhrsw, kX86InstIdPmulhrsw, X86MmVar, X86MmVar) + //! \overload + INST_2x(pmulhrsw, kX86InstIdPmulhrsw, X86MmVar, X86Mem) + + //! Packed WORD multiply high, round and scale (SSSE3). + INST_2x(pmulhrsw, kX86InstIdPmulhrsw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pmulhrsw, kX86InstIdPmulhrsw, X86XmmVar, X86Mem) + + //! Packed BYTE shuffle (SSSE3). + INST_2x(pshufb, kX86InstIdPshufb, X86MmVar, X86MmVar) + //! \overload + INST_2x(pshufb, kX86InstIdPshufb, X86MmVar, X86Mem) + + //! Packed BYTE shuffle (SSSE3). + INST_2x(pshufb, kX86InstIdPshufb, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pshufb, kX86InstIdPshufb, X86XmmVar, X86Mem) + + //! Packed align right (SSSE3). + INST_3i(palignr, kX86InstIdPalignr, X86MmVar, X86MmVar, Imm) + //! \overload + INST_3i(palignr, kX86InstIdPalignr, X86MmVar, X86Mem, Imm) + + //! 
Packed align right (SSSE3). + INST_3i(palignr, kX86InstIdPalignr, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(palignr, kX86InstIdPalignr, X86XmmVar, X86Mem, Imm) + + // -------------------------------------------------------------------------- + // [SSE4.1] + // -------------------------------------------------------------------------- + + //! Packed DP-FP blend (SSE4.1). + INST_3i(blendpd, kX86InstIdBlendpd, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(blendpd, kX86InstIdBlendpd, X86XmmVar, X86Mem, Imm) + + //! Packed SP-FP blend (SSE4.1). + INST_3i(blendps, kX86InstIdBlendps, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(blendps, kX86InstIdBlendps, X86XmmVar, X86Mem, Imm) + + //! Packed DP-FP variable blend (SSE4.1). + INST_3x(blendvpd, kX86InstIdBlendvpd, X86XmmVar, X86XmmVar, X86XmmVar /* XMM0 */) + //! \overload + INST_3x(blendvpd, kX86InstIdBlendvpd, X86XmmVar, X86Mem, X86XmmVar /* XMM0 */) + + //! Packed SP-FP variable blend (SSE4.1). + INST_3x(blendvps, kX86InstIdBlendvps, X86XmmVar, X86XmmVar, X86XmmVar /* XMM0 */) + //! \overload + INST_3x(blendvps, kX86InstIdBlendvps, X86XmmVar, X86Mem, X86XmmVar /* XMM0 */) + + //! Packed DP-FP dot product (SSE4.1). + INST_3i(dppd, kX86InstIdDppd, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(dppd, kX86InstIdDppd, X86XmmVar, X86Mem, Imm) + + //! Packed SP-FP dot product (SSE4.1). + INST_3i(dpps, kX86InstIdDpps, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(dpps, kX86InstIdDpps, X86XmmVar, X86Mem, Imm) + + //! Extract SP-FP based on selector (SSE4.1). + INST_3i(extractps, kX86InstIdExtractps, X86GpVar, X86XmmVar, Imm) + //! \overload + INST_3i(extractps, kX86InstIdExtractps, X86Mem, X86XmmVar, Imm) + + //! Insert SP-FP based on selector (SSE4.1). + INST_3i(insertps, kX86InstIdInsertps, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(insertps, kX86InstIdInsertps, X86XmmVar, X86Mem, Imm) + + //! Load DQWORD aligned using NT hint (SSE4.1). + INST_2x(movntdqa, kX86InstIdMovntdqa, X86XmmVar, X86Mem) + + //! Packed WORD sums of absolute difference (SSE4.1). + INST_3i(mpsadbw, kX86InstIdMpsadbw, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(mpsadbw, kX86InstIdMpsadbw, X86XmmVar, X86Mem, Imm) + + //! Pack DWORDs to WORDs with unsigned saturation (SSE4.1). + INST_2x(packusdw, kX86InstIdPackusdw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(packusdw, kX86InstIdPackusdw, X86XmmVar, X86Mem) + + //! Packed BYTE variable blend (SSE4.1). + INST_3x(pblendvb, kX86InstIdPblendvb, X86XmmVar, X86XmmVar, X86XmmVar /* XMM0 */) + //! \overload + INST_3x(pblendvb, kX86InstIdPblendvb, X86XmmVar, X86Mem, X86XmmVar /* XMM0 */) + + //! Packed WORD blend (SSE4.1). + INST_3i(pblendw, kX86InstIdPblendw, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(pblendw, kX86InstIdPblendw, X86XmmVar, X86Mem, Imm) + + //! Packed QWORD compare for equality (SSE4.1). + INST_2x(pcmpeqq, kX86InstIdPcmpeqq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pcmpeqq, kX86InstIdPcmpeqq, X86XmmVar, X86Mem) + + //! Extract BYTE based on selector (SSE4.1). + INST_3i(pextrb, kX86InstIdPextrb, X86GpVar, X86XmmVar, Imm) + //! \overload + INST_3i(pextrb, kX86InstIdPextrb, X86Mem, X86XmmVar, Imm) + + //! Extract DWORD based on selector (SSE4.1). + INST_3i(pextrd, kX86InstIdPextrd, X86GpVar, X86XmmVar, Imm) + //! \overload + INST_3i(pextrd, kX86InstIdPextrd, X86Mem, X86XmmVar, Imm) + + //! Extract QWORD based on selector (SSE4.1). + INST_3i(pextrq, kX86InstIdPextrq, X86GpVar, X86XmmVar, Imm) + //! 
\overload
+  INST_3i(pextrq, kX86InstIdPextrq, X86Mem, X86XmmVar, Imm)
+
+  //! Extract WORD based on selector (SSE4.1).
+  INST_3i(pextrw, kX86InstIdPextrw, X86Mem, X86XmmVar, Imm)
+
+  //! Packed WORD horizontal minimum (SSE4.1).
+  INST_2x(phminposuw, kX86InstIdPhminposuw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(phminposuw, kX86InstIdPhminposuw, X86XmmVar, X86Mem)
+
+  //! Insert BYTE based on selector (SSE4.1).
+  INST_3i(pinsrb, kX86InstIdPinsrb, X86XmmVar, X86GpVar, Imm)
+  //! \overload
+  INST_3i(pinsrb, kX86InstIdPinsrb, X86XmmVar, X86Mem, Imm)
+
+  //! Insert DWORD based on selector (SSE4.1).
+  INST_3i(pinsrd, kX86InstIdPinsrd, X86XmmVar, X86GpVar, Imm)
+  //! \overload
+  INST_3i(pinsrd, kX86InstIdPinsrd, X86XmmVar, X86Mem, Imm)
+
+  //! Insert QWORD based on selector (SSE4.1).
+  INST_3i(pinsrq, kX86InstIdPinsrq, X86XmmVar, X86GpVar, Imm)
+  //! \overload
+  INST_3i(pinsrq, kX86InstIdPinsrq, X86XmmVar, X86Mem, Imm)
+
+  //! Packed BYTE maximum (SSE4.1).
+  INST_2x(pmaxsb, kX86InstIdPmaxsb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmaxsb, kX86InstIdPmaxsb, X86XmmVar, X86Mem)
+
+  //! Packed DWORD maximum (SSE4.1).
+  INST_2x(pmaxsd, kX86InstIdPmaxsd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmaxsd, kX86InstIdPmaxsd, X86XmmVar, X86Mem)
+
+  //! Packed DWORD unsigned maximum (SSE4.1).
+  INST_2x(pmaxud, kX86InstIdPmaxud, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmaxud, kX86InstIdPmaxud, X86XmmVar, X86Mem)
+
+  //! Packed WORD unsigned maximum (SSE4.1).
+  INST_2x(pmaxuw, kX86InstIdPmaxuw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmaxuw, kX86InstIdPmaxuw, X86XmmVar, X86Mem)
+
+  //! Packed BYTE minimum (SSE4.1).
+  INST_2x(pminsb, kX86InstIdPminsb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pminsb, kX86InstIdPminsb, X86XmmVar, X86Mem)
+
+  //! Packed DWORD minimum (SSE4.1).
+  INST_2x(pminsd, kX86InstIdPminsd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pminsd, kX86InstIdPminsd, X86XmmVar, X86Mem)
+
+  //! Packed WORD unsigned minimum (SSE4.1).
+  INST_2x(pminuw, kX86InstIdPminuw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pminuw, kX86InstIdPminuw, X86XmmVar, X86Mem)
+
+  //! Packed DWORD unsigned minimum (SSE4.1).
+  INST_2x(pminud, kX86InstIdPminud, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pminud, kX86InstIdPminud, X86XmmVar, X86Mem)
+
+  //! Packed BYTE to DWORD with sign extend (SSE4.1).
+  INST_2x(pmovsxbd, kX86InstIdPmovsxbd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmovsxbd, kX86InstIdPmovsxbd, X86XmmVar, X86Mem)
+
+  //! Packed BYTE to QWORD with sign extend (SSE4.1).
+  INST_2x(pmovsxbq, kX86InstIdPmovsxbq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmovsxbq, kX86InstIdPmovsxbq, X86XmmVar, X86Mem)
+
+  //! Packed BYTE to WORD with sign extend (SSE4.1).
+  INST_2x(pmovsxbw, kX86InstIdPmovsxbw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmovsxbw, kX86InstIdPmovsxbw, X86XmmVar, X86Mem)
+
+  //! Packed DWORD to QWORD with sign extend (SSE4.1).
+  INST_2x(pmovsxdq, kX86InstIdPmovsxdq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmovsxdq, kX86InstIdPmovsxdq, X86XmmVar, X86Mem)
+
+  //! Packed WORD to DWORD with sign extend (SSE4.1).
+  INST_2x(pmovsxwd, kX86InstIdPmovsxwd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmovsxwd, kX86InstIdPmovsxwd, X86XmmVar, X86Mem)
+
+  //! Packed WORD to QWORD with sign extend (SSE4.1).
+  INST_2x(pmovsxwq, kX86InstIdPmovsxwq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmovsxwq, kX86InstIdPmovsxwq, X86XmmVar, X86Mem)
+
+  //! Packed BYTE to DWORD with zero extend (SSE4.1).
+  INST_2x(pmovzxbd, kX86InstIdPmovzxbd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmovzxbd, kX86InstIdPmovzxbd, X86XmmVar, X86Mem)
+
+  //! Packed BYTE to QWORD with zero extend (SSE4.1).
+  INST_2x(pmovzxbq, kX86InstIdPmovzxbq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmovzxbq, kX86InstIdPmovzxbq, X86XmmVar, X86Mem)
+
+  //! Packed BYTE to WORD with zero extend (SSE4.1).
+  INST_2x(pmovzxbw, kX86InstIdPmovzxbw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmovzxbw, kX86InstIdPmovzxbw, X86XmmVar, X86Mem)
+
+  //! Packed DWORD to QWORD with zero extend (SSE4.1).
+  INST_2x(pmovzxdq, kX86InstIdPmovzxdq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmovzxdq, kX86InstIdPmovzxdq, X86XmmVar, X86Mem)
+
+  //! Packed WORD to DWORD with zero extend (SSE4.1).
+  INST_2x(pmovzxwd, kX86InstIdPmovzxwd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmovzxwd, kX86InstIdPmovzxwd, X86XmmVar, X86Mem)
+
+  //! Packed WORD to QWORD with zero extend (SSE4.1).
+  INST_2x(pmovzxwq, kX86InstIdPmovzxwq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmovzxwq, kX86InstIdPmovzxwq, X86XmmVar, X86Mem)
+
+  //! Packed DWORD to QWORD multiply (SSE4.1).
+  INST_2x(pmuldq, kX86InstIdPmuldq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmuldq, kX86InstIdPmuldq, X86XmmVar, X86Mem)
+
+  //! Packed DWORD multiply low (SSE4.1).
+  INST_2x(pmulld, kX86InstIdPmulld, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(pmulld, kX86InstIdPmulld, X86XmmVar, X86Mem)
+
+  //! Logical compare (SSE4.1).
+  INST_2x(ptest, kX86InstIdPtest, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(ptest, kX86InstIdPtest, X86XmmVar, X86Mem)
+
+  //! Packed DP-FP round (SSE4.1).
+  INST_3i(roundpd, kX86InstIdRoundpd, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_3i(roundpd, kX86InstIdRoundpd, X86XmmVar, X86Mem, Imm)
+
+  //! Packed SP-FP round (SSE4.1).
+  INST_3i(roundps, kX86InstIdRoundps, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_3i(roundps, kX86InstIdRoundps, X86XmmVar, X86Mem, Imm)
+
+  //! Scalar DP-FP round (SSE4.1).
+  INST_3i(roundsd, kX86InstIdRoundsd, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_3i(roundsd, kX86InstIdRoundsd, X86XmmVar, X86Mem, Imm)
+
+  //! Scalar SP-FP round (SSE4.1).
+  INST_3i(roundss, kX86InstIdRoundss, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_3i(roundss, kX86InstIdRoundss, X86XmmVar, X86Mem, Imm)
+
+  // --------------------------------------------------------------------------
+  // [SSE4.2]
+  // --------------------------------------------------------------------------
+
+  //! Accumulate CRC32 value (polynomial 0x11EDC6F41) (SSE4.2).
+  INST_2x(crc32, kX86InstIdCrc32, X86GpVar, X86GpVar)
+  //! \overload
+  INST_2x(crc32, kX86InstIdCrc32, X86GpVar, X86Mem)
+
+  //! Packed compare explicit length strings, return index in ECX (SSE4.2).
+  INST_4i(pcmpestri, kX86InstIdPcmpestri, X86GpVar, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_4i(pcmpestri, kX86InstIdPcmpestri, X86GpVar, X86XmmVar, X86Mem, Imm)
+
+  //! Packed compare explicit length strings, return mask in XMM0 (SSE4.2).
+  INST_4i(pcmpestrm, kX86InstIdPcmpestrm, X86XmmVar, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_4i(pcmpestrm, kX86InstIdPcmpestrm, X86XmmVar, X86XmmVar, X86Mem, Imm)
+
+  //! Packed compare implicit length strings, return index in ECX (SSE4.2).
+  INST_4i(pcmpistri, kX86InstIdPcmpistri, X86GpVar, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_4i(pcmpistri, kX86InstIdPcmpistri, X86GpVar, X86XmmVar, X86Mem, Imm)
+
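+  // NOTE (editor's sketch, not part of the vendored header): the string
+  // compares bind their implicit result register through the first operand;
+  // e.g., with a GP variable `idx` and XMM variables `x0`/`x1` (illustrative):
+  //   c.pcmpistri(idx, x0, x1, imm(0x0C));  // equal-ordered mode, index in ECX
+
+  //! Packed compare implicit length strings, return mask in XMM0 (SSE4.2).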
+ INST_4i(pcmpistrm, kX86InstIdPcmpistrm, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(pcmpistrm, kX86InstIdPcmpistrm, X86XmmVar, X86XmmVar, X86Mem, Imm) + + //! Packed QWORD compare if greater than (SSE4.2). + INST_2x(pcmpgtq, kX86InstIdPcmpgtq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(pcmpgtq, kX86InstIdPcmpgtq, X86XmmVar, X86Mem) + + // -------------------------------------------------------------------------- + // [SSE4a] + // -------------------------------------------------------------------------- + + //! Extract Field (SSE4a). + INST_2x(extrq, kX86InstIdExtrq, X86XmmVar, X86XmmVar) + //! Extract Field (SSE4a). + INST_3ii(extrq, kX86InstIdExtrq, X86XmmVar, Imm, Imm) + + //! Insert Field (SSE4a). + INST_2x(insertq, kX86InstIdInsertq, X86XmmVar, X86XmmVar) + //! Insert Field (SSE4a). + INST_4ii(insertq, kX86InstIdInsertq, X86XmmVar, X86XmmVar, Imm, Imm) + + //! Move Non-Temporal Scalar DP-FP (SSE4a). + INST_2x(movntsd, kX86InstIdMovntsd, X86Mem, X86XmmVar) + //! Move Non-Temporal Scalar SP-FP (SSE4a). + INST_2x(movntss, kX86InstIdMovntss, X86Mem, X86XmmVar) + + // -------------------------------------------------------------------------- + // [AESNI] + // -------------------------------------------------------------------------- + + //! Perform a single round of the AES decryption flow. + INST_2x(aesdec, kX86InstIdAesdec, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(aesdec, kX86InstIdAesdec, X86XmmVar, X86Mem) + + //! Perform the last round of the AES decryption flow. + INST_2x(aesdeclast, kX86InstIdAesdeclast, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(aesdeclast, kX86InstIdAesdeclast, X86XmmVar, X86Mem) + + //! Perform a single round of the AES encryption flow. + INST_2x(aesenc, kX86InstIdAesenc, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(aesenc, kX86InstIdAesenc, X86XmmVar, X86Mem) + + //! Perform the last round of the AES encryption flow. + INST_2x(aesenclast, kX86InstIdAesenclast, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(aesenclast, kX86InstIdAesenclast, X86XmmVar, X86Mem) + + //! Perform the InvMixColumns transformation. + INST_2x(aesimc, kX86InstIdAesimc, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(aesimc, kX86InstIdAesimc, X86XmmVar, X86Mem) + + //! Assist in expanding the AES cipher key. + INST_3i(aeskeygenassist, kX86InstIdAeskeygenassist, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(aeskeygenassist, kX86InstIdAeskeygenassist, X86XmmVar, X86Mem, Imm) + + // -------------------------------------------------------------------------- + // [SHA] + // -------------------------------------------------------------------------- + + //! Perform an intermediate calculation for the next four SHA1 message DWORDs (SHA). + INST_2x(sha1msg1, kX86InstIdSha1msg1, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(sha1msg1, kX86InstIdSha1msg1, X86XmmVar, X86Mem) + + //! Perform a final calculation for the next four SHA1 message DWORDs (SHA). + INST_2x(sha1msg2, kX86InstIdSha1msg2, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(sha1msg2, kX86InstIdSha1msg2, X86XmmVar, X86Mem) + + //! Calculate SHA1 state variable E after four rounds (SHA). + INST_2x(sha1nexte, kX86InstIdSha1nexte, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(sha1nexte, kX86InstIdSha1nexte, X86XmmVar, X86Mem) + + //! Perform four rounds of SHA1 operation (SHA). + INST_3i(sha1rnds4, kX86InstIdSha1rnds4, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(sha1rnds4, kX86InstIdSha1rnds4, X86XmmVar, X86Mem, Imm) + + //! 
Perform an intermediate calculation for the next four SHA256 message DWORDs (SHA).
+  INST_2x(sha256msg1, kX86InstIdSha256msg1, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(sha256msg1, kX86InstIdSha256msg1, X86XmmVar, X86Mem)
+
+  //! Perform a final calculation for the next four SHA256 message DWORDs (SHA).
+  INST_2x(sha256msg2, kX86InstIdSha256msg2, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(sha256msg2, kX86InstIdSha256msg2, X86XmmVar, X86Mem)
+
+  //! Perform two rounds of SHA256 operation (SHA).
+  INST_3x(sha256rnds2, kX86InstIdSha256rnds2, X86XmmVar, X86XmmVar, X86XmmVar /* XMM0 */)
+  //! \overload
+  INST_3x(sha256rnds2, kX86InstIdSha256rnds2, X86XmmVar, X86Mem, X86XmmVar /* XMM0 */)
+
+  // --------------------------------------------------------------------------
+  // [PCLMULQDQ]
+  // --------------------------------------------------------------------------
+
+  //! Packed QWORD to DQWORD carry-less multiply (PCLMULQDQ).
+  INST_3i(pclmulqdq, kX86InstIdPclmulqdq, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_3i(pclmulqdq, kX86InstIdPclmulqdq, X86XmmVar, X86Mem, Imm)
+
+  // --------------------------------------------------------------------------
+  // [AVX]
+  // --------------------------------------------------------------------------
+
+  //! Packed DP-FP add (AVX).
+  INST_3x(vaddpd, kX86InstIdVaddpd, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vaddpd, kX86InstIdVaddpd, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3x(vaddpd, kX86InstIdVaddpd, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vaddpd, kX86InstIdVaddpd, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed SP-FP add (AVX).
+  INST_3x(vaddps, kX86InstIdVaddps, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vaddps, kX86InstIdVaddps, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3x(vaddps, kX86InstIdVaddps, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vaddps, kX86InstIdVaddps, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Scalar DP-FP add (AVX).
+  INST_3x(vaddsd, kX86InstIdVaddsd, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vaddsd, kX86InstIdVaddsd, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Scalar SP-FP add (AVX).
+  INST_3x(vaddss, kX86InstIdVaddss, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vaddss, kX86InstIdVaddss, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Packed DP-FP add/subtract (AVX).
+  INST_3x(vaddsubpd, kX86InstIdVaddsubpd, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vaddsubpd, kX86InstIdVaddsubpd, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3x(vaddsubpd, kX86InstIdVaddsubpd, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vaddsubpd, kX86InstIdVaddsubpd, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed SP-FP add/subtract (AVX).
+  INST_3x(vaddsubps, kX86InstIdVaddsubps, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vaddsubps, kX86InstIdVaddsubps, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3x(vaddsubps, kX86InstIdVaddsubps, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vaddsubps, kX86InstIdVaddsubps, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed DP-FP bitwise and (AVX).
+  INST_3x(vandpd, kX86InstIdVandpd, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vandpd, kX86InstIdVandpd, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3x(vandpd, kX86InstIdVandpd, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vandpd, kX86InstIdVandpd, X86YmmVar, X86YmmVar, X86Mem)
+
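+  // NOTE (editor's sketch, not part of the vendored header): the AVX forms
+  // are non-destructive three-operand versions; e.g., with YMM variables
+  // `y0`..`y2` (illustrative names):
+  //   c.vaddpd(y0, y1, y2);  // y0 = y1 + y2 across all four DP lanes
+
+  //! Packed SP-FP bitwise and (AVX).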
+ INST_3x(vandps, kX86InstIdVandps, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vandps, kX86InstIdVandps, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vandps, kX86InstIdVandps, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vandps, kX86InstIdVandps, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed DP-FP bitwise and-not (AVX). + INST_3x(vandnpd, kX86InstIdVandnpd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vandnpd, kX86InstIdVandnpd, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vandnpd, kX86InstIdVandnpd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vandnpd, kX86InstIdVandnpd, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed SP-FP bitwise and-not (AVX). + INST_3x(vandnps, kX86InstIdVandnps, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vandnps, kX86InstIdVandnps, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vandnps, kX86InstIdVandnps, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vandnps, kX86InstIdVandnps, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed DP-FP blend (AVX). + INST_4i(vblendpd, kX86InstIdVblendpd, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vblendpd, kX86InstIdVblendpd, X86XmmVar, X86XmmVar, X86Mem, Imm) + //! \overload + INST_4i(vblendpd, kX86InstIdVblendpd, X86YmmVar, X86YmmVar, X86YmmVar, Imm) + //! \overload + INST_4i(vblendpd, kX86InstIdVblendpd, X86YmmVar, X86YmmVar, X86Mem, Imm) + + //! Packed SP-FP blend (AVX). + INST_4i(vblendps, kX86InstIdVblendps, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vblendps, kX86InstIdVblendps, X86XmmVar, X86XmmVar, X86Mem, Imm) + //! \overload + INST_4i(vblendps, kX86InstIdVblendps, X86YmmVar, X86YmmVar, X86YmmVar, Imm) + //! \overload + INST_4i(vblendps, kX86InstIdVblendps, X86YmmVar, X86YmmVar, X86Mem, Imm) + + //! Packed DP-FP variable blend (AVX). + INST_4x(vblendvpd, kX86InstIdVblendvpd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_4x(vblendvpd, kX86InstIdVblendvpd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + //! \overload + INST_4x(vblendvpd, kX86InstIdVblendvpd, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_4x(vblendvpd, kX86InstIdVblendvpd, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + + //! Packed SP-FP variable blend (AVX). + INST_4x(vblendvps, kX86InstIdVblendvps, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_4x(vblendvps, kX86InstIdVblendvps, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + //! \overload + INST_4x(vblendvps, kX86InstIdVblendvps, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_4x(vblendvps, kX86InstIdVblendvps, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + + //! Broadcast 128-bits of FP data in `o1` to low and high 128-bits in `o0` (AVX). + INST_2x(vbroadcastf128, kX86InstIdVbroadcastf128, X86YmmVar, X86Mem) + //! Broadcast DP-FP element in `o1` to four locations in `o0` (AVX). + INST_2x(vbroadcastsd, kX86InstIdVbroadcastsd, X86YmmVar, X86Mem) + //! Broadcast SP-FP element in `o1` to four locations in `o0` (AVX). + INST_2x(vbroadcastss, kX86InstIdVbroadcastss, X86XmmVar, X86Mem) + //! Broadcast SP-FP element in `o1` to eight locations in `o0` (AVX). + INST_2x(vbroadcastss, kX86InstIdVbroadcastss, X86YmmVar, X86Mem) + + //! Packed DP-FP compare (AVX). + INST_4i(vcmppd, kX86InstIdVcmppd, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vcmppd, kX86InstIdVcmppd, X86XmmVar, X86XmmVar, X86Mem, Imm) + //! 
\overload
+  INST_4i(vcmppd, kX86InstIdVcmppd, X86YmmVar, X86YmmVar, X86YmmVar, Imm)
+  //! \overload
+  INST_4i(vcmppd, kX86InstIdVcmppd, X86YmmVar, X86YmmVar, X86Mem, Imm)
+
+  //! Packed SP-FP compare (AVX).
+  INST_4i(vcmpps, kX86InstIdVcmpps, X86XmmVar, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_4i(vcmpps, kX86InstIdVcmpps, X86XmmVar, X86XmmVar, X86Mem, Imm)
+  //! \overload
+  INST_4i(vcmpps, kX86InstIdVcmpps, X86YmmVar, X86YmmVar, X86YmmVar, Imm)
+  //! \overload
+  INST_4i(vcmpps, kX86InstIdVcmpps, X86YmmVar, X86YmmVar, X86Mem, Imm)
+
+  //! Scalar DP-FP compare (AVX).
+  INST_4i(vcmpsd, kX86InstIdVcmpsd, X86XmmVar, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_4i(vcmpsd, kX86InstIdVcmpsd, X86XmmVar, X86XmmVar, X86Mem, Imm)
+
+  //! Scalar SP-FP compare (AVX).
+  INST_4i(vcmpss, kX86InstIdVcmpss, X86XmmVar, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_4i(vcmpss, kX86InstIdVcmpss, X86XmmVar, X86XmmVar, X86Mem, Imm)
+
+  //! Scalar DP-FP ordered compare and set EFLAGS (AVX).
+  INST_2x(vcomisd, kX86InstIdVcomisd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vcomisd, kX86InstIdVcomisd, X86XmmVar, X86Mem)
+
+  //! Scalar SP-FP ordered compare and set EFLAGS (AVX).
+  INST_2x(vcomiss, kX86InstIdVcomiss, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vcomiss, kX86InstIdVcomiss, X86XmmVar, X86Mem)
+
+  //! Convert packed DWORDs to packed DP-FP (AVX).
+  INST_2x(vcvtdq2pd, kX86InstIdVcvtdq2pd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vcvtdq2pd, kX86InstIdVcvtdq2pd, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vcvtdq2pd, kX86InstIdVcvtdq2pd, X86YmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vcvtdq2pd, kX86InstIdVcvtdq2pd, X86YmmVar, X86Mem)
+
+  //! Convert packed DWORDs to packed SP-FP (AVX).
+  INST_2x(vcvtdq2ps, kX86InstIdVcvtdq2ps, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vcvtdq2ps, kX86InstIdVcvtdq2ps, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vcvtdq2ps, kX86InstIdVcvtdq2ps, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_2x(vcvtdq2ps, kX86InstIdVcvtdq2ps, X86YmmVar, X86Mem)
+
+  //! Convert packed DP-FP to packed DWORDs (AVX).
+  INST_2x(vcvtpd2dq, kX86InstIdVcvtpd2dq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vcvtpd2dq, kX86InstIdVcvtpd2dq, X86XmmVar, X86YmmVar)
+  //! \overload
+  INST_2x(vcvtpd2dq, kX86InstIdVcvtpd2dq, X86XmmVar, X86Mem)
+
+  //! Convert packed DP-FP to packed SP-FP (AVX).
+  INST_2x(vcvtpd2ps, kX86InstIdVcvtpd2ps, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vcvtpd2ps, kX86InstIdVcvtpd2ps, X86XmmVar, X86YmmVar)
+  //! \overload
+  INST_2x(vcvtpd2ps, kX86InstIdVcvtpd2ps, X86XmmVar, X86Mem)
+
+  //! Convert packed SP-FP to packed DWORDs (AVX).
+  INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86YmmVar, X86Mem)
+
+  //! Convert packed SP-FP to packed DP-FP (AVX).
+  INST_2x(vcvtps2pd, kX86InstIdVcvtps2pd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vcvtps2pd, kX86InstIdVcvtps2pd, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vcvtps2pd, kX86InstIdVcvtps2pd, X86YmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vcvtps2pd, kX86InstIdVcvtps2pd, X86YmmVar, X86Mem)
+
+  //! Convert scalar DP-FP to DWORD (AVX).
+  INST_2x(vcvtsd2si, kX86InstIdVcvtsd2si, X86GpVar, X86XmmVar)
+  //! \overload
+  INST_2x(vcvtsd2si, kX86InstIdVcvtsd2si, X86GpVar, X86Mem)
+
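+  // NOTE (editor's sketch, not part of the vendored header): AVX compares
+  // take the predicate as an Imm, like their SSE counterparts; e.g., with
+  // YMM variables `y0`..`y2` (illustrative):
+  //   c.vcmpps(y0, y1, y2, imm(0));  // predicate 0 = compare-equal
+
+  //! Convert scalar DP-FP to scalar SP-FP (AVX).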
+ INST_3x(vcvtsd2ss, kX86InstIdVcvtsd2ss, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vcvtsd2ss, kX86InstIdVcvtsd2ss, X86XmmVar, X86XmmVar, X86Mem) + + //! Convert DWORD integer to scalar DP-FP (AVX). + INST_3x(vcvtsi2sd, kX86InstIdVcvtsi2sd, X86XmmVar, X86XmmVar, X86GpVar) + //! \overload + INST_3x(vcvtsi2sd, kX86InstIdVcvtsi2sd, X86XmmVar, X86XmmVar, X86Mem) + + //! Convert scalar INT32 to SP-FP (AVX). + INST_3x(vcvtsi2ss, kX86InstIdVcvtsi2ss, X86XmmVar, X86XmmVar, X86GpVar) + //! \overload + INST_3x(vcvtsi2ss, kX86InstIdVcvtsi2ss, X86XmmVar, X86XmmVar, X86Mem) + + //! Convert scalar SP-FP to DP-FP (AVX). + INST_3x(vcvtss2sd, kX86InstIdVcvtss2sd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vcvtss2sd, kX86InstIdVcvtss2sd, X86XmmVar, X86XmmVar, X86Mem) + + //! Convert scalar SP-FP to INT32 (AVX). + INST_2x(vcvtss2si, kX86InstIdVcvtss2si, X86GpVar, X86XmmVar) + //! \overload + INST_2x(vcvtss2si, kX86InstIdVcvtss2si, X86GpVar, X86Mem) + + //! Convert with truncation packed DP-FP to packed DWORDs (AVX). + INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmVar, X86YmmVar) + //! \overload + INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmVar, X86Mem) + + //! Convert with truncation packed SP-FP to packed DWORDs (AVX). + INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86XmmVar, X86Mem) + //! \overload + INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86YmmVar, X86Mem) + + //! Convert with truncation scalar DP-FP to INT32 (AVX). + INST_2x(vcvttsd2si, kX86InstIdVcvttsd2si, X86GpVar, X86XmmVar) + //! \overload + INST_2x(vcvttsd2si, kX86InstIdVcvttsd2si, X86GpVar, X86Mem) + + //! Convert with truncation scalar SP-FP to INT32 (AVX). + INST_2x(vcvttss2si, kX86InstIdVcvttss2si, X86GpVar, X86XmmVar) + //! \overload + INST_2x(vcvttss2si, kX86InstIdVcvttss2si, X86GpVar, X86Mem) + + //! Packed DP-FP divide (AVX). + INST_3x(vdivpd, kX86InstIdVdivpd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vdivpd, kX86InstIdVdivpd, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vdivpd, kX86InstIdVdivpd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vdivpd, kX86InstIdVdivpd, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed SP-FP divide (AVX). + INST_3x(vdivps, kX86InstIdVdivps, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vdivps, kX86InstIdVdivps, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vdivps, kX86InstIdVdivps, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vdivps, kX86InstIdVdivps, X86YmmVar, X86YmmVar, X86Mem) + + //! Scalar DP-FP divide (AVX). + INST_3x(vdivsd, kX86InstIdVdivsd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vdivsd, kX86InstIdVdivsd, X86XmmVar, X86XmmVar, X86Mem) + + //! Scalar SP-FP divide (AVX). + INST_3x(vdivss, kX86InstIdVdivss, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vdivss, kX86InstIdVdivss, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DP-FP dot product (AVX). + INST_4i(vdppd, kX86InstIdVdppd, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vdppd, kX86InstIdVdppd, X86XmmVar, X86XmmVar, X86Mem, Imm) + + //! Packed SP-FP dot product (AVX). + INST_4i(vdpps, kX86InstIdVdpps, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vdpps, kX86InstIdVdpps, X86XmmVar, X86XmmVar, X86Mem, Imm) + //! 
\overload
+  INST_4i(vdpps, kX86InstIdVdpps, X86YmmVar, X86YmmVar, X86YmmVar, Imm)
+  //! \overload
+  INST_4i(vdpps, kX86InstIdVdpps, X86YmmVar, X86YmmVar, X86Mem, Imm)
+
+  //! Extract 128 bits of packed FP data from `o1` and store results in `o0` (AVX).
+  INST_3i(vextractf128, kX86InstIdVextractf128, X86XmmVar, X86YmmVar, Imm)
+  //! \overload
+  INST_3i(vextractf128, kX86InstIdVextractf128, X86Mem, X86YmmVar, Imm)
+
+  //! Extract SP-FP based on selector (AVX).
+  INST_3i(vextractps, kX86InstIdVextractps, X86GpVar, X86XmmVar, Imm)
+  //! \overload
+  INST_3i(vextractps, kX86InstIdVextractps, X86Mem, X86XmmVar, Imm)
+
+  //! Packed DP-FP horizontal add (AVX).
+  INST_3x(vhaddpd, kX86InstIdVhaddpd, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vhaddpd, kX86InstIdVhaddpd, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3x(vhaddpd, kX86InstIdVhaddpd, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vhaddpd, kX86InstIdVhaddpd, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed SP-FP horizontal add (AVX).
+  INST_3x(vhaddps, kX86InstIdVhaddps, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vhaddps, kX86InstIdVhaddps, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3x(vhaddps, kX86InstIdVhaddps, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vhaddps, kX86InstIdVhaddps, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed DP-FP horizontal subtract (AVX).
+  INST_3x(vhsubpd, kX86InstIdVhsubpd, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vhsubpd, kX86InstIdVhsubpd, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3x(vhsubpd, kX86InstIdVhsubpd, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vhsubpd, kX86InstIdVhsubpd, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed SP-FP horizontal subtract (AVX).
+  INST_3x(vhsubps, kX86InstIdVhsubps, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vhsubps, kX86InstIdVhsubps, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3x(vhsubps, kX86InstIdVhsubps, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vhsubps, kX86InstIdVhsubps, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Insert 128-bit of packed FP data based on selector (AVX).
+  INST_4i(vinsertf128, kX86InstIdVinsertf128, X86YmmVar, X86YmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_4i(vinsertf128, kX86InstIdVinsertf128, X86YmmVar, X86YmmVar, X86Mem, Imm)
+
+  //! Insert SP-FP based on selector (AVX).
+  INST_4i(vinsertps, kX86InstIdVinsertps, X86XmmVar, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_4i(vinsertps, kX86InstIdVinsertps, X86XmmVar, X86XmmVar, X86Mem, Imm)
+
+  //! Load 128-bits unaligned (AVX).
+  INST_2x(vlddqu, kX86InstIdVlddqu, X86XmmVar, X86Mem)
+  //! Load 256-bits unaligned (AVX).
+  INST_2x(vlddqu, kX86InstIdVlddqu, X86YmmVar, X86Mem)
+
+  //! Load streaming SIMD extension control/status (AVX).
+  INST_1x(vldmxcsr, kX86InstIdVldmxcsr, X86Mem)
+
+  //! Store selected bytes of DQWORD to DS:EDI/RDI (AVX).
+  INST_3x(vmaskmovdqu, kX86InstIdVmaskmovdqu, X86GpVar /* ZDI */, X86XmmVar, X86XmmVar)
+
+  //! Conditionally load packed DP-FP from `o2` using mask in `o1` and store in `o0` (AVX).
+  INST_3x(vmaskmovpd, kX86InstIdVmaskmovpd, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3x(vmaskmovpd, kX86InstIdVmaskmovpd, X86YmmVar, X86YmmVar, X86Mem)
+  //! \overload
+  INST_3x(vmaskmovpd, kX86InstIdVmaskmovpd, X86Mem, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vmaskmovpd, kX86InstIdVmaskmovpd, X86Mem, X86YmmVar, X86YmmVar)
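+
+  // Usage sketch (editor's note, not part of the original AsmJit header):
+  // the Imm overloads take an immediate selector, e.g. extracting the upper
+  // 128-bit lane of a YMM variable. `c`, `lo` and `srcY` are illustrative
+  // names; asmjit's imm() helper is assumed.
+  //
+  //   c.vextractf128(lo, srcY, imm(1)); // lane 1 (bits 255..128) -> XMM `lo`
+
+  //! Conditionally load packed SP-FP from `o2` using mask in `o1` and store in `o0` (AVX).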
+ INST_3x(vmaskmovps, kX86InstIdVmaskmovps, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vmaskmovps, kX86InstIdVmaskmovps, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vmaskmovps, kX86InstIdVmaskmovps, X86Mem, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vmaskmovps, kX86InstIdVmaskmovps, X86Mem, X86YmmVar, X86YmmVar) + + //! Packed DP-FP maximum (AVX). + INST_3x(vmaxpd, kX86InstIdVmaxpd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vmaxpd, kX86InstIdVmaxpd, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vmaxpd, kX86InstIdVmaxpd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vmaxpd, kX86InstIdVmaxpd, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed SP-FP maximum (AVX). + INST_3x(vmaxps, kX86InstIdVmaxps, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vmaxps, kX86InstIdVmaxps, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vmaxps, kX86InstIdVmaxps, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vmaxps, kX86InstIdVmaxps, X86YmmVar, X86YmmVar, X86Mem) + + //! Scalar DP-FP maximum (AVX). + INST_3x(vmaxsd, kX86InstIdVmaxsd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vmaxsd, kX86InstIdVmaxsd, X86XmmVar, X86XmmVar, X86Mem) + + //! Scalar SP-FP maximum (AVX). + INST_3x(vmaxss, kX86InstIdVmaxss, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vmaxss, kX86InstIdVmaxss, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DP-FP minimum (AVX). + INST_3x(vminpd, kX86InstIdVminpd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vminpd, kX86InstIdVminpd, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vminpd, kX86InstIdVminpd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vminpd, kX86InstIdVminpd, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed SP-FP minimum (AVX). + INST_3x(vminps, kX86InstIdVminps, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vminps, kX86InstIdVminps, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vminps, kX86InstIdVminps, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vminps, kX86InstIdVminps, X86YmmVar, X86YmmVar, X86Mem) + + //! Scalar DP-FP minimum (AVX). + INST_3x(vminsd, kX86InstIdVminsd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vminsd, kX86InstIdVminsd, X86XmmVar, X86XmmVar, X86Mem) + + //! Scalar SP-FP minimum (AVX). + INST_3x(vminss, kX86InstIdVminss, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vminss, kX86InstIdVminss, X86XmmVar, X86XmmVar, X86Mem) + + //! Move 128-bits of aligned packed DP-FP (AVX). + INST_2x(vmovapd, kX86InstIdVmovapd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vmovapd, kX86InstIdVmovapd, X86XmmVar, X86Mem) + //! \overload + INST_2x(vmovapd, kX86InstIdVmovapd, X86Mem, X86XmmVar) + //! Move 256-bits of aligned packed DP-FP (AVX). + INST_2x(vmovapd, kX86InstIdVmovapd, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vmovapd, kX86InstIdVmovapd, X86YmmVar, X86Mem) + //! \overload + INST_2x(vmovapd, kX86InstIdVmovapd, X86Mem, X86YmmVar) + + //! Move 128-bits of aligned packed SP-FP (AVX). + INST_2x(vmovaps, kX86InstIdVmovaps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vmovaps, kX86InstIdVmovaps, X86XmmVar, X86Mem) + //! \overload + INST_2x(vmovaps, kX86InstIdVmovaps, X86Mem, X86XmmVar) + //! Move 256-bits of aligned packed SP-FP (AVX). + INST_2x(vmovaps, kX86InstIdVmovaps, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vmovaps, kX86InstIdVmovaps, X86YmmVar, X86Mem) + //! \overload + INST_2x(vmovaps, kX86InstIdVmovaps, X86Mem, X86YmmVar) + + //! 
Move DWORD (AVX).
+  INST_2x(vmovd, kX86InstIdVmovd, X86XmmVar, X86GpVar)
+  //! \overload
+  INST_2x(vmovd, kX86InstIdVmovd, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vmovd, kX86InstIdVmovd, X86GpVar, X86XmmVar)
+  //! \overload
+  INST_2x(vmovd, kX86InstIdVmovd, X86Mem, X86XmmVar)
+
+  //! Move QWORD (AVX).
+  INST_2x(vmovq, kX86InstIdVmovq, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vmovq, kX86InstIdVmovq, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vmovq, kX86InstIdVmovq, X86Mem, X86XmmVar)
+
+  //! Move QWORD (AVX and X64 Only).
+  INST_2x(vmovq, kX86InstIdVmovq, X86XmmVar, X86GpVar)
+  //! \overload
+  INST_2x(vmovq, kX86InstIdVmovq, X86GpVar, X86XmmVar)
+
+  //! Move one DP-FP and duplicate (AVX).
+  INST_2x(vmovddup, kX86InstIdVmovddup, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vmovddup, kX86InstIdVmovddup, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vmovddup, kX86InstIdVmovddup, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_2x(vmovddup, kX86InstIdVmovddup, X86YmmVar, X86Mem)
+
+  //! Move 128-bits aligned (AVX).
+  INST_2x(vmovdqa, kX86InstIdVmovdqa, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vmovdqa, kX86InstIdVmovdqa, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vmovdqa, kX86InstIdVmovdqa, X86Mem, X86XmmVar)
+  //! Move 256-bits aligned (AVX).
+  INST_2x(vmovdqa, kX86InstIdVmovdqa, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_2x(vmovdqa, kX86InstIdVmovdqa, X86YmmVar, X86Mem)
+  //! \overload
+  INST_2x(vmovdqa, kX86InstIdVmovdqa, X86Mem, X86YmmVar)
+
+  //! Move 128-bits unaligned (AVX).
+  INST_2x(vmovdqu, kX86InstIdVmovdqu, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vmovdqu, kX86InstIdVmovdqu, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vmovdqu, kX86InstIdVmovdqu, X86Mem, X86XmmVar)
+  //! Move 256-bits unaligned (AVX).
+  INST_2x(vmovdqu, kX86InstIdVmovdqu, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_2x(vmovdqu, kX86InstIdVmovdqu, X86YmmVar, X86Mem)
+  //! \overload
+  INST_2x(vmovdqu, kX86InstIdVmovdqu, X86Mem, X86YmmVar)
+
+  //! High to low packed SP-FP (AVX).
+  INST_3x(vmovhlps, kX86InstIdVmovhlps, X86XmmVar, X86XmmVar, X86XmmVar)
+
+  //! Move high packed DP-FP (AVX).
+  INST_3x(vmovhpd, kX86InstIdVmovhpd, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vmovhpd, kX86InstIdVmovhpd, X86Mem, X86XmmVar)
+
+  //! Move high packed SP-FP (AVX).
+  INST_3x(vmovhps, kX86InstIdVmovhps, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vmovhps, kX86InstIdVmovhps, X86Mem, X86XmmVar)
+
+  //! Move low to high packed SP-FP (AVX).
+  INST_3x(vmovlhps, kX86InstIdVmovlhps, X86XmmVar, X86XmmVar, X86XmmVar)
+
+  //! Move low packed DP-FP (AVX).
+  INST_3x(vmovlpd, kX86InstIdVmovlpd, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vmovlpd, kX86InstIdVmovlpd, X86Mem, X86XmmVar)
+
+  //! Move low packed SP-FP (AVX).
+  INST_3x(vmovlps, kX86InstIdVmovlps, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_2x(vmovlps, kX86InstIdVmovlps, X86Mem, X86XmmVar)
+
+  //! Extract packed DP-FP sign mask (AVX).
+  INST_2x(vmovmskpd, kX86InstIdVmovmskpd, X86GpVar, X86XmmVar)
+  //! \overload
+  INST_2x(vmovmskpd, kX86InstIdVmovmskpd, X86GpVar, X86YmmVar)
+
+  //! Extract packed SP-FP sign mask (AVX).
+  INST_2x(vmovmskps, kX86InstIdVmovmskps, X86GpVar, X86XmmVar)
+  //! \overload
+  INST_2x(vmovmskps, kX86InstIdVmovmskps, X86GpVar, X86YmmVar)
+
+  //! Store 128-bits using NT hint (AVX).
+  INST_2x(vmovntdq, kX86InstIdVmovntdq, X86Mem, X86XmmVar)
+  //! Store 256-bits using NT hint (AVX).
+  INST_2x(vmovntdq, kX86InstIdVmovntdq, X86Mem, X86YmmVar)
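+
+  // Usage sketch (editor's note, not part of the original AsmJit header):
+  // the aligned/unaligned move forms also accept memory operands; x86::ptr()
+  // is assumed to be this revision's memory-operand helper and `base` a GP
+  // variable holding the address.
+  //
+  //   c.vmovdqu(dstX, x86::ptr(base));     // 128-bit unaligned load
+  //   c.vmovdqu(x86::ptr(base, 16), dstX); // 128-bit unaligned store
+
+  //! Load 128-bits aligned using NT hint (AVX).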
+ INST_2x(vmovntdqa, kX86InstIdVmovntdqa, X86XmmVar, X86Mem) + + //! Store packed DP-FP (128-bits) using NT hint (AVX). + INST_2x(vmovntpd, kX86InstIdVmovntpd, X86Mem, X86XmmVar) + //! Store packed DP-FP (256-bits) using NT hint (AVX). + INST_2x(vmovntpd, kX86InstIdVmovntpd, X86Mem, X86YmmVar) + + //! Store packed SP-FP (128-bits) using NT hint (AVX). + INST_2x(vmovntps, kX86InstIdVmovntps, X86Mem, X86XmmVar) + //! Store packed SP-FP (256-bits) using NT hint (AVX). + INST_2x(vmovntps, kX86InstIdVmovntps, X86Mem, X86YmmVar) + + //! Move scalar DP-FP (AVX). + INST_3x(vmovsd, kX86InstIdVmovsd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vmovsd, kX86InstIdVmovsd, X86XmmVar, X86Mem) + //! \overload + INST_2x(vmovsd, kX86InstIdVmovsd, X86Mem, X86XmmVar) + + //! Move packed SP-FP high and duplicate (AVX). + INST_2x(vmovshdup, kX86InstIdVmovshdup, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vmovshdup, kX86InstIdVmovshdup, X86XmmVar, X86Mem) + //! \overload + INST_2x(vmovshdup, kX86InstIdVmovshdup, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vmovshdup, kX86InstIdVmovshdup, X86YmmVar, X86Mem) + + //! Move packed SP-FP low and duplicate (AVX). + INST_2x(vmovsldup, kX86InstIdVmovsldup, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vmovsldup, kX86InstIdVmovsldup, X86XmmVar, X86Mem) + //! \overload + INST_2x(vmovsldup, kX86InstIdVmovsldup, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vmovsldup, kX86InstIdVmovsldup, X86YmmVar, X86Mem) + + //! Move scalar SP-FP (AVX). + INST_3x(vmovss, kX86InstIdVmovss, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vmovss, kX86InstIdVmovss, X86XmmVar, X86Mem) + //! \overload + INST_2x(vmovss, kX86InstIdVmovss, X86Mem, X86XmmVar) + + //! Move 128-bits of unaligned packed DP-FP (AVX). + INST_2x(vmovupd, kX86InstIdVmovupd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vmovupd, kX86InstIdVmovupd, X86XmmVar, X86Mem) + //! \overload + INST_2x(vmovupd, kX86InstIdVmovupd, X86Mem, X86XmmVar) + //! Move 256-bits of unaligned packed DP-FP (AVX). + INST_2x(vmovupd, kX86InstIdVmovupd, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vmovupd, kX86InstIdVmovupd, X86YmmVar, X86Mem) + //! \overload + INST_2x(vmovupd, kX86InstIdVmovupd, X86Mem, X86YmmVar) + + //! Move 128-bits of unaligned packed SP-FP (AVX). + INST_2x(vmovups, kX86InstIdVmovups, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vmovups, kX86InstIdVmovups, X86XmmVar, X86Mem) + //! \overload + INST_2x(vmovups, kX86InstIdVmovups, X86Mem, X86XmmVar) + //! Move 256-bits of unaligned packed SP-FP (AVX). + INST_2x(vmovups, kX86InstIdVmovups, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vmovups, kX86InstIdVmovups, X86YmmVar, X86Mem) + //! \overload + INST_2x(vmovups, kX86InstIdVmovups, X86Mem, X86YmmVar) + + //! Packed WORD sums of absolute difference (AVX). + INST_4i(vmpsadbw, kX86InstIdVmpsadbw, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vmpsadbw, kX86InstIdVmpsadbw, X86XmmVar, X86XmmVar, X86Mem, Imm) + + //! Packed DP-FP multiply (AVX). + INST_3x(vmulpd, kX86InstIdVmulpd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vmulpd, kX86InstIdVmulpd, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vmulpd, kX86InstIdVmulpd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vmulpd, kX86InstIdVmulpd, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed SP-FP multiply (AVX). + INST_3x(vmulps, kX86InstIdVmulps, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vmulps, kX86InstIdVmulps, X86XmmVar, X86XmmVar, X86Mem) + //! 
\overload
+  INST_3x(vmulps, kX86InstIdVmulps, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vmulps, kX86InstIdVmulps, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Scalar DP-FP multiply (AVX).
+  INST_3x(vmulsd, kX86InstIdVmulsd, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vmulsd, kX86InstIdVmulsd, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Scalar SP-FP multiply (AVX).
+  INST_3x(vmulss, kX86InstIdVmulss, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vmulss, kX86InstIdVmulss, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Packed DP-FP bitwise or (AVX).
+  INST_3x(vorpd, kX86InstIdVorpd, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vorpd, kX86InstIdVorpd, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3x(vorpd, kX86InstIdVorpd, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vorpd, kX86InstIdVorpd, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed SP-FP bitwise or (AVX).
+  INST_3x(vorps, kX86InstIdVorps, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vorps, kX86InstIdVorps, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3x(vorps, kX86InstIdVorps, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vorps, kX86InstIdVorps, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed BYTE absolute value (AVX).
+  INST_2x(vpabsb, kX86InstIdVpabsb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vpabsb, kX86InstIdVpabsb, X86XmmVar, X86Mem)
+
+  //! Packed DWORD absolute value (AVX).
+  INST_2x(vpabsd, kX86InstIdVpabsd, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vpabsd, kX86InstIdVpabsd, X86XmmVar, X86Mem)
+
+  //! Packed WORD absolute value (AVX).
+  INST_2x(vpabsw, kX86InstIdVpabsw, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vpabsw, kX86InstIdVpabsw, X86XmmVar, X86Mem)
+
+  //! Pack DWORDs to WORDs with signed saturation (AVX).
+  INST_3x(vpackssdw, kX86InstIdVpackssdw, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpackssdw, kX86InstIdVpackssdw, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Pack WORDs to BYTEs with signed saturation (AVX).
+  INST_3x(vpacksswb, kX86InstIdVpacksswb, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpacksswb, kX86InstIdVpacksswb, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Pack DWORDs to WORDs with unsigned saturation (AVX).
+  INST_3x(vpackusdw, kX86InstIdVpackusdw, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpackusdw, kX86InstIdVpackusdw, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Pack WORDs to BYTEs with unsigned saturation (AVX).
+  INST_3x(vpackuswb, kX86InstIdVpackuswb, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpackuswb, kX86InstIdVpackuswb, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Packed BYTE add (AVX).
+  INST_3x(vpaddb, kX86InstIdVpaddb, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpaddb, kX86InstIdVpaddb, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Packed DWORD add (AVX).
+  INST_3x(vpaddd, kX86InstIdVpaddd, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpaddd, kX86InstIdVpaddd, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Packed QWORD add (AVX).
+  INST_3x(vpaddq, kX86InstIdVpaddq, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpaddq, kX86InstIdVpaddq, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Packed WORD add (AVX).
+  INST_3x(vpaddw, kX86InstIdVpaddw, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpaddw, kX86InstIdVpaddw, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Packed BYTE add with saturation (AVX).
+  INST_3x(vpaddsb, kX86InstIdVpaddsb, X86XmmVar, X86XmmVar, X86XmmVar)
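+
+  // Usage sketch (editor's note, not part of the original AsmJit header):
+  // unlike the two-operand SSE forms, these AVX forms are non-destructive;
+  // the first operand only receives the result. Variable names are
+  // illustrative.
+  //
+  //   c.vpaddd(dst, a, b); // dst = a + b per DWORD lane; `a` is untouched
+
+  //!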
\overload + INST_3x(vpaddsb, kX86InstIdVpaddsb, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD add with saturation (AVX). + INST_3x(vpaddsw, kX86InstIdVpaddsw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpaddsw, kX86InstIdVpaddsw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed BYTE add with unsigned saturation (AVX). + INST_3x(vpaddusb, kX86InstIdVpaddusb, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpaddusb, kX86InstIdVpaddusb, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD add with unsigned saturation (AVX). + INST_3x(vpaddusw, kX86InstIdVpaddusw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpaddusw, kX86InstIdVpaddusw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed align right (AVX). + INST_4i(vpalignr, kX86InstIdVpalignr, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vpalignr, kX86InstIdVpalignr, X86XmmVar, X86XmmVar, X86Mem, Imm) + + //! Packed bitwise and (AVX). + INST_3x(vpand, kX86InstIdVpand, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpand, kX86InstIdVpand, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed bitwise and-not (AVX). + INST_3x(vpandn, kX86InstIdVpandn, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpandn, kX86InstIdVpandn, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed BYTE average (AVX). + INST_3x(vpavgb, kX86InstIdVpavgb, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpavgb, kX86InstIdVpavgb, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD average (AVX). + INST_3x(vpavgw, kX86InstIdVpavgw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpavgw, kX86InstIdVpavgw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed BYTE variable blend (AVX). + INST_4x(vpblendvb, kX86InstIdVpblendvb, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_4x(vpblendvb, kX86InstIdVpblendvb, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + + //! Packed WORD blend (AVX). + INST_4i(vpblendw, kX86InstIdVpblendw, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vpblendw, kX86InstIdVpblendw, X86XmmVar, X86XmmVar, X86Mem, Imm) + + //! Packed BYTEs compare for equality (AVX). + INST_3x(vpcmpeqb, kX86InstIdVpcmpeqb, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpcmpeqb, kX86InstIdVpcmpeqb, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DWORDs compare for equality (AVX). + INST_3x(vpcmpeqd, kX86InstIdVpcmpeqd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpcmpeqd, kX86InstIdVpcmpeqd, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed QWORDs compare for equality (AVX). + INST_3x(vpcmpeqq, kX86InstIdVpcmpeqq, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpcmpeqq, kX86InstIdVpcmpeqq, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORDs compare for equality (AVX). + INST_3x(vpcmpeqw, kX86InstIdVpcmpeqw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpcmpeqw, kX86InstIdVpcmpeqw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed BYTEs compare if greater than (AVX). + INST_3x(vpcmpgtb, kX86InstIdVpcmpgtb, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpcmpgtb, kX86InstIdVpcmpgtb, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DWORDs compare if greater than (AVX). + INST_3x(vpcmpgtd, kX86InstIdVpcmpgtd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpcmpgtd, kX86InstIdVpcmpgtd, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed QWORDs compare if greater than (AVX). + INST_3x(vpcmpgtq, kX86InstIdVpcmpgtq, X86XmmVar, X86XmmVar, X86XmmVar) + //! 
\overload + INST_3x(vpcmpgtq, kX86InstIdVpcmpgtq, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORDs compare if greater than (AVX). + INST_3x(vpcmpgtw, kX86InstIdVpcmpgtw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpcmpgtw, kX86InstIdVpcmpgtw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed compare explicit length strings, return index in ECX (AVX). + INST_3i(vpcmpestri, kX86InstIdVpcmpestri, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(vpcmpestri, kX86InstIdVpcmpestri, X86XmmVar, X86Mem, Imm) + + //! Packed compare explicit length strings, return mask in XMM0 (AVX). + INST_3i(vpcmpestrm, kX86InstIdVpcmpestrm, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(vpcmpestrm, kX86InstIdVpcmpestrm, X86XmmVar, X86Mem, Imm) + + //! Packed compare implicit length strings, return index in ECX (AVX). + INST_4i(vpcmpistri, kX86InstIdVpcmpistri, X86GpVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vpcmpistri, kX86InstIdVpcmpistri, X86GpVar, X86XmmVar, X86Mem, Imm) + + //! Packed compare implicit length strings, return mask in XMM0 (AVX). + INST_4i(vpcmpistrm, kX86InstIdVpcmpistrm, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vpcmpistrm, kX86InstIdVpcmpistrm, X86XmmVar, X86XmmVar, X86Mem, Imm) + + //! Packed DP-FP permute (AVX). + INST_3x(vpermilpd, kX86InstIdVpermilpd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpermilpd, kX86InstIdVpermilpd, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vpermilpd, kX86InstIdVpermilpd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpermilpd, kX86InstIdVpermilpd, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3i(vpermilpd, kX86InstIdVpermilpd, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(vpermilpd, kX86InstIdVpermilpd, X86XmmVar, X86Mem, Imm) + //! \overload + INST_3i(vpermilpd, kX86InstIdVpermilpd, X86YmmVar, X86YmmVar, Imm) + //! \overload + INST_3i(vpermilpd, kX86InstIdVpermilpd, X86YmmVar, X86Mem, Imm) + + //! Packed SP-FP permute (AVX). + INST_3x(vpermilps, kX86InstIdVpermilps, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpermilps, kX86InstIdVpermilps, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vpermilps, kX86InstIdVpermilps, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpermilps, kX86InstIdVpermilps, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3i(vpermilps, kX86InstIdVpermilps, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(vpermilps, kX86InstIdVpermilps, X86XmmVar, X86Mem, Imm) + //! \overload + INST_3i(vpermilps, kX86InstIdVpermilps, X86YmmVar, X86YmmVar, Imm) + //! \overload + INST_3i(vpermilps, kX86InstIdVpermilps, X86YmmVar, X86Mem, Imm) + + //! Packed 128-bit FP permute (AVX). + INST_4i(vperm2f128, kX86InstIdVperm2f128, X86YmmVar, X86YmmVar, X86YmmVar, Imm) + //! \overload + INST_4i(vperm2f128, kX86InstIdVperm2f128, X86YmmVar, X86YmmVar, X86Mem, Imm) + + //! Extract BYTE (AVX). + INST_3i(vpextrb, kX86InstIdVpextrb, X86GpVar, X86XmmVar, Imm) + //! \overload + INST_3i(vpextrb, kX86InstIdVpextrb, X86Mem, X86XmmVar, Imm) + + //! Extract DWORD (AVX). + INST_3i(vpextrd, kX86InstIdVpextrd, X86GpVar, X86XmmVar, Imm) + //! \overload + INST_3i(vpextrd, kX86InstIdVpextrd, X86Mem, X86XmmVar, Imm) + + //! Extract QWORD (AVX and X64 Only). + INST_3i(vpextrq, kX86InstIdVpextrq, X86GpVar, X86XmmVar, Imm) + //! \overload + INST_3i(vpextrq, kX86InstIdVpextrq, X86Mem, X86XmmVar, Imm) + + //! Extract WORD (AVX). + INST_3i(vpextrw, kX86InstIdVpextrw, X86GpVar, X86XmmVar, Imm) + //! 
\overload + INST_3i(vpextrw, kX86InstIdVpextrw, X86Mem, X86XmmVar, Imm) + + //! Packed DWORD horizontal add (AVX). + INST_3x(vphaddd, kX86InstIdVphaddd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vphaddd, kX86InstIdVphaddd, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD horizontal add with saturation (AVX). + INST_3x(vphaddsw, kX86InstIdVphaddsw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vphaddsw, kX86InstIdVphaddsw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD horizontal add (AVX). + INST_3x(vphaddw, kX86InstIdVphaddw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vphaddw, kX86InstIdVphaddw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD horizontal minimum (AVX). + INST_2x(vphminposuw, kX86InstIdVphminposuw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vphminposuw, kX86InstIdVphminposuw, X86XmmVar, X86Mem) + + //! Packed DWORD horizontal subtract (AVX). + INST_3x(vphsubd, kX86InstIdVphsubd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vphsubd, kX86InstIdVphsubd, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD horizontal subtract with saturation (AVX). + INST_3x(vphsubsw, kX86InstIdVphsubsw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vphsubsw, kX86InstIdVphsubsw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD horizontal subtract (AVX). + INST_3x(vphsubw, kX86InstIdVphsubw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vphsubw, kX86InstIdVphsubw, X86XmmVar, X86XmmVar, X86Mem) + + //! Insert BYTE based on selector (AVX). + INST_4i(vpinsrb, kX86InstIdVpinsrb, X86XmmVar, X86XmmVar, X86GpVar, Imm) + //! \overload + INST_4i(vpinsrb, kX86InstIdVpinsrb, X86XmmVar, X86XmmVar, X86Mem, Imm) + + //! Insert DWORD based on selector (AVX). + INST_4i(vpinsrd, kX86InstIdVpinsrd, X86XmmVar, X86XmmVar, X86GpVar, Imm) + //! \overload + INST_4i(vpinsrd, kX86InstIdVpinsrd, X86XmmVar, X86XmmVar, X86Mem, Imm) + + //! Insert QWORD based on selector (AVX and X64 Only). + INST_4i(vpinsrq, kX86InstIdVpinsrq, X86XmmVar, X86XmmVar, X86GpVar, Imm) + //! \overload + INST_4i(vpinsrq, kX86InstIdVpinsrq, X86XmmVar, X86XmmVar, X86Mem, Imm) + + //! Insert WORD based on selector (AVX). + INST_4i(vpinsrw, kX86InstIdVpinsrw, X86XmmVar, X86XmmVar, X86GpVar, Imm) + //! \overload + INST_4i(vpinsrw, kX86InstIdVpinsrw, X86XmmVar, X86XmmVar, X86Mem, Imm) + + //! Packed multiply and add signed and unsigned bytes (AVX). + INST_3x(vpmaddubsw, kX86InstIdVpmaddubsw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmaddubsw, kX86InstIdVpmaddubsw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD multiply and add to packed DWORD (AVX). + INST_3x(vpmaddwd, kX86InstIdVpmaddwd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmaddwd, kX86InstIdVpmaddwd, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed BYTE maximum (AVX). + INST_3x(vpmaxsb, kX86InstIdVpmaxsb, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmaxsb, kX86InstIdVpmaxsb, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DWORD maximum (AVX). + INST_3x(vpmaxsd, kX86InstIdVpmaxsd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmaxsd, kX86InstIdVpmaxsd, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD maximum (AVX). + INST_3x(vpmaxsw, kX86InstIdVpmaxsw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmaxsw, kX86InstIdVpmaxsw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed BYTE unsigned maximum (AVX). + INST_3x(vpmaxub, kX86InstIdVpmaxub, X86XmmVar, X86XmmVar, X86XmmVar) + //! 
\overload + INST_3x(vpmaxub, kX86InstIdVpmaxub, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DWORD unsigned maximum (AVX). + INST_3x(vpmaxud, kX86InstIdVpmaxud, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmaxud, kX86InstIdVpmaxud, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD unsigned maximum (AVX). + INST_3x(vpmaxuw, kX86InstIdVpmaxuw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmaxuw, kX86InstIdVpmaxuw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed BYTE minimum (AVX). + INST_3x(vpminsb, kX86InstIdVpminsb, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpminsb, kX86InstIdVpminsb, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DWORD minimum (AVX). + INST_3x(vpminsd, kX86InstIdVpminsd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpminsd, kX86InstIdVpminsd, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD minimum (AVX). + INST_3x(vpminsw, kX86InstIdVpminsw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpminsw, kX86InstIdVpminsw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed BYTE unsigned minimum (AVX). + INST_3x(vpminub, kX86InstIdVpminub, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpminub, kX86InstIdVpminub, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DWORD unsigned minimum (AVX). + INST_3x(vpminud, kX86InstIdVpminud, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpminud, kX86InstIdVpminud, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD unsigned minimum (AVX). + INST_3x(vpminuw, kX86InstIdVpminuw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpminuw, kX86InstIdVpminuw, X86XmmVar, X86XmmVar, X86Mem) + + //! Move Byte mask to integer (AVX). + INST_2x(vpmovmskb, kX86InstIdVpmovmskb, X86GpVar, X86XmmVar) + + //! BYTE to DWORD with sign extend (AVX). + INST_2x(vpmovsxbd, kX86InstIdVpmovsxbd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpmovsxbd, kX86InstIdVpmovsxbd, X86XmmVar, X86Mem) + + //! Packed BYTE to QWORD with sign extend (AVX). + INST_2x(vpmovsxbq, kX86InstIdVpmovsxbq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpmovsxbq, kX86InstIdVpmovsxbq, X86XmmVar, X86Mem) + + //! Packed BYTE to WORD with sign extend (AVX). + INST_2x(vpmovsxbw, kX86InstIdVpmovsxbw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpmovsxbw, kX86InstIdVpmovsxbw, X86XmmVar, X86Mem) + + //! Packed DWORD to QWORD with sign extend (AVX). + INST_2x(vpmovsxdq, kX86InstIdVpmovsxdq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpmovsxdq, kX86InstIdVpmovsxdq, X86XmmVar, X86Mem) + + //! Packed WORD to DWORD with sign extend (AVX). + INST_2x(vpmovsxwd, kX86InstIdVpmovsxwd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpmovsxwd, kX86InstIdVpmovsxwd, X86XmmVar, X86Mem) + + //! Packed WORD to QWORD with sign extend (AVX). + INST_2x(vpmovsxwq, kX86InstIdVpmovsxwq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpmovsxwq, kX86InstIdVpmovsxwq, X86XmmVar, X86Mem) + + //! BYTE to DWORD with zero extend (AVX). + INST_2x(vpmovzxbd, kX86InstIdVpmovzxbd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpmovzxbd, kX86InstIdVpmovzxbd, X86XmmVar, X86Mem) + + //! Packed BYTE to QWORD with zero extend (AVX). + INST_2x(vpmovzxbq, kX86InstIdVpmovzxbq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpmovzxbq, kX86InstIdVpmovzxbq, X86XmmVar, X86Mem) + + //! BYTE to WORD with zero extend (AVX). + INST_2x(vpmovzxbw, kX86InstIdVpmovzxbw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpmovzxbw, kX86InstIdVpmovzxbw, X86XmmVar, X86Mem) + + //! Packed DWORD to QWORD with zero extend (AVX). 
+ INST_2x(vpmovzxdq, kX86InstIdVpmovzxdq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpmovzxdq, kX86InstIdVpmovzxdq, X86XmmVar, X86Mem) + + //! Packed WORD to DWORD with zero extend (AVX). + INST_2x(vpmovzxwd, kX86InstIdVpmovzxwd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpmovzxwd, kX86InstIdVpmovzxwd, X86XmmVar, X86Mem) + + //! Packed WORD to QWORD with zero extend (AVX). + INST_2x(vpmovzxwq, kX86InstIdVpmovzxwq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpmovzxwq, kX86InstIdVpmovzxwq, X86XmmVar, X86Mem) + + //! Packed DWORD to QWORD multiply (AVX). + INST_3x(vpmuldq, kX86InstIdVpmuldq, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmuldq, kX86InstIdVpmuldq, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD multiply high, round and scale (AVX). + INST_3x(vpmulhrsw, kX86InstIdVpmulhrsw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmulhrsw, kX86InstIdVpmulhrsw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD unsigned multiply high (AVX). + INST_3x(vpmulhuw, kX86InstIdVpmulhuw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmulhuw, kX86InstIdVpmulhuw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD multiply high (AVX). + INST_3x(vpmulhw, kX86InstIdVpmulhw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmulhw, kX86InstIdVpmulhw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DWORD multiply low (AVX). + INST_3x(vpmulld, kX86InstIdVpmulld, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmulld, kX86InstIdVpmulld, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORDs multiply low (AVX). + INST_3x(vpmullw, kX86InstIdVpmullw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmullw, kX86InstIdVpmullw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DWORD multiply to QWORD (AVX). + INST_3x(vpmuludq, kX86InstIdVpmuludq, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpmuludq, kX86InstIdVpmuludq, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed bitwise or (AVX). + INST_3x(vpor, kX86InstIdVpor, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpor, kX86InstIdVpor, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD sum of absolute differences (AVX). + INST_3x(vpsadbw, kX86InstIdVpsadbw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpsadbw, kX86InstIdVpsadbw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed BYTE shuffle (AVX). + INST_3x(vpshufb, kX86InstIdVpshufb, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpshufb, kX86InstIdVpshufb, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DWORD shuffle (AVX). + INST_3i(vpshufd, kX86InstIdVpshufd, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(vpshufd, kX86InstIdVpshufd, X86XmmVar, X86Mem, Imm) + + //! Packed WORD shuffle high (AVX). + INST_3i(vpshufhw, kX86InstIdVpshufhw, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(vpshufhw, kX86InstIdVpshufhw, X86XmmVar, X86Mem, Imm) + + //! Packed WORD shuffle low (AVX). + INST_3i(vpshuflw, kX86InstIdVpshuflw, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(vpshuflw, kX86InstIdVpshuflw, X86XmmVar, X86Mem, Imm) + + //! Packed BYTE sign (AVX). + INST_3x(vpsignb, kX86InstIdVpsignb, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpsignb, kX86InstIdVpsignb, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DWORD sign (AVX). + INST_3x(vpsignd, kX86InstIdVpsignd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpsignd, kX86InstIdVpsignd, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD sign (AVX). 
+  INST_3x(vpsignw, kX86InstIdVpsignw, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpsignw, kX86InstIdVpsignw, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Packed DWORD shift left logical (AVX).
+  INST_3x(vpslld, kX86InstIdVpslld, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpslld, kX86InstIdVpslld, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3i(vpslld, kX86InstIdVpslld, X86XmmVar, X86XmmVar, Imm)
+
+  //! Packed DQWORD shift left logical (AVX).
+  INST_3i(vpslldq, kX86InstIdVpslldq, X86XmmVar, X86XmmVar, Imm)
+
+  //! Packed QWORD shift left logical (AVX).
+  INST_3x(vpsllq, kX86InstIdVpsllq, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpsllq, kX86InstIdVpsllq, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3i(vpsllq, kX86InstIdVpsllq, X86XmmVar, X86XmmVar, Imm)
+
+  //! Packed WORD shift left logical (AVX).
+  INST_3x(vpsllw, kX86InstIdVpsllw, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpsllw, kX86InstIdVpsllw, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3i(vpsllw, kX86InstIdVpsllw, X86XmmVar, X86XmmVar, Imm)
+
+  //! Packed DWORD shift right arithmetic (AVX).
+  INST_3x(vpsrad, kX86InstIdVpsrad, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpsrad, kX86InstIdVpsrad, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3i(vpsrad, kX86InstIdVpsrad, X86XmmVar, X86XmmVar, Imm)
+
+  //! Packed WORD shift right arithmetic (AVX).
+  INST_3x(vpsraw, kX86InstIdVpsraw, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpsraw, kX86InstIdVpsraw, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3i(vpsraw, kX86InstIdVpsraw, X86XmmVar, X86XmmVar, Imm)
+
+  //! Packed DWORD shift right logical (AVX).
+  INST_3x(vpsrld, kX86InstIdVpsrld, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpsrld, kX86InstIdVpsrld, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3i(vpsrld, kX86InstIdVpsrld, X86XmmVar, X86XmmVar, Imm)
+
+  //! Packed DQWORD shift right logical (AVX).
+  INST_3i(vpsrldq, kX86InstIdVpsrldq, X86XmmVar, X86XmmVar, Imm)
+
+  //! Packed QWORD shift right logical (AVX).
+  INST_3x(vpsrlq, kX86InstIdVpsrlq, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpsrlq, kX86InstIdVpsrlq, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3i(vpsrlq, kX86InstIdVpsrlq, X86XmmVar, X86XmmVar, Imm)
+
+  //! Packed WORD shift right logical (AVX).
+  INST_3x(vpsrlw, kX86InstIdVpsrlw, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpsrlw, kX86InstIdVpsrlw, X86XmmVar, X86XmmVar, X86Mem)
+  //! \overload
+  INST_3i(vpsrlw, kX86InstIdVpsrlw, X86XmmVar, X86XmmVar, Imm)
+
+  //! Packed BYTE subtract (AVX).
+  INST_3x(vpsubb, kX86InstIdVpsubb, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpsubb, kX86InstIdVpsubb, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Packed DWORD subtract (AVX).
+  INST_3x(vpsubd, kX86InstIdVpsubd, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpsubd, kX86InstIdVpsubd, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Packed QWORD subtract (AVX).
+  INST_3x(vpsubq, kX86InstIdVpsubq, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpsubq, kX86InstIdVpsubq, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Packed WORD subtract (AVX).
+  INST_3x(vpsubw, kX86InstIdVpsubw, X86XmmVar, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_3x(vpsubw, kX86InstIdVpsubw, X86XmmVar, X86XmmVar, X86Mem)
+
+  //! Packed BYTE subtract with saturation (AVX).
+  INST_3x(vpsubsb, kX86InstIdVpsubsb, X86XmmVar, X86XmmVar, X86XmmVar)
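+
+  // Usage sketch (editor's note, not part of the original AsmJit header):
+  // each shift exists in a variable-count form (XMM/memory count) and an
+  // immediate form. Variable names are illustrative; asmjit's imm() helper
+  // is assumed.
+  //
+  //   c.vpslld(dst, src, imm(4)); // shift each DWORD lane left by 4 bits
+
+  //!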
\overload + INST_3x(vpsubsb, kX86InstIdVpsubsb, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD subtract with saturation (AVX). + INST_3x(vpsubsw, kX86InstIdVpsubsw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpsubsw, kX86InstIdVpsubsw, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed BYTE subtract with unsigned saturation (AVX). + INST_3x(vpsubusb, kX86InstIdVpsubusb, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpsubusb, kX86InstIdVpsubusb, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed WORD subtract with unsigned saturation (AVX). + INST_3x(vpsubusw, kX86InstIdVpsubusw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpsubusw, kX86InstIdVpsubusw, X86XmmVar, X86XmmVar, X86Mem) + + //! Logical compare (AVX). + INST_2x(vptest, kX86InstIdVptest, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vptest, kX86InstIdVptest, X86XmmVar, X86Mem) + //! \overload + INST_2x(vptest, kX86InstIdVptest, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vptest, kX86InstIdVptest, X86YmmVar, X86Mem) + + //! Unpack high packed BYTEs to WORDs (AVX). + INST_3x(vpunpckhbw, kX86InstIdVpunpckhbw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpunpckhbw, kX86InstIdVpunpckhbw, X86XmmVar, X86XmmVar, X86Mem) + + //! Unpack high packed DWORDs to QWORDs (AVX). + INST_3x(vpunpckhdq, kX86InstIdVpunpckhdq, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpunpckhdq, kX86InstIdVpunpckhdq, X86XmmVar, X86XmmVar, X86Mem) + + //! Unpack high packed QWORDs to DQWORD (AVX). + INST_3x(vpunpckhqdq, kX86InstIdVpunpckhqdq, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpunpckhqdq, kX86InstIdVpunpckhqdq, X86XmmVar, X86XmmVar, X86Mem) + + //! Unpack high packed WORDs to DWORDs (AVX). + INST_3x(vpunpckhwd, kX86InstIdVpunpckhwd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpunpckhwd, kX86InstIdVpunpckhwd, X86XmmVar, X86XmmVar, X86Mem) + + //! Unpack low packed BYTEs to WORDs (AVX). + INST_3x(vpunpcklbw, kX86InstIdVpunpcklbw, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpunpcklbw, kX86InstIdVpunpcklbw, X86XmmVar, X86XmmVar, X86Mem) + + //! Unpack low packed DWORDs to QWORDs (AVX). + INST_3x(vpunpckldq, kX86InstIdVpunpckldq, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpunpckldq, kX86InstIdVpunpckldq, X86XmmVar, X86XmmVar, X86Mem) + + //! Unpack low packed QWORDs to DQWORD (AVX). + INST_3x(vpunpcklqdq, kX86InstIdVpunpcklqdq, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpunpcklqdq, kX86InstIdVpunpcklqdq, X86XmmVar, X86XmmVar, X86Mem) + + //! Unpack low packed WORDs to DWORDs (AVX). + INST_3x(vpunpcklwd, kX86InstIdVpunpcklwd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpunpcklwd, kX86InstIdVpunpcklwd, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed bitwise xor (AVX). + INST_3x(vpxor, kX86InstIdVpxor, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vpxor, kX86InstIdVpxor, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed SP-FP reciprocal (AVX). + INST_2x(vrcpps, kX86InstIdVrcpps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vrcpps, kX86InstIdVrcpps, X86XmmVar, X86Mem) + //! \overload + INST_2x(vrcpps, kX86InstIdVrcpps, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vrcpps, kX86InstIdVrcpps, X86YmmVar, X86Mem) + + //! Scalar SP-FP reciprocal (AVX). + INST_3x(vrcpss, kX86InstIdVrcpss, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vrcpss, kX86InstIdVrcpss, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed SP-FP square root reciprocal (AVX). 
+ INST_2x(vrsqrtps, kX86InstIdVrsqrtps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vrsqrtps, kX86InstIdVrsqrtps, X86XmmVar, X86Mem) + //! \overload + INST_2x(vrsqrtps, kX86InstIdVrsqrtps, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vrsqrtps, kX86InstIdVrsqrtps, X86YmmVar, X86Mem) + + //! Scalar SP-FP square root reciprocal (AVX). + INST_3x(vrsqrtss, kX86InstIdVrsqrtss, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vrsqrtss, kX86InstIdVrsqrtss, X86XmmVar, X86XmmVar, X86Mem) + + //! Packed DP-FP round (AVX). + INST_3i(vroundpd, kX86InstIdVroundpd, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(vroundpd, kX86InstIdVroundpd, X86XmmVar, X86Mem, Imm) + //! \overload + INST_3i(vroundpd, kX86InstIdVroundpd, X86YmmVar, X86YmmVar, Imm) + //! \overload + INST_3i(vroundpd, kX86InstIdVroundpd, X86YmmVar, X86Mem, Imm) + + //! Packed SP-FP round (AVX). + INST_3i(vroundps, kX86InstIdVroundps, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(vroundps, kX86InstIdVroundps, X86XmmVar, X86Mem, Imm) + //! \overload + INST_3i(vroundps, kX86InstIdVroundps, X86YmmVar, X86YmmVar, Imm) + //! \overload + INST_3i(vroundps, kX86InstIdVroundps, X86YmmVar, X86Mem, Imm) + + //! Scalar DP-FP round (AVX). + INST_4i(vroundsd, kX86InstIdVroundsd, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vroundsd, kX86InstIdVroundsd, X86XmmVar, X86XmmVar, X86Mem, Imm) + + //! Scalar SP-FP round (AVX). + INST_4i(vroundss, kX86InstIdVroundss, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vroundss, kX86InstIdVroundss, X86XmmVar, X86XmmVar, X86Mem, Imm) + + //! Shuffle DP-FP (AVX). + INST_4i(vshufpd, kX86InstIdVshufpd, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vshufpd, kX86InstIdVshufpd, X86XmmVar, X86XmmVar, X86Mem, Imm) + //! \overload + INST_4i(vshufpd, kX86InstIdVshufpd, X86YmmVar, X86YmmVar, X86YmmVar, Imm) + //! \overload + INST_4i(vshufpd, kX86InstIdVshufpd, X86YmmVar, X86YmmVar, X86Mem, Imm) + + //! Shuffle SP-FP (AVX). + INST_4i(vshufps, kX86InstIdVshufps, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vshufps, kX86InstIdVshufps, X86XmmVar, X86XmmVar, X86Mem, Imm) + //! \overload + INST_4i(vshufps, kX86InstIdVshufps, X86YmmVar, X86YmmVar, X86YmmVar, Imm) + //! \overload + INST_4i(vshufps, kX86InstIdVshufps, X86YmmVar, X86YmmVar, X86Mem, Imm) + + //! Packed DP-FP square root (AVX). + INST_2x(vsqrtpd, kX86InstIdVsqrtpd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vsqrtpd, kX86InstIdVsqrtpd, X86XmmVar, X86Mem) + //! \overload + INST_2x(vsqrtpd, kX86InstIdVsqrtpd, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vsqrtpd, kX86InstIdVsqrtpd, X86YmmVar, X86Mem) + + //! Packed SP-FP square root (AVX). + INST_2x(vsqrtps, kX86InstIdVsqrtps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vsqrtps, kX86InstIdVsqrtps, X86XmmVar, X86Mem) + //! \overload + INST_2x(vsqrtps, kX86InstIdVsqrtps, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vsqrtps, kX86InstIdVsqrtps, X86YmmVar, X86Mem) + + //! Scalar DP-FP square root (AVX). + INST_3x(vsqrtsd, kX86InstIdVsqrtsd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vsqrtsd, kX86InstIdVsqrtsd, X86XmmVar, X86XmmVar, X86Mem) + + //! Scalar SP-FP square root (AVX). + INST_3x(vsqrtss, kX86InstIdVsqrtss, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vsqrtss, kX86InstIdVsqrtss, X86XmmVar, X86XmmVar, X86Mem) + + //! Store streaming SIMD extension control/status (AVX). + INST_1x(vstmxcsr, kX86InstIdVstmxcsr, X86Mem) + + //! Packed DP-FP subtract (AVX). 
+ INST_3x(vsubpd, kX86InstIdVsubpd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vsubpd, kX86InstIdVsubpd, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vsubpd, kX86InstIdVsubpd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vsubpd, kX86InstIdVsubpd, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed SP-FP subtract (AVX). + INST_3x(vsubps, kX86InstIdVsubps, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vsubps, kX86InstIdVsubps, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vsubps, kX86InstIdVsubps, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vsubps, kX86InstIdVsubps, X86YmmVar, X86YmmVar, X86Mem) + + //! Scalar DP-FP subtract (AVX). + INST_3x(vsubsd, kX86InstIdVsubsd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vsubsd, kX86InstIdVsubsd, X86XmmVar, X86XmmVar, X86Mem) + + //! Scalar SP-FP subtract (AVX). + INST_3x(vsubss, kX86InstIdVsubss, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vsubss, kX86InstIdVsubss, X86XmmVar, X86XmmVar, X86Mem) + + //! Logical compare DP-FP (AVX). + INST_2x(vtestpd, kX86InstIdVtestpd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vtestpd, kX86InstIdVtestpd, X86XmmVar, X86Mem) + //! \overload + INST_2x(vtestpd, kX86InstIdVtestpd, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vtestpd, kX86InstIdVtestpd, X86YmmVar, X86Mem) + + //! Logical compare SP-FP (AVX). + INST_2x(vtestps, kX86InstIdVtestps, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vtestps, kX86InstIdVtestps, X86XmmVar, X86Mem) + //! \overload + INST_2x(vtestps, kX86InstIdVtestps, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vtestps, kX86InstIdVtestps, X86YmmVar, X86Mem) + + //! Scalar DP-FP unordered compare and set EFLAGS (AVX). + INST_2x(vucomisd, kX86InstIdVucomisd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vucomisd, kX86InstIdVucomisd, X86XmmVar, X86Mem) + + //! Unordered scalar SP-FP compare and set EFLAGS (AVX). + INST_2x(vucomiss, kX86InstIdVucomiss, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vucomiss, kX86InstIdVucomiss, X86XmmVar, X86Mem) + + //! Unpack and interleave high packed DP-FP (AVX). + INST_3x(vunpckhpd, kX86InstIdVunpckhpd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vunpckhpd, kX86InstIdVunpckhpd, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vunpckhpd, kX86InstIdVunpckhpd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vunpckhpd, kX86InstIdVunpckhpd, X86YmmVar, X86YmmVar, X86Mem) + + //! Unpack high packed SP-FP data (AVX). + INST_3x(vunpckhps, kX86InstIdVunpckhps, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vunpckhps, kX86InstIdVunpckhps, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vunpckhps, kX86InstIdVunpckhps, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vunpckhps, kX86InstIdVunpckhps, X86YmmVar, X86YmmVar, X86Mem) + + //! Unpack and interleave low packed DP-FP (AVX). + INST_3x(vunpcklpd, kX86InstIdVunpcklpd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vunpcklpd, kX86InstIdVunpcklpd, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vunpcklpd, kX86InstIdVunpcklpd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vunpcklpd, kX86InstIdVunpcklpd, X86YmmVar, X86YmmVar, X86Mem) + + //! Unpack low packed SP-FP data (AVX). + INST_3x(vunpcklps, kX86InstIdVunpcklps, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vunpcklps, kX86InstIdVunpcklps, X86XmmVar, X86XmmVar, X86Mem) + //! 
\overload + INST_3x(vunpcklps, kX86InstIdVunpcklps, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vunpcklps, kX86InstIdVunpcklps, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed DP-FP bitwise xor (AVX). + INST_3x(vxorpd, kX86InstIdVxorpd, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vxorpd, kX86InstIdVxorpd, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vxorpd, kX86InstIdVxorpd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vxorpd, kX86InstIdVxorpd, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed SP-FP bitwise xor (AVX). + INST_3x(vxorps, kX86InstIdVxorps, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vxorps, kX86InstIdVxorps, X86XmmVar, X86XmmVar, X86Mem) + //! \overload + INST_3x(vxorps, kX86InstIdVxorps, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vxorps, kX86InstIdVxorps, X86YmmVar, X86YmmVar, X86Mem) + + //! Zero all YMM registers. + INST_0x(vzeroall, kX86InstIdVzeroall) + //! Zero upper 128-bits of all YMM registers. + INST_0x(vzeroupper, kX86InstIdVzeroupper) + + // -------------------------------------------------------------------------- + // [AVX+AESNI] + // -------------------------------------------------------------------------- + + //! Perform a single round of the AES decryption flow (AVX+AESNI). + INST_3x(vaesdec, kX86InstIdVaesdec, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vaesdec, kX86InstIdVaesdec, X86XmmVar, X86XmmVar, X86Mem) + + //! Perform the last round of the AES decryption flow (AVX+AESNI). + INST_3x(vaesdeclast, kX86InstIdVaesdeclast, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vaesdeclast, kX86InstIdVaesdeclast, X86XmmVar, X86XmmVar, X86Mem) + + //! Perform a single round of the AES encryption flow (AVX+AESNI). + INST_3x(vaesenc, kX86InstIdVaesenc, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vaesenc, kX86InstIdVaesenc, X86XmmVar, X86XmmVar, X86Mem) + + //! Perform the last round of the AES encryption flow (AVX+AESNI). + INST_3x(vaesenclast, kX86InstIdVaesenclast, X86XmmVar, X86XmmVar, X86XmmVar) + //! \overload + INST_3x(vaesenclast, kX86InstIdVaesenclast, X86XmmVar, X86XmmVar, X86Mem) + + //! Perform the InvMixColumns transformation (AVX+AESNI). + INST_2x(vaesimc, kX86InstIdVaesimc, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vaesimc, kX86InstIdVaesimc, X86XmmVar, X86Mem) + + //! Assist in expanding the AES cipher key (AVX+AESNI). + INST_3i(vaeskeygenassist, kX86InstIdVaeskeygenassist, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_3i(vaeskeygenassist, kX86InstIdVaeskeygenassist, X86XmmVar, X86Mem, Imm) + + // -------------------------------------------------------------------------- + // [AVX+PCLMULQDQ] + // -------------------------------------------------------------------------- + + //! Carry-less multiplication QWORD (AVX+PCLMULQDQ). + INST_4i(vpclmulqdq, kX86InstIdVpclmulqdq, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vpclmulqdq, kX86InstIdVpclmulqdq, X86XmmVar, X86XmmVar, X86Mem, Imm) + + // -------------------------------------------------------------------------- + // [AVX2] + // -------------------------------------------------------------------------- + + //! Broadcast low 128-bit element in `o1` to `o0` (AVX2). + INST_2x(vbroadcasti128, kX86InstIdVbroadcasti128, X86YmmVar, X86Mem) + //! Broadcast low DP-FP element in `o1` to `o0` (AVX2). + INST_2x(vbroadcastsd, kX86InstIdVbroadcastsd, X86YmmVar, X86XmmVar) + //! Broadcast low SP-FP element in `o1` to `o0` (AVX2). 
+ INST_2x(vbroadcastss, kX86InstIdVbroadcastss, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vbroadcastss, kX86InstIdVbroadcastss, X86YmmVar, X86XmmVar) + + //! Extract 128-bit element from `o1` to `o0` based on selector (AVX2). + INST_3i(vextracti128, kX86InstIdVextracti128, X86XmmVar, X86YmmVar, Imm) + //! \overload + INST_3i(vextracti128, kX86InstIdVextracti128, X86Mem, X86YmmVar, Imm) + + //! Gather DP-FP from DWORD indexes specified in `o1`s VSIB (AVX2). + INST_3x(vgatherdpd, kX86InstIdVgatherdpd, X86XmmVar, X86Mem, X86XmmVar) + //! \overload + INST_3x(vgatherdpd, kX86InstIdVgatherdpd, X86YmmVar, X86Mem, X86YmmVar) + + //! Gather SP-FP from DWORD indexes specified in `o1`s VSIB (AVX2). + INST_3x(vgatherdps, kX86InstIdVgatherdps, X86XmmVar, X86Mem, X86XmmVar) + //! \overload + INST_3x(vgatherdps, kX86InstIdVgatherdps, X86YmmVar, X86Mem, X86YmmVar) + + //! Gather DP-FP from QWORD indexes specified in `o1`s VSIB (AVX2). + INST_3x(vgatherqpd, kX86InstIdVgatherqpd, X86XmmVar, X86Mem, X86XmmVar) + //! \overload + INST_3x(vgatherqpd, kX86InstIdVgatherqpd, X86YmmVar, X86Mem, X86YmmVar) + + //! Gather SP-FP from QWORD indexes specified in `o1`s VSIB (AVX2). + INST_3x(vgatherqps, kX86InstIdVgatherqps, X86XmmVar, X86Mem, X86XmmVar) + + //! Insert 128-bit of packed data based on selector (AVX2). + INST_4i(vinserti128, kX86InstIdVinserti128, X86YmmVar, X86YmmVar, X86XmmVar, Imm) + //! \overload + INST_4i(vinserti128, kX86InstIdVinserti128, X86YmmVar, X86YmmVar, X86Mem, Imm) + + //! Load 256-bits aligned using NT hint (AVX2). + INST_2x(vmovntdqa, kX86InstIdVmovntdqa, X86YmmVar, X86Mem) + + //! Packed WORD sums of absolute difference (AVX2). + INST_4i(vmpsadbw, kX86InstIdVmpsadbw, X86YmmVar, X86YmmVar, X86YmmVar, Imm) + //! \overload + INST_4i(vmpsadbw, kX86InstIdVmpsadbw, X86YmmVar, X86YmmVar, X86Mem, Imm) + + //! Packed BYTE absolute value (AVX2). + INST_2x(vpabsb, kX86InstIdVpabsb, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vpabsb, kX86InstIdVpabsb, X86YmmVar, X86Mem) + + //! Packed DWORD absolute value (AVX2). + INST_2x(vpabsd, kX86InstIdVpabsd, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vpabsd, kX86InstIdVpabsd, X86YmmVar, X86Mem) + + //! Packed WORD absolute value (AVX2). + INST_2x(vpabsw, kX86InstIdVpabsw, X86YmmVar, X86YmmVar) + //! \overload + INST_2x(vpabsw, kX86InstIdVpabsw, X86YmmVar, X86Mem) + + //! Pack DWORDs to WORDs with signed saturation (AVX2). + INST_3x(vpackssdw, kX86InstIdVpackssdw, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpackssdw, kX86InstIdVpackssdw, X86YmmVar, X86YmmVar, X86Mem) + + //! Pack WORDs to BYTEs with signed saturation (AVX2). + INST_3x(vpacksswb, kX86InstIdVpacksswb, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpacksswb, kX86InstIdVpacksswb, X86YmmVar, X86YmmVar, X86Mem) + + //! Pack DWORDs to WORDs with unsigned saturation (AVX2). + INST_3x(vpackusdw, kX86InstIdVpackusdw, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpackusdw, kX86InstIdVpackusdw, X86YmmVar, X86YmmVar, X86Mem) + + //! Pack WORDs to BYTEs with unsigned saturation (AVX2). + INST_3x(vpackuswb, kX86InstIdVpackuswb, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpackuswb, kX86InstIdVpackuswb, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed BYTE add (AVX2). + INST_3x(vpaddb, kX86InstIdVpaddb, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpaddb, kX86InstIdVpaddb, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed DWORD add (AVX2). + INST_3x(vpaddd, kX86InstIdVpaddd, X86YmmVar, X86YmmVar, X86YmmVar) + //! 
\overload
+  INST_3x(vpaddd, kX86InstIdVpaddd, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed QWORD add (AVX2).
+  INST_3x(vpaddq, kX86InstIdVpaddq, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vpaddq, kX86InstIdVpaddq, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed WORD add (AVX2).
+  INST_3x(vpaddw, kX86InstIdVpaddw, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vpaddw, kX86InstIdVpaddw, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed BYTE add with saturation (AVX2).
+  INST_3x(vpaddsb, kX86InstIdVpaddsb, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vpaddsb, kX86InstIdVpaddsb, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed WORD add with saturation (AVX2).
+  INST_3x(vpaddsw, kX86InstIdVpaddsw, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vpaddsw, kX86InstIdVpaddsw, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed BYTE add with unsigned saturation (AVX2).
+  INST_3x(vpaddusb, kX86InstIdVpaddusb, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vpaddusb, kX86InstIdVpaddusb, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed WORD add with unsigned saturation (AVX2).
+  INST_3x(vpaddusw, kX86InstIdVpaddusw, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vpaddusw, kX86InstIdVpaddusw, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed align right (AVX2).
+  INST_4i(vpalignr, kX86InstIdVpalignr, X86YmmVar, X86YmmVar, X86YmmVar, Imm)
+  //! \overload
+  INST_4i(vpalignr, kX86InstIdVpalignr, X86YmmVar, X86YmmVar, X86Mem, Imm)
+
+  //! Packed bitwise and (AVX2).
+  INST_3x(vpand, kX86InstIdVpand, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vpand, kX86InstIdVpand, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed bitwise and-not (AVX2).
+  INST_3x(vpandn, kX86InstIdVpandn, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vpandn, kX86InstIdVpandn, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed BYTE average (AVX2).
+  INST_3x(vpavgb, kX86InstIdVpavgb, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vpavgb, kX86InstIdVpavgb, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed WORD average (AVX2).
+  INST_3x(vpavgw, kX86InstIdVpavgw, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_3x(vpavgw, kX86InstIdVpavgw, X86YmmVar, X86YmmVar, X86Mem)
+
+  //! Packed DWORD blend (AVX2).
+  INST_4i(vpblendd, kX86InstIdVpblendd, X86XmmVar, X86XmmVar, X86XmmVar, Imm)
+  //! \overload
+  INST_4i(vpblendd, kX86InstIdVpblendd, X86XmmVar, X86XmmVar, X86Mem, Imm)
+  //! \overload
+  INST_4i(vpblendd, kX86InstIdVpblendd, X86YmmVar, X86YmmVar, X86YmmVar, Imm)
+  //! \overload
+  INST_4i(vpblendd, kX86InstIdVpblendd, X86YmmVar, X86YmmVar, X86Mem, Imm)
+
+  //! Packed BYTE variable blend (AVX2).
+  INST_4x(vpblendvb, kX86InstIdVpblendvb, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar)
+  //! \overload
+  INST_4x(vpblendvb, kX86InstIdVpblendvb, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar)
+
+  //! Packed WORD blend (AVX2).
+  INST_4i(vpblendw, kX86InstIdVpblendw, X86YmmVar, X86YmmVar, X86YmmVar, Imm)
+  //! \overload
+  INST_4i(vpblendw, kX86InstIdVpblendw, X86YmmVar, X86YmmVar, X86Mem, Imm)
+
+  //! Broadcast BYTE from `o1` to 128-bits in `o0` (AVX2).
+  INST_2x(vpbroadcastb, kX86InstIdVpbroadcastb, X86XmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vpbroadcastb, kX86InstIdVpbroadcastb, X86XmmVar, X86Mem)
+  //! Broadcast BYTE from `o1` to 256-bits in `o0` (AVX2).
+  INST_2x(vpbroadcastb, kX86InstIdVpbroadcastb, X86YmmVar, X86XmmVar)
+  //! \overload
+  INST_2x(vpbroadcastb, kX86InstIdVpbroadcastb, X86YmmVar, X86Mem)
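+
+  // Usage sketch (editor's note, not part of the original AsmJit header):
+  // vpblendd's immediate is a per-DWORD-lane selector; bit i set picks
+  // lane i from the third operand. Variable names are illustrative; imm()
+  // is assumed.
+  //
+  //   c.vpblendd(dst, a, b, imm(0x0F)); // low 4 DWORD lanes from b, rest from a
+
+  //! Broadcast DWORD from `o1` to 128-bits in `o0` (AVX2).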
+ INST_2x(vpbroadcastd, kX86InstIdVpbroadcastd, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpbroadcastd, kX86InstIdVpbroadcastd, X86XmmVar, X86Mem) + //! Broadcast DWORD from `o1` to 256-bits in `o0` (AVX2). + INST_2x(vpbroadcastd, kX86InstIdVpbroadcastd, X86YmmVar, X86XmmVar) + //! \overload + INST_2x(vpbroadcastd, kX86InstIdVpbroadcastd, X86YmmVar, X86Mem) + + //! Broadcast QWORD from `o1` to 128-bits in `o0` (AVX2). + INST_2x(vpbroadcastq, kX86InstIdVpbroadcastq, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpbroadcastq, kX86InstIdVpbroadcastq, X86XmmVar, X86Mem) + //! Broadcast QWORD from `o1` to 256-bits in `o0` (AVX2). + INST_2x(vpbroadcastq, kX86InstIdVpbroadcastq, X86YmmVar, X86XmmVar) + //! \overload + INST_2x(vpbroadcastq, kX86InstIdVpbroadcastq, X86YmmVar, X86Mem) + + //! Broadcast WORD from `o1` to 128-bits in `o0` (AVX2). + INST_2x(vpbroadcastw, kX86InstIdVpbroadcastw, X86XmmVar, X86XmmVar) + //! \overload + INST_2x(vpbroadcastw, kX86InstIdVpbroadcastw, X86XmmVar, X86Mem) + //! Broadcast WORD from `o1` to 256-bits in `o0` (AVX2). + INST_2x(vpbroadcastw, kX86InstIdVpbroadcastw, X86YmmVar, X86XmmVar) + //! \overload + INST_2x(vpbroadcastw, kX86InstIdVpbroadcastw, X86YmmVar, X86Mem) + + //! Packed BYTEs compare for equality (AVX2). + INST_3x(vpcmpeqb, kX86InstIdVpcmpeqb, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpcmpeqb, kX86InstIdVpcmpeqb, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed DWORDs compare for equality (AVX2). + INST_3x(vpcmpeqd, kX86InstIdVpcmpeqd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpcmpeqd, kX86InstIdVpcmpeqd, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed QWORDs compare for equality (AVX2). + INST_3x(vpcmpeqq, kX86InstIdVpcmpeqq, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpcmpeqq, kX86InstIdVpcmpeqq, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed WORDs compare for equality (AVX2). + INST_3x(vpcmpeqw, kX86InstIdVpcmpeqw, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpcmpeqw, kX86InstIdVpcmpeqw, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed BYTEs compare if greater than (AVX2). + INST_3x(vpcmpgtb, kX86InstIdVpcmpgtb, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpcmpgtb, kX86InstIdVpcmpgtb, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed DWORDs compare if greater than (AVX2). + INST_3x(vpcmpgtd, kX86InstIdVpcmpgtd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpcmpgtd, kX86InstIdVpcmpgtd, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed QWORDs compare if greater than (AVX2). + INST_3x(vpcmpgtq, kX86InstIdVpcmpgtq, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpcmpgtq, kX86InstIdVpcmpgtq, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed WORDs compare if greater than (AVX2). + INST_3x(vpcmpgtw, kX86InstIdVpcmpgtw, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpcmpgtw, kX86InstIdVpcmpgtw, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed DQWORD permute (AVX2). + INST_4i(vperm2i128, kX86InstIdVperm2i128, X86YmmVar, X86YmmVar, X86YmmVar, Imm) + //! \overload + INST_4i(vperm2i128, kX86InstIdVperm2i128, X86YmmVar, X86YmmVar, X86Mem, Imm) + + //! Packed DWORD permute (AVX2). + INST_3x(vpermd, kX86InstIdVpermd, X86YmmVar, X86YmmVar, X86YmmVar) + //! \overload + INST_3x(vpermd, kX86InstIdVpermd, X86YmmVar, X86YmmVar, X86Mem) + + //! Packed DP-FP permute (AVX2). + INST_3i(vpermpd, kX86InstIdVpermpd, X86YmmVar, X86YmmVar, Imm) + //! \overload + INST_3i(vpermpd, kX86InstIdVpermpd, X86YmmVar, X86Mem, Imm) + + //! Packed SP-FP permute (AVX2). 
+ INST_3x(vpermps, kX86InstIdVpermps, X86YmmVar, X86YmmVar, X86YmmVar)
+ //! \overload
+ INST_3x(vpermps, kX86InstIdVpermps, X86YmmVar, X86YmmVar, X86Mem)
+
+ //! Packed QWORD permute (AVX2).
+ INST_3i(vpermq, kX86InstIdVpermq, X86YmmVar, X86YmmVar, Imm)
+ //! \overload
+ INST_3i(vpermq, kX86InstIdVpermq, X86YmmVar, X86Mem, Imm)
+
+ //! Gather DWORDs from DWORD indexes specified in `o1`s VSIB (AVX2).
+ INST_3x(vpgatherdd, kX86InstIdVpgatherdd, X86XmmVar, X86Mem, X86XmmVar)
+ //! \overload
+ INST_3x(vpgatherdd, kX86InstIdVpgatherdd, X86YmmVar, X86Mem, X86YmmVar)
+
+ //! Gather QWORDs from DWORD indexes specified in `o1`s VSIB (AVX2).
+ INST_3x(vpgatherdq, kX86InstIdVpgatherdq, X86XmmVar, X86Mem, X86XmmVar)
+ //! \overload
+ INST_3x(vpgatherdq, kX86InstIdVpgatherdq, X86YmmVar, X86Mem, X86YmmVar)
+
+ //! Gather DWORDs from QWORD indexes specified in `o1`s VSIB (AVX2).
+ INST_3x(vpgatherqd, kX86InstIdVpgatherqd, X86XmmVar, X86Mem, X86XmmVar)
+
+ //! Gather QWORDs from QWORD indexes specified in `o1`s VSIB (AVX2).
+ INST_3x(vpgatherqq, kX86InstIdVpgatherqq, X86XmmVar, X86Mem, X86XmmVar)
+ //! \overload
+ INST_3x(vpgatherqq, kX86InstIdVpgatherqq, X86YmmVar, X86Mem, X86YmmVar)
+
+ //! Packed DWORD horizontal add (AVX2).
+ INST_3x(vphaddd, kX86InstIdVphaddd, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vphaddd, kX86InstIdVphaddd, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD horizontal add with saturation (AVX2).
+ INST_3x(vphaddsw, kX86InstIdVphaddsw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vphaddsw, kX86InstIdVphaddsw, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD horizontal add (AVX2).
+ INST_3x(vphaddw, kX86InstIdVphaddw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vphaddw, kX86InstIdVphaddw, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed DWORD horizontal subtract (AVX2).
+ INST_3x(vphsubd, kX86InstIdVphsubd, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vphsubd, kX86InstIdVphsubd, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD horizontal subtract with saturation (AVX2).
+ INST_3x(vphsubsw, kX86InstIdVphsubsw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vphsubsw, kX86InstIdVphsubsw, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD horizontal subtract (AVX2).
+ INST_3x(vphsubw, kX86InstIdVphsubw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vphsubw, kX86InstIdVphsubw, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Move BYTE mask to integer (AVX2).
+ INST_2x(vpmovmskb, kX86InstIdVpmovmskb, X86GpVar, X86YmmVar)
+
+ //! Packed BYTE to DWORD with sign extend (AVX2).
+ INST_2x(vpmovsxbd, kX86InstIdVpmovsxbd, X86YmmVar, X86Mem)
+ //! \overload
+ INST_2x(vpmovsxbd, kX86InstIdVpmovsxbd, X86YmmVar, X86XmmVar)
+
+ //! Packed BYTE to QWORD with sign extend (AVX2).
+ INST_2x(vpmovsxbq, kX86InstIdVpmovsxbq, X86YmmVar, X86Mem)
+ //! \overload
+ INST_2x(vpmovsxbq, kX86InstIdVpmovsxbq, X86YmmVar, X86XmmVar)
+
+ //! Packed BYTE to WORD with sign extend (AVX2).
+ INST_2x(vpmovsxbw, kX86InstIdVpmovsxbw, X86YmmVar, X86Mem)
+ //! \overload
+ INST_2x(vpmovsxbw, kX86InstIdVpmovsxbw, X86YmmVar, X86XmmVar)
+
+ //! Packed DWORD to QWORD with sign extend (AVX2).
+ INST_2x(vpmovsxdq, kX86InstIdVpmovsxdq, X86YmmVar, X86Mem)
+ //! \overload
+ INST_2x(vpmovsxdq, kX86InstIdVpmovsxdq, X86YmmVar, X86XmmVar)
+
+ //! Packed WORD to DWORD with sign extend (AVX2).
+ INST_2x(vpmovsxwd, kX86InstIdVpmovsxwd, X86YmmVar, X86Mem)
+ //! \overload
+ INST_2x(vpmovsxwd, kX86InstIdVpmovsxwd, X86YmmVar, X86XmmVar)
+
+ //! Packed WORD to QWORD with sign extend (AVX2).
+ INST_2x(vpmovsxwq, kX86InstIdVpmovsxwq, X86YmmVar, X86Mem)
+ //! \overload
+ INST_2x(vpmovsxwq, kX86InstIdVpmovsxwq, X86YmmVar, X86XmmVar)
+
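(Editorial aside, not part of the upstream patch: every `INST_*` entry in this listing declares an inline `X86Compiler` member function whose operand types are the macro arguments, so the AVX2 forms above are called like ordinary methods on typed variables. A minimal usage sketch follows; the `newXmmVar`/`newYmmVar` factory names are assumptions about this AsmJit snapshot and may differ, while the `vpmovsxbd` and `vpaddd` signatures are taken from the listing itself.)

```cpp
// Hypothetical sketch: sign-extend 8 bytes to DWORDs, then add the result
// to itself. `c` is an X86Compiler; the factory names are assumed, not
// verified against this vendored AsmJit version.
X86XmmVar src = c.newXmmVar(); // assumed factory name
X86YmmVar dst = c.newYmmVar(); // assumed factory name
c.vpmovsxbd(dst, src);         // declared above: Ymm <- Xmm, sign extend
c.vpaddd(dst, dst, dst);       // declared earlier: packed DWORD add
```
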
+ //! Packed BYTE to DWORD with zero extend (AVX2).
+ INST_2x(vpmovzxbd, kX86InstIdVpmovzxbd, X86YmmVar, X86Mem)
+ //! \overload
+ INST_2x(vpmovzxbd, kX86InstIdVpmovzxbd, X86YmmVar, X86XmmVar)
+
+ //! Packed BYTE to QWORD with zero extend (AVX2).
+ INST_2x(vpmovzxbq, kX86InstIdVpmovzxbq, X86YmmVar, X86Mem)
+ //! \overload
+ INST_2x(vpmovzxbq, kX86InstIdVpmovzxbq, X86YmmVar, X86XmmVar)
+
+ //! Packed BYTE to WORD with zero extend (AVX2).
+ INST_2x(vpmovzxbw, kX86InstIdVpmovzxbw, X86YmmVar, X86Mem)
+ //! \overload
+ INST_2x(vpmovzxbw, kX86InstIdVpmovzxbw, X86YmmVar, X86XmmVar)
+
+ //! Packed DWORD to QWORD with zero extend (AVX2).
+ INST_2x(vpmovzxdq, kX86InstIdVpmovzxdq, X86YmmVar, X86Mem)
+ //! \overload
+ INST_2x(vpmovzxdq, kX86InstIdVpmovzxdq, X86YmmVar, X86XmmVar)
+
+ //! Packed WORD to DWORD with zero extend (AVX2).
+ INST_2x(vpmovzxwd, kX86InstIdVpmovzxwd, X86YmmVar, X86Mem)
+ //! \overload
+ INST_2x(vpmovzxwd, kX86InstIdVpmovzxwd, X86YmmVar, X86XmmVar)
+
+ //! Packed WORD to QWORD with zero extend (AVX2).
+ INST_2x(vpmovzxwq, kX86InstIdVpmovzxwq, X86YmmVar, X86Mem)
+ //! \overload
+ INST_2x(vpmovzxwq, kX86InstIdVpmovzxwq, X86YmmVar, X86XmmVar)
+
+ //! Packed multiply and add signed and unsigned bytes (AVX2).
+ INST_3x(vpmaddubsw, kX86InstIdVpmaddubsw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpmaddubsw, kX86InstIdVpmaddubsw, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD multiply and add to packed DWORD (AVX2).
+ INST_3x(vpmaddwd, kX86InstIdVpmaddwd, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpmaddwd, kX86InstIdVpmaddwd, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Conditionally store packed DWORDs to `o0` using mask `o1` (AVX2).
+ INST_3x(vpmaskmovd, kX86InstIdVpmaskmovd, X86Mem, X86XmmVar, X86XmmVar)
+ //! \overload
+ INST_3x(vpmaskmovd, kX86InstIdVpmaskmovd, X86Mem, X86YmmVar, X86YmmVar)
+ //! Conditionally load packed DWORDs from `o2` using mask `o1` (AVX2).
+ INST_3x(vpmaskmovd, kX86InstIdVpmaskmovd, X86XmmVar, X86XmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpmaskmovd, kX86InstIdVpmaskmovd, X86YmmVar, X86YmmVar, X86Mem)
+
+ //! Conditionally store packed QWORDs to `o0` using mask `o1` (AVX2).
+ INST_3x(vpmaskmovq, kX86InstIdVpmaskmovq, X86Mem, X86XmmVar, X86XmmVar)
+ //! \overload
+ INST_3x(vpmaskmovq, kX86InstIdVpmaskmovq, X86Mem, X86YmmVar, X86YmmVar)
+ //! Conditionally load packed QWORDs from `o2` using mask `o1` (AVX2).
+ INST_3x(vpmaskmovq, kX86InstIdVpmaskmovq, X86XmmVar, X86XmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpmaskmovq, kX86InstIdVpmaskmovq, X86YmmVar, X86YmmVar, X86Mem)
+
+ //! Packed BYTE maximum (AVX2).
+ INST_3x(vpmaxsb, kX86InstIdVpmaxsb, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpmaxsb, kX86InstIdVpmaxsb, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed DWORD maximum (AVX2).
+ INST_3x(vpmaxsd, kX86InstIdVpmaxsd, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpmaxsd, kX86InstIdVpmaxsd, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD maximum (AVX2).
+ INST_3x(vpmaxsw, kX86InstIdVpmaxsw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpmaxsw, kX86InstIdVpmaxsw, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed BYTE unsigned maximum (AVX2).
+ INST_3x(vpmaxub, kX86InstIdVpmaxub, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpmaxub, kX86InstIdVpmaxub, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed DWORD unsigned maximum (AVX2).
+ INST_3x(vpmaxud, kX86InstIdVpmaxud, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpmaxud, kX86InstIdVpmaxud, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD unsigned maximum (AVX2).
+ INST_3x(vpmaxuw, kX86InstIdVpmaxuw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpmaxuw, kX86InstIdVpmaxuw, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed BYTE minimum (AVX2).
+ INST_3x(vpminsb, kX86InstIdVpminsb, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpminsb, kX86InstIdVpminsb, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed DWORD minimum (AVX2).
+ INST_3x(vpminsd, kX86InstIdVpminsd, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpminsd, kX86InstIdVpminsd, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed WORD minimum (AVX2). + INST_3x(vpminsw, kX86InstIdVpminsw, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpminsw, kX86InstIdVpminsw, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed BYTE unsigned minimum (AVX2). + INST_3x(vpminub, kX86InstIdVpminub, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpminub, kX86InstIdVpminub, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed DWORD unsigned minimum (AVX2). + INST_3x(vpminud, kX86InstIdVpminud, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpminud, kX86InstIdVpminud, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed WORD unsigned minimum (AVX2). + INST_3x(vpminuw, kX86InstIdVpminuw, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpminuw, kX86InstIdVpminuw, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed DWORD to QWORD multiply (AVX2). + INST_3x(vpmuldq, kX86InstIdVpmuldq, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpmuldq, kX86InstIdVpmuldq, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed WORD multiply high, round and scale (AVX2). + INST_3x(vpmulhrsw, kX86InstIdVpmulhrsw, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpmulhrsw, kX86InstIdVpmulhrsw, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed WORD unsigned multiply high (AVX2). + INST_3x(vpmulhuw, kX86InstIdVpmulhuw, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpmulhuw, kX86InstIdVpmulhuw, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed WORD multiply high (AVX2). + INST_3x(vpmulhw, kX86InstIdVpmulhw, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpmulhw, kX86InstIdVpmulhw, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed DWORD multiply low (AVX2). + INST_3x(vpmulld, kX86InstIdVpmulld, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpmulld, kX86InstIdVpmulld, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed WORDs multiply low (AVX2). + INST_3x(vpmullw, kX86InstIdVpmullw, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpmullw, kX86InstIdVpmullw, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed DWORD multiply to QWORD (AVX2). + INST_3x(vpmuludq, kX86InstIdVpmuludq, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpmuludq, kX86InstIdVpmuludq, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed bitwise or (AVX2). + INST_3x(vpor, kX86InstIdVpor, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpor, kX86InstIdVpor, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed WORD sum of absolute differences (AVX2). + INST_3x(vpsadbw, kX86InstIdVpsadbw, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpsadbw, kX86InstIdVpsadbw, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed BYTE shuffle (AVX2). + INST_3x(vpshufb, kX86InstIdVpshufb, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpshufb, kX86InstIdVpshufb, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed DWORD shuffle (AVX2). + INST_3i(vpshufd, kX86InstIdVpshufd, X86YmmVar, X86Mem, Imm) + //! \overload + INST_3i(vpshufd, kX86InstIdVpshufd, X86YmmVar, X86YmmVar, Imm) + + //! Packed WORD shuffle high (AVX2). + INST_3i(vpshufhw, kX86InstIdVpshufhw, X86YmmVar, X86Mem, Imm) + //! \overload + INST_3i(vpshufhw, kX86InstIdVpshufhw, X86YmmVar, X86YmmVar, Imm) + + //! Packed WORD shuffle low (AVX2). + INST_3i(vpshuflw, kX86InstIdVpshuflw, X86YmmVar, X86Mem, Imm) + //! \overload + INST_3i(vpshuflw, kX86InstIdVpshuflw, X86YmmVar, X86YmmVar, Imm) + + //! Packed BYTE sign (AVX2). 
+ INST_3x(vpsignb, kX86InstIdVpsignb, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsignb, kX86InstIdVpsignb, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed DWORD sign (AVX2).
+ INST_3x(vpsignd, kX86InstIdVpsignd, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsignd, kX86InstIdVpsignd, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD sign (AVX2).
+ INST_3x(vpsignw, kX86InstIdVpsignw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsignw, kX86InstIdVpsignw, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed DWORD shift left logical (AVX2).
+ INST_3x(vpslld, kX86InstIdVpslld, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpslld, kX86InstIdVpslld, X86YmmVar, X86YmmVar, X86XmmVar)
+ //! \overload
+ INST_3i(vpslld, kX86InstIdVpslld, X86YmmVar, X86YmmVar, Imm)
+
+ //! Packed DQWORD shift left logical (AVX2).
+ INST_3i(vpslldq, kX86InstIdVpslldq, X86YmmVar, X86YmmVar, Imm)
+
+ //! Packed QWORD shift left logical (AVX2).
+ INST_3x(vpsllq, kX86InstIdVpsllq, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsllq, kX86InstIdVpsllq, X86YmmVar, X86YmmVar, X86XmmVar)
+ //! \overload
+ INST_3i(vpsllq, kX86InstIdVpsllq, X86YmmVar, X86YmmVar, Imm)
+
+ //! Packed DWORD variable shift left logical (AVX2).
+ INST_3x(vpsllvd, kX86InstIdVpsllvd, X86XmmVar, X86XmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsllvd, kX86InstIdVpsllvd, X86XmmVar, X86XmmVar, X86XmmVar)
+ //! \overload
+ INST_3x(vpsllvd, kX86InstIdVpsllvd, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsllvd, kX86InstIdVpsllvd, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed QWORD variable shift left logical (AVX2).
+ INST_3x(vpsllvq, kX86InstIdVpsllvq, X86XmmVar, X86XmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsllvq, kX86InstIdVpsllvq, X86XmmVar, X86XmmVar, X86XmmVar)
+ //! \overload
+ INST_3x(vpsllvq, kX86InstIdVpsllvq, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsllvq, kX86InstIdVpsllvq, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD shift left logical (AVX2).
+ INST_3x(vpsllw, kX86InstIdVpsllw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsllw, kX86InstIdVpsllw, X86YmmVar, X86YmmVar, X86XmmVar)
+ //! \overload
+ INST_3i(vpsllw, kX86InstIdVpsllw, X86YmmVar, X86YmmVar, Imm)
+
+ //! Packed DWORD shift right arithmetic (AVX2).
+ INST_3x(vpsrad, kX86InstIdVpsrad, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsrad, kX86InstIdVpsrad, X86YmmVar, X86YmmVar, X86XmmVar)
+ //! \overload
+ INST_3i(vpsrad, kX86InstIdVpsrad, X86YmmVar, X86YmmVar, Imm)
+
+ //! Packed DWORD variable shift right arithmetic (AVX2).
+ INST_3x(vpsravd, kX86InstIdVpsravd, X86XmmVar, X86XmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsravd, kX86InstIdVpsravd, X86XmmVar, X86XmmVar, X86XmmVar)
+ //! \overload
+ INST_3x(vpsravd, kX86InstIdVpsravd, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsravd, kX86InstIdVpsravd, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD shift right arithmetic (AVX2).
+ INST_3x(vpsraw, kX86InstIdVpsraw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsraw, kX86InstIdVpsraw, X86YmmVar, X86YmmVar, X86XmmVar)
+ //! \overload
+ INST_3i(vpsraw, kX86InstIdVpsraw, X86YmmVar, X86YmmVar, Imm)
+
+ //! Packed DWORD shift right logical (AVX2).
+ INST_3x(vpsrld, kX86InstIdVpsrld, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsrld, kX86InstIdVpsrld, X86YmmVar, X86YmmVar, X86XmmVar)
+ //! \overload
+ INST_3i(vpsrld, kX86InstIdVpsrld, X86YmmVar, X86YmmVar, Imm)
+
+ //! Packed DQWORD shift right logical (AVX2).
+ INST_3i(vpsrldq, kX86InstIdVpsrldq, X86YmmVar, X86YmmVar, Imm)
+
+ //! Packed QWORD shift right logical (AVX2).
+ INST_3x(vpsrlq, kX86InstIdVpsrlq, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsrlq, kX86InstIdVpsrlq, X86YmmVar, X86YmmVar, X86XmmVar)
+ //! \overload
+ INST_3i(vpsrlq, kX86InstIdVpsrlq, X86YmmVar, X86YmmVar, Imm)
+
+ //! Packed DWORD variable shift right logical (AVX2).
+ INST_3x(vpsrlvd, kX86InstIdVpsrlvd, X86XmmVar, X86XmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsrlvd, kX86InstIdVpsrlvd, X86XmmVar, X86XmmVar, X86XmmVar)
+ //! \overload
+ INST_3x(vpsrlvd, kX86InstIdVpsrlvd, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsrlvd, kX86InstIdVpsrlvd, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed QWORD variable shift right logical (AVX2).
+ INST_3x(vpsrlvq, kX86InstIdVpsrlvq, X86XmmVar, X86XmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsrlvq, kX86InstIdVpsrlvq, X86XmmVar, X86XmmVar, X86XmmVar)
+ //! \overload
+ INST_3x(vpsrlvq, kX86InstIdVpsrlvq, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsrlvq, kX86InstIdVpsrlvq, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD shift right logical (AVX2).
+ INST_3x(vpsrlw, kX86InstIdVpsrlw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsrlw, kX86InstIdVpsrlw, X86YmmVar, X86YmmVar, X86XmmVar)
+ //! \overload
+ INST_3i(vpsrlw, kX86InstIdVpsrlw, X86YmmVar, X86YmmVar, Imm)
+
+ //! Packed BYTE subtract (AVX2).
+ INST_3x(vpsubb, kX86InstIdVpsubb, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsubb, kX86InstIdVpsubb, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed DWORD subtract (AVX2).
+ INST_3x(vpsubd, kX86InstIdVpsubd, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsubd, kX86InstIdVpsubd, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed QWORD subtract (AVX2).
+ INST_3x(vpsubq, kX86InstIdVpsubq, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsubq, kX86InstIdVpsubq, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed BYTE subtract with saturation (AVX2).
+ INST_3x(vpsubsb, kX86InstIdVpsubsb, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsubsb, kX86InstIdVpsubsb, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD subtract with saturation (AVX2).
+ INST_3x(vpsubsw, kX86InstIdVpsubsw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsubsw, kX86InstIdVpsubsw, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed BYTE subtract with unsigned saturation (AVX2).
+ INST_3x(vpsubusb, kX86InstIdVpsubusb, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsubusb, kX86InstIdVpsubusb, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD subtract with unsigned saturation (AVX2).
+ INST_3x(vpsubusw, kX86InstIdVpsubusw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsubusw, kX86InstIdVpsubusw, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Packed WORD subtract (AVX2).
+ INST_3x(vpsubw, kX86InstIdVpsubw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpsubw, kX86InstIdVpsubw, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Unpack high packed BYTEs to WORDs (AVX2).
+ INST_3x(vpunpckhbw, kX86InstIdVpunpckhbw, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpunpckhbw, kX86InstIdVpunpckhbw, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Unpack high packed DWORDs to QWORDs (AVX2).
+ INST_3x(vpunpckhdq, kX86InstIdVpunpckhdq, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpunpckhdq, kX86InstIdVpunpckhdq, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Unpack high packed QWORDs to DQWORD (AVX2).
+ INST_3x(vpunpckhqdq, kX86InstIdVpunpckhqdq, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpunpckhqdq, kX86InstIdVpunpckhqdq, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Unpack high packed WORDs to DWORDs (AVX2).
+ INST_3x(vpunpckhwd, kX86InstIdVpunpckhwd, X86YmmVar, X86YmmVar, X86Mem)
+ //! \overload
+ INST_3x(vpunpckhwd, kX86InstIdVpunpckhwd, X86YmmVar, X86YmmVar, X86YmmVar)
+
+ //! Unpack low packed BYTEs to WORDs (AVX2).
+ INST_3x(vpunpcklbw, kX86InstIdVpunpcklbw, X86YmmVar, X86YmmVar, X86Mem)
+ //! 
\overload + INST_3x(vpunpcklbw, kX86InstIdVpunpcklbw, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Unpack low packed DWORDs to QWORDs (AVX2). + INST_3x(vpunpckldq, kX86InstIdVpunpckldq, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpunpckldq, kX86InstIdVpunpckldq, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Unpack low packed QWORDs to DQWORD (AVX2). + INST_3x(vpunpcklqdq, kX86InstIdVpunpcklqdq, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpunpcklqdq, kX86InstIdVpunpcklqdq, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Unpack low packed WORDs to DWORDs (AVX2). + INST_3x(vpunpcklwd, kX86InstIdVpunpcklwd, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpunpcklwd, kX86InstIdVpunpcklwd, X86YmmVar, X86YmmVar, X86YmmVar) + + //! Packed bitwise xor (AVX2). + INST_3x(vpxor, kX86InstIdVpxor, X86YmmVar, X86YmmVar, X86Mem) + //! \overload + INST_3x(vpxor, kX86InstIdVpxor, X86YmmVar, X86YmmVar, X86YmmVar) + + // -------------------------------------------------------------------------- + // [FMA3] + // -------------------------------------------------------------------------- + + INST_3x(vfmadd132pd, kX86InstIdVfmadd132pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmadd132pd, kX86InstIdVfmadd132pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmadd132pd, kX86InstIdVfmadd132pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmadd132pd, kX86InstIdVfmadd132pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmadd132ps, kX86InstIdVfmadd132ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmadd132ps, kX86InstIdVfmadd132ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmadd132ps, kX86InstIdVfmadd132ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmadd132ps, kX86InstIdVfmadd132ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmadd132sd, kX86InstIdVfmadd132sd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmadd132sd, kX86InstIdVfmadd132sd, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfmadd132ss, kX86InstIdVfmadd132ss, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmadd132ss, kX86InstIdVfmadd132ss, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfmadd213pd, kX86InstIdVfmadd213pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmadd213pd, kX86InstIdVfmadd213pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmadd213pd, kX86InstIdVfmadd213pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmadd213pd, kX86InstIdVfmadd213pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmadd213ps, kX86InstIdVfmadd213ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmadd213ps, kX86InstIdVfmadd213ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmadd213ps, kX86InstIdVfmadd213ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmadd213ps, kX86InstIdVfmadd213ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmadd213sd, kX86InstIdVfmadd213sd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmadd213sd, kX86InstIdVfmadd213sd, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfmadd213ss, kX86InstIdVfmadd213ss, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmadd213ss, kX86InstIdVfmadd213ss, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfmadd231pd, kX86InstIdVfmadd231pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmadd231pd, kX86InstIdVfmadd231pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmadd231pd, kX86InstIdVfmadd231pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmadd231pd, kX86InstIdVfmadd231pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmadd231ps, kX86InstIdVfmadd231ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmadd231ps, kX86InstIdVfmadd231ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmadd231ps, kX86InstIdVfmadd231ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmadd231ps, 
kX86InstIdVfmadd231ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmadd231sd, kX86InstIdVfmadd231sd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmadd231sd, kX86InstIdVfmadd231sd, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfmadd231ss, kX86InstIdVfmadd231ss, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmadd231ss, kX86InstIdVfmadd231ss, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfmaddsub132pd, kX86InstIdVfmaddsub132pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmaddsub132pd, kX86InstIdVfmaddsub132pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmaddsub132pd, kX86InstIdVfmaddsub132pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmaddsub132pd, kX86InstIdVfmaddsub132pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmaddsub132ps, kX86InstIdVfmaddsub132ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmaddsub132ps, kX86InstIdVfmaddsub132ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmaddsub132ps, kX86InstIdVfmaddsub132ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmaddsub132ps, kX86InstIdVfmaddsub132ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmaddsub213pd, kX86InstIdVfmaddsub213pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmaddsub213pd, kX86InstIdVfmaddsub213pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmaddsub213pd, kX86InstIdVfmaddsub213pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmaddsub213pd, kX86InstIdVfmaddsub213pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmaddsub213ps, kX86InstIdVfmaddsub213ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmaddsub213ps, kX86InstIdVfmaddsub213ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmaddsub213ps, kX86InstIdVfmaddsub213ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmaddsub213ps, kX86InstIdVfmaddsub213ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmaddsub231pd, kX86InstIdVfmaddsub231pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmaddsub231pd, kX86InstIdVfmaddsub231pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmaddsub231pd, kX86InstIdVfmaddsub231pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmaddsub231pd, kX86InstIdVfmaddsub231pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmaddsub231ps, kX86InstIdVfmaddsub231ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmaddsub231ps, kX86InstIdVfmaddsub231ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmaddsub231ps, kX86InstIdVfmaddsub231ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmaddsub231ps, kX86InstIdVfmaddsub231ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmsub132pd, kX86InstIdVfmsub132pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsub132pd, kX86InstIdVfmsub132pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmsub132pd, kX86InstIdVfmsub132pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmsub132pd, kX86InstIdVfmsub132pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmsub132ps, kX86InstIdVfmsub132ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsub132ps, kX86InstIdVfmsub132ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmsub132ps, kX86InstIdVfmsub132ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmsub132ps, kX86InstIdVfmsub132ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmsub132sd, kX86InstIdVfmsub132sd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsub132sd, kX86InstIdVfmsub132sd, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfmsub132ss, kX86InstIdVfmsub132ss, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsub132ss, kX86InstIdVfmsub132ss, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfmsub213pd, kX86InstIdVfmsub213pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsub213pd, kX86InstIdVfmsub213pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmsub213pd, kX86InstIdVfmsub213pd, X86YmmVar, X86YmmVar, 
X86Mem) + INST_3x(vfmsub213pd, kX86InstIdVfmsub213pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmsub213ps, kX86InstIdVfmsub213ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsub213ps, kX86InstIdVfmsub213ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmsub213ps, kX86InstIdVfmsub213ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmsub213ps, kX86InstIdVfmsub213ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmsub213sd, kX86InstIdVfmsub213sd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsub213sd, kX86InstIdVfmsub213sd, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfmsub213ss, kX86InstIdVfmsub213ss, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsub213ss, kX86InstIdVfmsub213ss, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfmsub231pd, kX86InstIdVfmsub231pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsub231pd, kX86InstIdVfmsub231pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmsub231pd, kX86InstIdVfmsub231pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmsub231pd, kX86InstIdVfmsub231pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmsub231ps, kX86InstIdVfmsub231ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsub231ps, kX86InstIdVfmsub231ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmsub231ps, kX86InstIdVfmsub231ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmsub231ps, kX86InstIdVfmsub231ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmsub231sd, kX86InstIdVfmsub231sd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsub231sd, kX86InstIdVfmsub231sd, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfmsub231ss, kX86InstIdVfmsub231ss, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsub231ss, kX86InstIdVfmsub231ss, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfmsubadd132pd, kX86InstIdVfmsubadd132pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsubadd132pd, kX86InstIdVfmsubadd132pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmsubadd132pd, kX86InstIdVfmsubadd132pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmsubadd132pd, kX86InstIdVfmsubadd132pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmsubadd132ps, kX86InstIdVfmsubadd132ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsubadd132ps, kX86InstIdVfmsubadd132ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmsubadd132ps, kX86InstIdVfmsubadd132ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmsubadd132ps, kX86InstIdVfmsubadd132ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmsubadd213pd, kX86InstIdVfmsubadd213pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsubadd213pd, kX86InstIdVfmsubadd213pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmsubadd213pd, kX86InstIdVfmsubadd213pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmsubadd213pd, kX86InstIdVfmsubadd213pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmsubadd213ps, kX86InstIdVfmsubadd213ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsubadd213ps, kX86InstIdVfmsubadd213ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmsubadd213ps, kX86InstIdVfmsubadd213ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmsubadd213ps, kX86InstIdVfmsubadd213ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmsubadd231pd, kX86InstIdVfmsubadd231pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsubadd231pd, kX86InstIdVfmsubadd231pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmsubadd231pd, kX86InstIdVfmsubadd231pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmsubadd231pd, kX86InstIdVfmsubadd231pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfmsubadd231ps, kX86InstIdVfmsubadd231ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfmsubadd231ps, kX86InstIdVfmsubadd231ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfmsubadd231ps, kX86InstIdVfmsubadd231ps, 
X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfmsubadd231ps, kX86InstIdVfmsubadd231ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmadd132pd, kX86InstIdVfnmadd132pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmadd132pd, kX86InstIdVfnmadd132pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfnmadd132pd, kX86InstIdVfnmadd132pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfnmadd132pd, kX86InstIdVfnmadd132pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmadd132ps, kX86InstIdVfnmadd132ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmadd132ps, kX86InstIdVfnmadd132ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfnmadd132ps, kX86InstIdVfnmadd132ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfnmadd132ps, kX86InstIdVfnmadd132ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmadd132sd, kX86InstIdVfnmadd132sd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmadd132sd, kX86InstIdVfnmadd132sd, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfnmadd132ss, kX86InstIdVfnmadd132ss, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmadd132ss, kX86InstIdVfnmadd132ss, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfnmadd213pd, kX86InstIdVfnmadd213pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmadd213pd, kX86InstIdVfnmadd213pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfnmadd213pd, kX86InstIdVfnmadd213pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfnmadd213pd, kX86InstIdVfnmadd213pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmadd213ps, kX86InstIdVfnmadd213ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmadd213ps, kX86InstIdVfnmadd213ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfnmadd213ps, kX86InstIdVfnmadd213ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfnmadd213ps, kX86InstIdVfnmadd213ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmadd213sd, kX86InstIdVfnmadd213sd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmadd213sd, kX86InstIdVfnmadd213sd, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfnmadd213ss, kX86InstIdVfnmadd213ss, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmadd213ss, kX86InstIdVfnmadd213ss, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfnmadd231pd, kX86InstIdVfnmadd231pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmadd231pd, kX86InstIdVfnmadd231pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfnmadd231pd, kX86InstIdVfnmadd231pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfnmadd231pd, kX86InstIdVfnmadd231pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmadd231ps, kX86InstIdVfnmadd231ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmadd231ps, kX86InstIdVfnmadd231ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfnmadd231ps, kX86InstIdVfnmadd231ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfnmadd231ps, kX86InstIdVfnmadd231ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmadd231sd, kX86InstIdVfnmadd231sd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmadd231sd, kX86InstIdVfnmadd231sd, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfnmadd231ss, kX86InstIdVfnmadd231ss, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmadd231ss, kX86InstIdVfnmadd231ss, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfnmsub132pd, kX86InstIdVfnmsub132pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmsub132pd, kX86InstIdVfnmsub132pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfnmsub132pd, kX86InstIdVfnmsub132pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfnmsub132pd, kX86InstIdVfnmsub132pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmsub132ps, kX86InstIdVfnmsub132ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmsub132ps, kX86InstIdVfnmsub132ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfnmsub132ps, kX86InstIdVfnmsub132ps, X86YmmVar, X86YmmVar, 
X86Mem) + INST_3x(vfnmsub132ps, kX86InstIdVfnmsub132ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmsub132sd, kX86InstIdVfnmsub132sd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmsub132sd, kX86InstIdVfnmsub132sd, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfnmsub132ss, kX86InstIdVfnmsub132ss, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmsub132ss, kX86InstIdVfnmsub132ss, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfnmsub213pd, kX86InstIdVfnmsub213pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmsub213pd, kX86InstIdVfnmsub213pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfnmsub213pd, kX86InstIdVfnmsub213pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfnmsub213pd, kX86InstIdVfnmsub213pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmsub213ps, kX86InstIdVfnmsub213ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmsub213ps, kX86InstIdVfnmsub213ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfnmsub213ps, kX86InstIdVfnmsub213ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfnmsub213ps, kX86InstIdVfnmsub213ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmsub213sd, kX86InstIdVfnmsub213sd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmsub213sd, kX86InstIdVfnmsub213sd, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfnmsub213ss, kX86InstIdVfnmsub213ss, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmsub213ss, kX86InstIdVfnmsub213ss, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfnmsub231pd, kX86InstIdVfnmsub231pd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmsub231pd, kX86InstIdVfnmsub231pd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfnmsub231pd, kX86InstIdVfnmsub231pd, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfnmsub231pd, kX86InstIdVfnmsub231pd, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmsub231ps, kX86InstIdVfnmsub231ps, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmsub231ps, kX86InstIdVfnmsub231ps, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vfnmsub231ps, kX86InstIdVfnmsub231ps, X86YmmVar, X86YmmVar, X86Mem) + INST_3x(vfnmsub231ps, kX86InstIdVfnmsub231ps, X86YmmVar, X86YmmVar, X86YmmVar) + + INST_3x(vfnmsub231sd, kX86InstIdVfnmsub231sd, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmsub231sd, kX86InstIdVfnmsub231sd, X86XmmVar, X86XmmVar, X86XmmVar) + + INST_3x(vfnmsub231ss, kX86InstIdVfnmsub231ss, X86XmmVar, X86XmmVar, X86Mem) + INST_3x(vfnmsub231ss, kX86InstIdVfnmsub231ss, X86XmmVar, X86XmmVar, X86XmmVar) + + // -------------------------------------------------------------------------- + // [FMA4] + // -------------------------------------------------------------------------- + + INST_4x(vfmaddpd, kX86InstIdVfmaddpd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfmaddpd, kX86InstIdVfmaddpd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfmaddpd, kX86InstIdVfmaddpd, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vfmaddpd, kX86InstIdVfmaddpd, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vfmaddpd, kX86InstIdVfmaddpd, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vfmaddpd, kX86InstIdVfmaddpd, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4x(vfmaddps, kX86InstIdVfmaddps, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfmaddps, kX86InstIdVfmaddps, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfmaddps, kX86InstIdVfmaddps, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vfmaddps, kX86InstIdVfmaddps, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vfmaddps, kX86InstIdVfmaddps, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vfmaddps, kX86InstIdVfmaddps, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4x(vfmaddsd, kX86InstIdVfmaddsd, X86XmmVar, 
X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfmaddsd, kX86InstIdVfmaddsd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfmaddsd, kX86InstIdVfmaddsd, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + + INST_4x(vfmaddss, kX86InstIdVfmaddss, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfmaddss, kX86InstIdVfmaddss, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfmaddss, kX86InstIdVfmaddss, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + + INST_4x(vfmaddsubpd, kX86InstIdVfmaddsubpd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfmaddsubpd, kX86InstIdVfmaddsubpd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfmaddsubpd, kX86InstIdVfmaddsubpd, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vfmaddsubpd, kX86InstIdVfmaddsubpd, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vfmaddsubpd, kX86InstIdVfmaddsubpd, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vfmaddsubpd, kX86InstIdVfmaddsubpd, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4x(vfmaddsubps, kX86InstIdVfmaddsubps, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfmaddsubps, kX86InstIdVfmaddsubps, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfmaddsubps, kX86InstIdVfmaddsubps, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vfmaddsubps, kX86InstIdVfmaddsubps, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vfmaddsubps, kX86InstIdVfmaddsubps, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vfmaddsubps, kX86InstIdVfmaddsubps, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4x(vfmsubaddpd, kX86InstIdVfmsubaddpd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfmsubaddpd, kX86InstIdVfmsubaddpd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfmsubaddpd, kX86InstIdVfmsubaddpd, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vfmsubaddpd, kX86InstIdVfmsubaddpd, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vfmsubaddpd, kX86InstIdVfmsubaddpd, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vfmsubaddpd, kX86InstIdVfmsubaddpd, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4x(vfmsubaddps, kX86InstIdVfmsubaddps, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfmsubaddps, kX86InstIdVfmsubaddps, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfmsubaddps, kX86InstIdVfmsubaddps, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vfmsubaddps, kX86InstIdVfmsubaddps, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vfmsubaddps, kX86InstIdVfmsubaddps, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vfmsubaddps, kX86InstIdVfmsubaddps, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4x(vfmsubpd, kX86InstIdVfmsubpd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfmsubpd, kX86InstIdVfmsubpd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfmsubpd, kX86InstIdVfmsubpd, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vfmsubpd, kX86InstIdVfmsubpd, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vfmsubpd, kX86InstIdVfmsubpd, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vfmsubpd, kX86InstIdVfmsubpd, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4x(vfmsubps, kX86InstIdVfmsubps, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfmsubps, kX86InstIdVfmsubps, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfmsubps, kX86InstIdVfmsubps, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vfmsubps, kX86InstIdVfmsubps, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vfmsubps, kX86InstIdVfmsubps, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vfmsubps, kX86InstIdVfmsubps, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + 
INST_4x(vfmsubsd, kX86InstIdVfmsubsd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfmsubsd, kX86InstIdVfmsubsd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfmsubsd, kX86InstIdVfmsubsd, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + + INST_4x(vfmsubss, kX86InstIdVfmsubss, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfmsubss, kX86InstIdVfmsubss, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfmsubss, kX86InstIdVfmsubss, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + + INST_4x(vfnmaddpd, kX86InstIdVfnmaddpd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfnmaddpd, kX86InstIdVfnmaddpd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfnmaddpd, kX86InstIdVfnmaddpd, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vfnmaddpd, kX86InstIdVfnmaddpd, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vfnmaddpd, kX86InstIdVfnmaddpd, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vfnmaddpd, kX86InstIdVfnmaddpd, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4x(vfnmaddps, kX86InstIdVfnmaddps, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfnmaddps, kX86InstIdVfnmaddps, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfnmaddps, kX86InstIdVfnmaddps, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vfnmaddps, kX86InstIdVfnmaddps, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vfnmaddps, kX86InstIdVfnmaddps, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vfnmaddps, kX86InstIdVfnmaddps, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4x(vfnmaddsd, kX86InstIdVfnmaddsd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfnmaddsd, kX86InstIdVfnmaddsd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfnmaddsd, kX86InstIdVfnmaddsd, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + + INST_4x(vfnmaddss, kX86InstIdVfnmaddss, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfnmaddss, kX86InstIdVfnmaddss, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfnmaddss, kX86InstIdVfnmaddss, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + + INST_4x(vfnmsubpd, kX86InstIdVfnmsubpd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfnmsubpd, kX86InstIdVfnmsubpd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfnmsubpd, kX86InstIdVfnmsubpd, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vfnmsubpd, kX86InstIdVfnmsubpd, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vfnmsubpd, kX86InstIdVfnmsubpd, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vfnmsubpd, kX86InstIdVfnmsubpd, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4x(vfnmsubps, kX86InstIdVfnmsubps, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfnmsubps, kX86InstIdVfnmsubps, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfnmsubps, kX86InstIdVfnmsubps, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vfnmsubps, kX86InstIdVfnmsubps, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vfnmsubps, kX86InstIdVfnmsubps, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vfnmsubps, kX86InstIdVfnmsubps, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4x(vfnmsubsd, kX86InstIdVfnmsubsd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfnmsubsd, kX86InstIdVfnmsubsd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfnmsubsd, kX86InstIdVfnmsubsd, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + + INST_4x(vfnmsubss, kX86InstIdVfnmsubss, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vfnmsubss, kX86InstIdVfnmsubss, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vfnmsubss, kX86InstIdVfnmsubss, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + + // 
-------------------------------------------------------------------------- + // [XOP] + // -------------------------------------------------------------------------- + + INST_2x(vfrczpd, kX86InstIdVfrczpd, X86XmmVar, X86XmmVar) + INST_2x(vfrczpd, kX86InstIdVfrczpd, X86XmmVar, X86Mem) + INST_2x(vfrczpd, kX86InstIdVfrczpd, X86YmmVar, X86YmmVar) + INST_2x(vfrczpd, kX86InstIdVfrczpd, X86YmmVar, X86Mem) + + INST_2x(vfrczps, kX86InstIdVfrczps, X86XmmVar, X86XmmVar) + INST_2x(vfrczps, kX86InstIdVfrczps, X86XmmVar, X86Mem) + INST_2x(vfrczps, kX86InstIdVfrczps, X86YmmVar, X86YmmVar) + INST_2x(vfrczps, kX86InstIdVfrczps, X86YmmVar, X86Mem) + + INST_2x(vfrczsd, kX86InstIdVfrczsd, X86XmmVar, X86XmmVar) + INST_2x(vfrczsd, kX86InstIdVfrczsd, X86XmmVar, X86Mem) + + INST_2x(vfrczss, kX86InstIdVfrczss, X86XmmVar, X86XmmVar) + INST_2x(vfrczss, kX86InstIdVfrczss, X86XmmVar, X86Mem) + + INST_4x(vpcmov, kX86InstIdVpcmov, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpcmov, kX86InstIdVpcmov, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vpcmov, kX86InstIdVpcmov, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vpcmov, kX86InstIdVpcmov, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vpcmov, kX86InstIdVpcmov, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vpcmov, kX86InstIdVpcmov, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4i(vpcomb, kX86InstIdVpcomb, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + INST_4i(vpcomb, kX86InstIdVpcomb, X86XmmVar, X86XmmVar, X86Mem, Imm) + INST_4i(vpcomd, kX86InstIdVpcomd, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + INST_4i(vpcomd, kX86InstIdVpcomd, X86XmmVar, X86XmmVar, X86Mem, Imm) + INST_4i(vpcomq, kX86InstIdVpcomq, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + INST_4i(vpcomq, kX86InstIdVpcomq, X86XmmVar, X86XmmVar, X86Mem, Imm) + INST_4i(vpcomw, kX86InstIdVpcomw, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + INST_4i(vpcomw, kX86InstIdVpcomw, X86XmmVar, X86XmmVar, X86Mem, Imm) + + INST_4i(vpcomub, kX86InstIdVpcomub, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + INST_4i(vpcomub, kX86InstIdVpcomub, X86XmmVar, X86XmmVar, X86Mem, Imm) + INST_4i(vpcomud, kX86InstIdVpcomud, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + INST_4i(vpcomud, kX86InstIdVpcomud, X86XmmVar, X86XmmVar, X86Mem, Imm) + INST_4i(vpcomuq, kX86InstIdVpcomuq, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + INST_4i(vpcomuq, kX86InstIdVpcomuq, X86XmmVar, X86XmmVar, X86Mem, Imm) + INST_4i(vpcomuw, kX86InstIdVpcomuw, X86XmmVar, X86XmmVar, X86XmmVar, Imm) + INST_4i(vpcomuw, kX86InstIdVpcomuw, X86XmmVar, X86XmmVar, X86Mem, Imm) + + INST_4x(vpermil2pd, kX86InstIdVpermil2pd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpermil2pd, kX86InstIdVpermil2pd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vpermil2pd, kX86InstIdVpermil2pd, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vpermil2pd, kX86InstIdVpermil2pd, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vpermil2pd, kX86InstIdVpermil2pd, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vpermil2pd, kX86InstIdVpermil2pd, X86YmmVar, X86YmmVar, X86YmmVar, X86Mem) + + INST_4x(vpermil2ps, kX86InstIdVpermil2ps, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpermil2ps, kX86InstIdVpermil2ps, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vpermil2ps, kX86InstIdVpermil2ps, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + INST_4x(vpermil2ps, kX86InstIdVpermil2ps, X86YmmVar, X86YmmVar, X86YmmVar, X86YmmVar) + INST_4x(vpermil2ps, kX86InstIdVpermil2ps, X86YmmVar, X86YmmVar, X86Mem, X86YmmVar) + INST_4x(vpermil2ps, kX86InstIdVpermil2ps, X86YmmVar, X86YmmVar, X86YmmVar, 
X86Mem) + + INST_2x(vphaddbd, kX86InstIdVphaddbd, X86XmmVar, X86XmmVar) + INST_2x(vphaddbd, kX86InstIdVphaddbd, X86XmmVar, X86Mem) + INST_2x(vphaddbq, kX86InstIdVphaddbq, X86XmmVar, X86XmmVar) + INST_2x(vphaddbq, kX86InstIdVphaddbq, X86XmmVar, X86Mem) + INST_2x(vphaddbw, kX86InstIdVphaddbw, X86XmmVar, X86XmmVar) + INST_2x(vphaddbw, kX86InstIdVphaddbw, X86XmmVar, X86Mem) + INST_2x(vphadddq, kX86InstIdVphadddq, X86XmmVar, X86XmmVar) + INST_2x(vphadddq, kX86InstIdVphadddq, X86XmmVar, X86Mem) + INST_2x(vphaddwd, kX86InstIdVphaddwd, X86XmmVar, X86XmmVar) + INST_2x(vphaddwd, kX86InstIdVphaddwd, X86XmmVar, X86Mem) + INST_2x(vphaddwq, kX86InstIdVphaddwq, X86XmmVar, X86XmmVar) + INST_2x(vphaddwq, kX86InstIdVphaddwq, X86XmmVar, X86Mem) + + INST_2x(vphaddubd, kX86InstIdVphaddubd, X86XmmVar, X86XmmVar) + INST_2x(vphaddubd, kX86InstIdVphaddubd, X86XmmVar, X86Mem) + INST_2x(vphaddubq, kX86InstIdVphaddubq, X86XmmVar, X86XmmVar) + INST_2x(vphaddubq, kX86InstIdVphaddubq, X86XmmVar, X86Mem) + INST_2x(vphaddubw, kX86InstIdVphaddubw, X86XmmVar, X86XmmVar) + INST_2x(vphaddubw, kX86InstIdVphaddubw, X86XmmVar, X86Mem) + INST_2x(vphaddudq, kX86InstIdVphaddudq, X86XmmVar, X86XmmVar) + INST_2x(vphaddudq, kX86InstIdVphaddudq, X86XmmVar, X86Mem) + INST_2x(vphadduwd, kX86InstIdVphadduwd, X86XmmVar, X86XmmVar) + INST_2x(vphadduwd, kX86InstIdVphadduwd, X86XmmVar, X86Mem) + INST_2x(vphadduwq, kX86InstIdVphadduwq, X86XmmVar, X86XmmVar) + INST_2x(vphadduwq, kX86InstIdVphadduwq, X86XmmVar, X86Mem) + + INST_2x(vphsubbw, kX86InstIdVphsubbw, X86XmmVar, X86XmmVar) + INST_2x(vphsubbw, kX86InstIdVphsubbw, X86XmmVar, X86Mem) + INST_2x(vphsubdq, kX86InstIdVphsubdq, X86XmmVar, X86XmmVar) + INST_2x(vphsubdq, kX86InstIdVphsubdq, X86XmmVar, X86Mem) + INST_2x(vphsubwd, kX86InstIdVphsubwd, X86XmmVar, X86XmmVar) + INST_2x(vphsubwd, kX86InstIdVphsubwd, X86XmmVar, X86Mem) + + INST_4x(vpmacsdd, kX86InstIdVpmacsdd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpmacsdd, kX86InstIdVpmacsdd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vpmacsdqh, kX86InstIdVpmacsdqh, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpmacsdqh, kX86InstIdVpmacsdqh, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vpmacsdql, kX86InstIdVpmacsdql, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpmacsdql, kX86InstIdVpmacsdql, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vpmacswd, kX86InstIdVpmacswd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpmacswd, kX86InstIdVpmacswd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vpmacsww, kX86InstIdVpmacsww, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpmacsww, kX86InstIdVpmacsww, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + + INST_4x(vpmacssdd, kX86InstIdVpmacssdd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpmacssdd, kX86InstIdVpmacssdd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vpmacssdqh, kX86InstIdVpmacssdqh, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpmacssdqh, kX86InstIdVpmacssdqh, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vpmacssdql, kX86InstIdVpmacssdql, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpmacssdql, kX86InstIdVpmacssdql, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vpmacsswd, kX86InstIdVpmacsswd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpmacsswd, kX86InstIdVpmacsswd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vpmacssww, kX86InstIdVpmacssww, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpmacssww, kX86InstIdVpmacssww, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + + 
INST_4x(vpmadcsswd, kX86InstIdVpmadcsswd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpmadcsswd, kX86InstIdVpmadcsswd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + + INST_4x(vpmadcswd, kX86InstIdVpmadcswd, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpmadcswd, kX86InstIdVpmadcswd, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + + INST_4x(vpperm, kX86InstIdVpperm, X86XmmVar, X86XmmVar, X86XmmVar, X86XmmVar) + INST_4x(vpperm, kX86InstIdVpperm, X86XmmVar, X86XmmVar, X86Mem, X86XmmVar) + INST_4x(vpperm, kX86InstIdVpperm, X86XmmVar, X86XmmVar, X86XmmVar, X86Mem) + + INST_3x(vprotb, kX86InstIdVprotb, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vprotb, kX86InstIdVprotb, X86XmmVar, X86Mem, X86XmmVar) + INST_3x(vprotb, kX86InstIdVprotb, X86XmmVar, X86XmmVar, X86Mem) + INST_3i(vprotb, kX86InstIdVprotb, X86XmmVar, X86XmmVar, Imm) + INST_3i(vprotb, kX86InstIdVprotb, X86XmmVar, X86Mem, Imm) + + INST_3x(vprotd, kX86InstIdVprotd, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vprotd, kX86InstIdVprotd, X86XmmVar, X86Mem, X86XmmVar) + INST_3x(vprotd, kX86InstIdVprotd, X86XmmVar, X86XmmVar, X86Mem) + INST_3i(vprotd, kX86InstIdVprotd, X86XmmVar, X86XmmVar, Imm) + INST_3i(vprotd, kX86InstIdVprotd, X86XmmVar, X86Mem, Imm) + + INST_3x(vprotq, kX86InstIdVprotq, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vprotq, kX86InstIdVprotq, X86XmmVar, X86Mem, X86XmmVar) + INST_3x(vprotq, kX86InstIdVprotq, X86XmmVar, X86XmmVar, X86Mem) + INST_3i(vprotq, kX86InstIdVprotq, X86XmmVar, X86XmmVar, Imm) + INST_3i(vprotq, kX86InstIdVprotq, X86XmmVar, X86Mem, Imm) + + INST_3x(vprotw, kX86InstIdVprotw, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vprotw, kX86InstIdVprotw, X86XmmVar, X86Mem, X86XmmVar) + INST_3x(vprotw, kX86InstIdVprotw, X86XmmVar, X86XmmVar, X86Mem) + INST_3i(vprotw, kX86InstIdVprotw, X86XmmVar, X86XmmVar, Imm) + INST_3i(vprotw, kX86InstIdVprotw, X86XmmVar, X86Mem, Imm) + + INST_3x(vpshab, kX86InstIdVpshab, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vpshab, kX86InstIdVpshab, X86XmmVar, X86Mem, X86XmmVar) + INST_3x(vpshab, kX86InstIdVpshab, X86XmmVar, X86XmmVar, X86Mem) + + INST_3x(vpshad, kX86InstIdVpshad, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vpshad, kX86InstIdVpshad, X86XmmVar, X86Mem, X86XmmVar) + INST_3x(vpshad, kX86InstIdVpshad, X86XmmVar, X86XmmVar, X86Mem) + + INST_3x(vpshaq, kX86InstIdVpshaq, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vpshaq, kX86InstIdVpshaq, X86XmmVar, X86Mem, X86XmmVar) + INST_3x(vpshaq, kX86InstIdVpshaq, X86XmmVar, X86XmmVar, X86Mem) + + INST_3x(vpshaw, kX86InstIdVpshaw, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vpshaw, kX86InstIdVpshaw, X86XmmVar, X86Mem, X86XmmVar) + INST_3x(vpshaw, kX86InstIdVpshaw, X86XmmVar, X86XmmVar, X86Mem) + + INST_3x(vpshlb, kX86InstIdVpshlb, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vpshlb, kX86InstIdVpshlb, X86XmmVar, X86Mem, X86XmmVar) + INST_3x(vpshlb, kX86InstIdVpshlb, X86XmmVar, X86XmmVar, X86Mem) + + INST_3x(vpshld, kX86InstIdVpshld, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vpshld, kX86InstIdVpshld, X86XmmVar, X86Mem, X86XmmVar) + INST_3x(vpshld, kX86InstIdVpshld, X86XmmVar, X86XmmVar, X86Mem) + + INST_3x(vpshlq, kX86InstIdVpshlq, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vpshlq, kX86InstIdVpshlq, X86XmmVar, X86Mem, X86XmmVar) + INST_3x(vpshlq, kX86InstIdVpshlq, X86XmmVar, X86XmmVar, X86Mem) + + INST_3x(vpshlw, kX86InstIdVpshlw, X86XmmVar, X86XmmVar, X86XmmVar) + INST_3x(vpshlw, kX86InstIdVpshlw, X86XmmVar, X86Mem, X86XmmVar) + INST_3x(vpshlw, kX86InstIdVpshlw, X86XmmVar, X86XmmVar, X86Mem) + + // 
--------------------------------------------------------------------------
+ // [F16C]
+ // --------------------------------------------------------------------------
+
+ //! Convert packed HP-FP to SP-FP.
+ INST_2x(vcvtph2ps, kX86InstIdVcvtph2ps, X86XmmVar, X86XmmVar)
+ //! \overload
+ INST_2x(vcvtph2ps, kX86InstIdVcvtph2ps, X86XmmVar, X86Mem)
+ //! \overload
+ INST_2x(vcvtph2ps, kX86InstIdVcvtph2ps, X86YmmVar, X86XmmVar)
+ //! \overload
+ INST_2x(vcvtph2ps, kX86InstIdVcvtph2ps, X86YmmVar, X86Mem)
+
+ //! Convert packed SP-FP to HP-FP.
+ INST_3i(vcvtps2ph, kX86InstIdVcvtps2ph, X86XmmVar, X86XmmVar, Imm)
+ //! \overload
+ INST_3i(vcvtps2ph, kX86InstIdVcvtps2ph, X86Mem, X86XmmVar, Imm)
+ //! \overload
+ INST_3i(vcvtps2ph, kX86InstIdVcvtps2ph, X86XmmVar, X86YmmVar, Imm)
+ //! \overload
+ INST_3i(vcvtps2ph, kX86InstIdVcvtps2ph, X86Mem, X86YmmVar, Imm)
+
+ // --------------------------------------------------------------------------
+ // [Cleanup]
+ // --------------------------------------------------------------------------
+
+#undef INST_0x
+
+#undef INST_1x
+#undef INST_1i
+#undef INST_1cc
+
+#undef INST_2x
+#undef INST_2i
+#undef INST_2cc
+
+#undef INST_3x
+#undef INST_3i
+#undef INST_3ii
+
+#undef INST_4x
+#undef INST_4i
+#undef INST_4ii
+};
+
+//! \}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // !ASMJIT_DISABLE_COMPILER
+#endif // _ASMJIT_X86_X86COMPILER_H
diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86compilercontext.cpp b/DynamicHooks/thirdparty/AsmJit/x86/x86compilercontext.cpp
new file mode 100644
index 0000000..3d42be5
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/x86/x86compilercontext.cpp
@@ -0,0 +1,5921 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Guard]
+#include "../build.h"
+#if !defined(ASMJIT_DISABLE_COMPILER) && (defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64))
+
+// [Dependencies]
+#include "../base/containers.h"
+#include "../base/cpuinfo.h"
+#include "../base/utils.h"
+#include "../x86/x86assembler.h"
+#include "../x86/x86compiler.h"
+#include "../x86/x86compilercontext_p.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [Forward Declarations]
+// ============================================================================
+
+static Error X86Context_translateOperands(X86Context* self, Operand* opList, uint32_t opCount);
+
+// ============================================================================
+// [asmjit::X86Context - Utils]
+// ============================================================================
+
+// Getting `VarClass` is the only safe operation when dealing with denormalized
+// `varType`. Any other property would require mapping vType to the
+// architecture-specific type.
+static ASMJIT_INLINE uint32_t x86VarTypeToClass(uint32_t vType) noexcept {
+ ASMJIT_ASSERT(vType < kX86VarTypeCount);
+ return _x86VarInfo[vType].getRegClass();
+}
+
+// ============================================================================
+// [asmjit::X86Context - Annotate]
+// ============================================================================
+
+// Annotation is also used by ASMJIT_TRACE.
+#if !defined(ASMJIT_DISABLE_LOGGER)
+static void X86Context_annotateVariable(X86Context* self,
+  StringBuilder& sb, const VarData* vd) {
+
+  const char* name = vd->getName();
+  if (name != nullptr && name[0] != '\0') {
+    sb.appendString(name);
+  }
+  else {
+    sb.appendChar('v');
+    sb.appendUInt(vd->getId() & Operand::kIdIndexMask);
+  }
+}
+
+static void X86Context_annotateOperand(X86Context* self,
+  StringBuilder& sb, const Operand* op) {
+
+  if (op->isVar()) {
+    X86Context_annotateVariable(self, sb, self->_compiler->getVdById(op->getId()));
+  }
+  else if (op->isMem()) {
+    const X86Mem* m = static_cast<const X86Mem*>(op);
+    bool isAbsolute = false;
+
+    sb.appendChar('[');
+    switch (m->getMemType()) {
+      case kMemTypeBaseIndex:
+      case kMemTypeStackIndex:
+        // [base + index << shift + displacement]
+        X86Context_annotateVariable(self, sb, self->_compiler->getVdById(m->getBase()));
+        break;
+
+      case kMemTypeLabel:
+        // [label + index << shift + displacement]
+        sb.appendFormat("L%u", m->getBase());
+        break;
+
+      case kMemTypeAbsolute:
+        // [absolute]
+        isAbsolute = true;
+        sb.appendUInt(static_cast<uint32_t>(m->getDisplacement()), 16);
+        break;
+    }
+
+    if (m->hasIndex()) {
+      sb.appendChar('+');
+      X86Context_annotateVariable(self, sb, self->_compiler->getVdById(m->getIndex()));
+
+      if (m->getShift()) {
+        sb.appendChar('*');
+        sb.appendChar("1248"[m->getShift() & 3]);
+      }
+    }
+
+    if (m->getDisplacement() && !isAbsolute) {
+      uint32_t base = 10;
+      int32_t dispOffset = m->getDisplacement();
+
+      char prefix = '+';
+      if (dispOffset < 0) {
+        dispOffset = -dispOffset;
+        prefix = '-';
+      }
+
+      sb.appendChar(prefix);
+      // TODO: Enable again:
+      // if ((loggerOptions & (Logger::kOptionHexDisplacement)) != 0 && dispOffset > 9) {
+      //   sb.appendString("0x", 2);
+      //   base = 16;
+      // }
+      sb.appendUInt(static_cast<uint32_t>(dispOffset), base);
+    }
+
+    sb.appendChar(']');
+  }
+  else if (op->isImm()) {
+    const Imm* i = static_cast<const Imm*>(op);
+    int64_t val = i->getInt64();
+
+    /*
+    if ((loggerOptions & (1 << Logger::kOptionHexImmediate)) && static_cast<uint64_t>(val) > 9)
+      sb.appendUInt(static_cast<uint64_t>(val), 16);
+    else*/
+    sb.appendInt(val, 10);
+  }
+  else if (op->isLabel()) {
+    sb.appendFormat("L%u", op->getId());
+  }
+  else {
+    sb.appendString("None", 4);
+  }
+}
+
+static bool X86Context_annotateInstruction(X86Context* self,
+  StringBuilder& sb, uint32_t instId, const Operand* opList, uint32_t opCount) {
+
+  sb.appendString(X86Util::getInstNameById(instId));
+  for (uint32_t i = 0; i < opCount; i++) {
+    if (i == 0)
+      sb.appendChar(' ');
+    else
+      sb.appendString(", ", 2);
+    X86Context_annotateOperand(self, sb, &opList[i]);
+  }
+  return true;
+}
+#endif // !ASMJIT_DISABLE_LOGGER
+
+#if defined(ASMJIT_TRACE)
+static void ASMJIT_CDECL X86Context_traceNode(X86Context* self, HLNode* node_, const char* prefix) {
+  StringBuilderTmp<256> sb;
+
+  switch (node_->getType()) {
+    case HLNode::kTypeAlign: {
+      HLAlign* node = static_cast<HLAlign*>(node_);
+      sb.appendFormat(".align %u (%s)",
+        node->getOffset(),
+        node->getAlignMode() == kAlignCode ? "code" : "data");
+      break;
+    }
+
+    case HLNode::kTypeData: {
+      HLData* node = static_cast<HLData*>(node_);
+      sb.appendFormat(".embed (%u bytes)", node->getSize());
+      break;
+    }
+
+    case HLNode::kTypeComment: {
+      HLComment* node = static_cast<HLComment*>(node_);
+      sb.appendFormat("; %s", node->getComment());
+      break;
+    }
+
+    case HLNode::kTypeHint: {
+      HLHint* node = static_cast<HLHint*>(node_);
+      static const char* hint[16] = {
+        "alloc",
+        "spill",
+        "save",
+        "save-unuse",
+        "unuse"
+      };
+      sb.appendFormat("[%s] %s",
+        hint[node->getHint()], node->getVd()->getName());
+      break;
+    }
+
+    case HLNode::kTypeLabel: {
+      HLLabel* node = static_cast<HLLabel*>(node_);
+      sb.appendFormat("L%u: (NumRefs=%u)",
+        node->getLabelId(),
+        node->getNumRefs());
+      break;
+    }
+
+    case HLNode::kTypeInst: {
+      HLInst* node = static_cast<HLInst*>(node_);
+      X86Context_annotateInstruction(self, sb,
+        node->getInstId(), node->getOpList(), node->getOpCount());
+      break;
+    }
+
+    case HLNode::kTypeFunc: {
+      HLFunc* node = static_cast<HLFunc*>(node_);
+      sb.appendFormat("[func]");
+      break;
+    }
+
+    case HLNode::kTypeSentinel: {
+      HLSentinel* node = static_cast<HLSentinel*>(node_);
+      sb.appendFormat("[end]");
+      break;
+    }
+
+    case HLNode::kTypeRet: {
+      HLRet* node = static_cast<HLRet*>(node_);
+      sb.appendFormat("[ret]");
+      break;
+    }
+
+    case HLNode::kTypeCall: {
+      HLCall* node = static_cast<HLCall*>(node_);
+      sb.appendFormat("[call]");
+      break;
+    }
+
+    case HLNode::kTypeCallArg: {
+      HLCallArg* node = static_cast<HLCallArg*>(node_);
+      sb.appendFormat("[sarg]");
+      break;
+    }
+
+    default: {
+      sb.appendFormat("[unknown]");
+      break;
+    }
+  }
+
+  ASMJIT_TLOG("%s[%05u] %s\n", prefix, node_->getFlowId(), sb.getData());
+}
+#endif // ASMJIT_TRACE
+
+// ============================================================================
+// [asmjit::X86Context - Construction / Destruction]
+// ============================================================================
+
+X86Context::X86Context(X86Compiler* compiler) : Context(compiler) {
+  _varMapToVaListOffset = ASMJIT_OFFSET_OF(X86VarMap, _list);
+  _regCount = compiler->_regCount;
+
+  _zsp = compiler->zsp;
+  _zbp = compiler->zbp;
+
+  _memSlot._vmem.type = kMemTypeStackIndex;
+  _memSlot.setGpdBase(compiler->getArch() == kArchX86);
+
+#if defined(ASMJIT_TRACE)
+  _traceNode = (TraceNodeFunc)X86Context_traceNode;
+#endif // ASMJIT_TRACE
+
+#if !defined(ASMJIT_DISABLE_LOGGER)
+  _emitComments = compiler->getAssembler()->hasLogger();
+#endif // !ASMJIT_DISABLE_LOGGER
+
+  _state = &_x86State;
+  reset();
+}
+X86Context::~X86Context() {}
+
+// ============================================================================
+// [asmjit::X86Context - Reset]
+// ============================================================================
+
+void X86Context::reset(bool releaseMemory) {
+  Context::reset(releaseMemory);
+
+  _x86State.reset(0);
+  _clobberedRegs.reset();
+
+  _stackFrameCell = nullptr;
+  _gaRegs[kX86RegClassGp ] = Utils::bits(_regCount.getGp()) & ~Utils::mask(kX86RegIndexSp);
+  _gaRegs[kX86RegClassMm ] = Utils::bits(_regCount.getMm());
+  _gaRegs[kX86RegClassK  ] = Utils::bits(_regCount.getK());
+  _gaRegs[kX86RegClassXyz] = Utils::bits(_regCount.getXyz());
+
+  _argBaseReg = kInvalidReg; // Used by patcher.
+  _varBaseReg = kInvalidReg; // Used by patcher.
+
+  _argBaseOffset = 0;        // Used by patcher.
+  _varBaseOffset = 0;        // Used by patcher.
+
+  _argActualDisp = 0;        // Used by translator.
+  _varActualDisp = 0;        // Used by translator.
+}
+
+// ============================================================================
+// [asmjit::X86SpecialInst]
+// ============================================================================
+
+struct X86SpecialInst {
+  uint8_t inReg;
+  uint8_t outReg;
+  uint16_t flags;
+};
+
+static const X86SpecialInst x86SpecialInstCpuid[] = {
+  { kX86RegIndexAx, kX86RegIndexAx, kVarAttrXReg },
+  { kInvalidReg   , kX86RegIndexBx, kVarAttrWReg },
+  { kInvalidReg   , kX86RegIndexCx, kVarAttrWReg },
+  { kInvalidReg   , kX86RegIndexDx, kVarAttrWReg }
+};
+
+static const X86SpecialInst x86SpecialInstCbwCdqeCwde[] = {
+  { kX86RegIndexAx, kX86RegIndexAx, kVarAttrXReg }
+};
+
+static const X86SpecialInst x86SpecialInstCdqCwdCqo[] = {
+  { kInvalidReg   , kX86RegIndexDx, kVarAttrWReg },
+  { kX86RegIndexAx, kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstCmpxchg[] = {
+  { kX86RegIndexAx, kX86RegIndexAx, kVarAttrXReg },
+  { kInvalidReg   , kInvalidReg   , kVarAttrXReg },
+  { kInvalidReg   , kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstCmpxchg8b16b[] = {
+  { kX86RegIndexDx, kX86RegIndexDx, kVarAttrXReg },
+  { kX86RegIndexAx, kX86RegIndexAx, kVarAttrXReg },
+  { kX86RegIndexCx, kInvalidReg   , kVarAttrRReg },
+  { kX86RegIndexBx, kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstDaaDas[] = {
+  { kX86RegIndexAx, kX86RegIndexAx, kVarAttrXReg }
+};
+
+static const X86SpecialInst x86SpecialInstDiv[] = {
+  { kInvalidReg   , kX86RegIndexDx, kVarAttrXReg },
+  { kX86RegIndexAx, kX86RegIndexAx, kVarAttrXReg },
+  { kInvalidReg   , kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstJecxz[] = {
+  { kX86RegIndexCx, kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstLods[] = {
+  { kInvalidReg   , kX86RegIndexAx, kVarAttrWReg },
+  { kX86RegIndexSi, kX86RegIndexSi, kVarAttrXReg },
+  { kX86RegIndexCx, kX86RegIndexCx, kVarAttrXReg }
+};
+
+static const X86SpecialInst x86SpecialInstMul[] = {
+  { kInvalidReg   , kX86RegIndexDx, kVarAttrWReg },
+  { kX86RegIndexAx, kX86RegIndexAx, kVarAttrXReg },
+  { kInvalidReg   , kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstMovPtr[] = {
+  { kInvalidReg   , kX86RegIndexAx, kVarAttrWReg },
+  { kX86RegIndexAx, kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstMovsCmps[] = {
+  { kX86RegIndexDi, kX86RegIndexDi, kVarAttrXReg },
+  { kX86RegIndexSi, kX86RegIndexSi, kVarAttrXReg },
+  { kX86RegIndexCx, kX86RegIndexCx, kVarAttrXReg }
+};
+
+static const X86SpecialInst x86SpecialInstLahf[] = {
+  { kInvalidReg   , kX86RegIndexAx, kVarAttrWReg }
+};
+
+static const X86SpecialInst x86SpecialInstSahf[] = {
+  { kX86RegIndexAx, kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstMaskmovqMaskmovdqu[] = {
+  { kInvalidReg   , kX86RegIndexDi, kVarAttrRReg },
+  { kInvalidReg   , kInvalidReg   , kVarAttrRReg },
+  { kInvalidReg   , kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstRdtscRdtscp[] = {
+  { kInvalidReg   , kX86RegIndexDx, kVarAttrWReg },
+  { kInvalidReg   , kX86RegIndexAx, kVarAttrWReg },
+  { kInvalidReg   , kX86RegIndexCx, kVarAttrWReg }
+};
+
+static const X86SpecialInst x86SpecialInstRot[] = {
+  { kInvalidReg   , kInvalidReg   , kVarAttrXReg },
+  { kX86RegIndexCx, kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstScas[] = {
+  { kX86RegIndexDi, kX86RegIndexDi, kVarAttrXReg },
+  { kX86RegIndexAx, kInvalidReg   , kVarAttrRReg },
+  { kX86RegIndexCx, kX86RegIndexCx, kVarAttrXReg }
+};
+
+static const X86SpecialInst x86SpecialInstShldShrd[] = {
+  { kInvalidReg   , kInvalidReg   , kVarAttrXReg },
+  { kInvalidReg   , kInvalidReg   , kVarAttrRReg },
+  { kX86RegIndexCx, kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstStos[] = {
+  { kX86RegIndexDi, kInvalidReg   , kVarAttrRReg },
+  { kX86RegIndexAx, kInvalidReg   , kVarAttrRReg },
+  { kX86RegIndexCx, kX86RegIndexCx, kVarAttrXReg }
+};
+
+static const X86SpecialInst x86SpecialInstThirdXMM0[] = {
+  { kInvalidReg   , kInvalidReg   , kVarAttrWReg },
+  { kInvalidReg   , kInvalidReg   , kVarAttrRReg },
+  { 0             , kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstPcmpistri[] = {
+  { kInvalidReg   , kX86RegIndexCx, kVarAttrWReg },
+  { kInvalidReg   , kInvalidReg   , kVarAttrRReg },
+  { kInvalidReg   , kInvalidReg   , kVarAttrRReg }
+};
+static const X86SpecialInst x86SpecialInstPcmpistrm[] = {
+  { kInvalidReg   , 0             , kVarAttrWReg },
+  { kInvalidReg   , kInvalidReg   , kVarAttrRReg },
+  { kInvalidReg   , kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstXsaveXrstor[] = {
+  { kInvalidReg   , kInvalidReg   , 0            },
+  { kX86RegIndexDx, kInvalidReg   , kVarAttrRReg },
+  { kX86RegIndexAx, kInvalidReg   , kVarAttrRReg }
+};
+
+static const X86SpecialInst x86SpecialInstXgetbv[] = {
+  { kX86RegIndexCx, kInvalidReg   , kVarAttrRReg },
+  { kInvalidReg   , kX86RegIndexDx, kVarAttrWReg },
+  { kInvalidReg   , kX86RegIndexAx, kVarAttrWReg }
+};
+
+static const X86SpecialInst x86SpecialInstXsetbv[] = {
+  { kX86RegIndexCx, kInvalidReg   , kVarAttrRReg },
+  { kX86RegIndexDx, kInvalidReg   , kVarAttrRReg },
+  { kX86RegIndexAx, kInvalidReg   , kVarAttrRReg }
+};
+
+static ASMJIT_INLINE const X86SpecialInst* X86SpecialInst_get(uint32_t instId, const Operand* opList, uint32_t opCount) {
+  switch (instId) {
+    case kX86InstIdCpuid:
+      return x86SpecialInstCpuid;
+
+    case kX86InstIdCbw:
+    case kX86InstIdCdqe:
+    case kX86InstIdCwde:
+      return x86SpecialInstCbwCdqeCwde;
+
+    case kX86InstIdCdq:
+    case kX86InstIdCwd:
+    case kX86InstIdCqo:
+      return x86SpecialInstCdqCwdCqo;
+
+    case kX86InstIdCmpsB:
+    case kX86InstIdCmpsD:
+    case kX86InstIdCmpsQ:
+    case kX86InstIdCmpsW:
+    case kX86InstIdRepeCmpsB:
+    case kX86InstIdRepeCmpsD:
+    case kX86InstIdRepeCmpsQ:
+    case kX86InstIdRepeCmpsW:
+    case kX86InstIdRepneCmpsB:
+    case kX86InstIdRepneCmpsD:
+    case kX86InstIdRepneCmpsQ:
+    case kX86InstIdRepneCmpsW:
+      return x86SpecialInstMovsCmps;
+
+    case kX86InstIdCmpxchg:
+      return x86SpecialInstCmpxchg;
+
+    case kX86InstIdCmpxchg8b:
+    case kX86InstIdCmpxchg16b:
+      return x86SpecialInstCmpxchg8b16b;
+
+    case kX86InstIdDaa:
+    case kX86InstIdDas:
+      return x86SpecialInstDaaDas;
+
+    case kX86InstIdJecxz:
+      return x86SpecialInstJecxz;
+
+    case kX86InstIdIdiv:
+    case kX86InstIdDiv:
+      return x86SpecialInstDiv;
+
+    case kX86InstIdImul:
+      if (opCount == 2)
+        return nullptr;
+      if (opCount == 3 && !(opList[0].isVar() && opList[1].isVar() && opList[2].isVarOrMem()))
+        return nullptr;
+      ASMJIT_FALLTHROUGH;
+
+    case kX86InstIdMul:
+      return x86SpecialInstMul;
+
+    case kX86InstIdMovPtr:
+      return x86SpecialInstMovPtr;
+
+    case kX86InstIdLodsB:
+    case kX86InstIdLodsD:
+    case kX86InstIdLodsQ:
+    case kX86InstIdLodsW:
+    case kX86InstIdRepLodsB:
+    case kX86InstIdRepLodsD:
+    case kX86InstIdRepLodsQ:
+    case kX86InstIdRepLodsW:
+      return x86SpecialInstLods;
+
+    case kX86InstIdMovsB:
+    case kX86InstIdMovsD:
+    case kX86InstIdMovsQ:
+    case kX86InstIdMovsW:
+    case kX86InstIdRepMovsB:
+    case kX86InstIdRepMovsD:
+    case kX86InstIdRepMovsQ:
+    case kX86InstIdRepMovsW:
+      return x86SpecialInstMovsCmps;
+
+    case kX86InstIdLahf:
+      return x86SpecialInstLahf;
+
+    case kX86InstIdSahf:
+      return x86SpecialInstSahf;
+
+    case kX86InstIdMaskmovq:
+    case kX86InstIdMaskmovdqu:
+    case kX86InstIdVmaskmovdqu:
+      return x86SpecialInstMaskmovqMaskmovdqu;
+
+    // Not supported.
+    case kX86InstIdEnter:
+    case kX86InstIdLeave:
+      return nullptr;
+
+    // Not supported.
+    case kX86InstIdRet:
+      return nullptr;
+
+    case kX86InstIdMonitor:
+    case kX86InstIdMwait:
+      // TODO: [COMPILER] Monitor/MWait.
+      return nullptr;
+
+    case kX86InstIdPop:
+      // TODO: [COMPILER] Pop.
+      return nullptr;
+
+    // Not supported.
+    case kX86InstIdPopa:
+    case kX86InstIdPopf:
+      return nullptr;
+
+    case kX86InstIdPush:
+      // TODO: [COMPILER] Push.
+      return nullptr;
+
+    // Not supported.
+    case kX86InstIdPusha:
+    case kX86InstIdPushf:
+      return nullptr;
+
+    // Rot instruction is special only if the last operand is a variable.
+    case kX86InstIdRcl:
+    case kX86InstIdRcr:
+    case kX86InstIdRol:
+    case kX86InstIdRor:
+    case kX86InstIdSal:
+    case kX86InstIdSar:
+    case kX86InstIdShl:
+    case kX86InstIdShr:
+      if (!opList[1].isVar())
+        return nullptr;
+      return x86SpecialInstRot;
+
+    // Shld/Shrd instruction is special only if the last operand is a variable.
+    case kX86InstIdShld:
+    case kX86InstIdShrd:
+      if (!opList[2].isVar())
+        return nullptr;
+      return x86SpecialInstShldShrd;
+
+    case kX86InstIdRdtsc:
+    case kX86InstIdRdtscp:
+      return x86SpecialInstRdtscRdtscp;
+
+    case kX86InstIdScasB:
+    case kX86InstIdScasD:
+    case kX86InstIdScasQ:
+    case kX86InstIdScasW:
+    case kX86InstIdRepeScasB:
+    case kX86InstIdRepeScasD:
+    case kX86InstIdRepeScasQ:
+    case kX86InstIdRepeScasW:
+    case kX86InstIdRepneScasB:
+    case kX86InstIdRepneScasD:
+    case kX86InstIdRepneScasQ:
+    case kX86InstIdRepneScasW:
+      return x86SpecialInstScas;
+
+    case kX86InstIdStosB:
+    case kX86InstIdStosD:
+    case kX86InstIdStosQ:
+    case kX86InstIdStosW:
+    case kX86InstIdRepStosB:
+    case kX86InstIdRepStosD:
+    case kX86InstIdRepStosQ:
+    case kX86InstIdRepStosW:
+      return x86SpecialInstStos;
+
+    case kX86InstIdBlendvpd:
+    case kX86InstIdBlendvps:
+    case kX86InstIdPblendvb:
+    case kX86InstIdSha256rnds2:
+      return x86SpecialInstThirdXMM0;
+
+    case kX86InstIdPcmpestri:
+    case kX86InstIdPcmpistri:
+    case kX86InstIdVpcmpestri:
+    case kX86InstIdVpcmpistri:
+      return x86SpecialInstPcmpistri;
+
+    case kX86InstIdPcmpestrm:
+    case kX86InstIdPcmpistrm:
+    case kX86InstIdVpcmpestrm:
+    case kX86InstIdVpcmpistrm:
+      return x86SpecialInstPcmpistrm;
+
+    case kX86InstIdXrstor:
+    case kX86InstIdXrstor64:
+    case kX86InstIdXsave:
+    case kX86InstIdXsave64:
+    case kX86InstIdXsaveopt:
+    case kX86InstIdXsaveopt64:
+      return x86SpecialInstXsaveXrstor;
+
+    case kX86InstIdXgetbv:
+      return x86SpecialInstXgetbv;
+
+    case kX86InstIdXsetbv:
+      return x86SpecialInstXsetbv;
+
+    default:
+      return nullptr;
+  }
+}
+
+// ============================================================================
+// [asmjit::X86Context - EmitLoad]
+// ============================================================================
+
+void X86Context::emitLoad(VarData* vd, uint32_t regIndex, const char* reason) {
+  ASMJIT_ASSERT(regIndex != kInvalidReg);
+
+  X86Compiler* compiler = getCompiler();
+  X86Mem m = getVarMem(vd);
+
+  HLNode* node = nullptr;
+
+  switch (vd->getType()) {
+    case kVarTypeInt8:
+    case kVarTypeUInt8:
+      node = compiler->emit(kX86InstIdMov, x86::gpb_lo(regIndex), m);
+      break;
+
+    case kVarTypeInt16:
+    case kVarTypeUInt16:
+      node = compiler->emit(kX86InstIdMov, x86::gpw(regIndex), m);
+      break;
+
+    case kVarTypeInt32:
+    case kVarTypeUInt32:
+      node = compiler->emit(kX86InstIdMov, x86::gpd(regIndex), m);
+      break;
+
+#if defined(ASMJIT_BUILD_X64)
+    case kVarTypeInt64:
+    case kVarTypeUInt64:
+      ASMJIT_ASSERT(_compiler->getArch() != kArchX86);
+      node = compiler->emit(kX86InstIdMov, x86::gpq(regIndex), m);
+      break;
+#endif // ASMJIT_BUILD_X64
+
+    case kX86VarTypeMm:
+      node = compiler->emit(kX86InstIdMovq, x86::mm(regIndex), m);
+      break;
+
+    case kX86VarTypeXmm:
+      node = compiler->emit(kX86InstIdMovdqa, x86::xmm(regIndex), m);
+      break;
+
+    case kX86VarTypeXmmSs:
+      node = compiler->emit(kX86InstIdMovss, x86::xmm(regIndex), m);
+      break;
+
+    case kX86VarTypeXmmSd:
+      node = compiler->emit(kX86InstIdMovsd, x86::xmm(regIndex), m);
+      break;
+
+    case kX86VarTypeXmmPs:
+      node = compiler->emit(kX86InstIdMovaps, x86::xmm(regIndex), m);
+      break;
+
+    case kX86VarTypeXmmPd:
+      node = compiler->emit(kX86InstIdMovapd, x86::xmm(regIndex), m);
+      break;
+
+    // Compiler doesn't manage FPU stack.
+    case kVarTypeFp32:
+    case kVarTypeFp64:
+    default:
+      ASMJIT_NOT_REACHED();
+  }
+
+  if (!_emitComments)
+    return;
+  node->setComment(compiler->_stringAllocator.sformat("[%s] %s", reason, vd->getName()));
+}
+
+// ============================================================================
+// [asmjit::X86Context - EmitSave]
+// ============================================================================
+
+void X86Context::emitSave(VarData* vd, uint32_t regIndex, const char* reason) {
+  ASMJIT_ASSERT(regIndex != kInvalidReg);
+
+  X86Compiler* compiler = getCompiler();
+  X86Mem m = getVarMem(vd);
+
+  HLNode* node = nullptr;
+
+  switch (vd->getType()) {
+    case kVarTypeInt8:
+    case kVarTypeUInt8:
+      node = compiler->emit(kX86InstIdMov, m, x86::gpb_lo(regIndex));
+      break;
+
+    case kVarTypeInt16:
+    case kVarTypeUInt16:
+      node = compiler->emit(kX86InstIdMov, m, x86::gpw(regIndex));
+      break;
+
+    case kVarTypeInt32:
+    case kVarTypeUInt32:
+      node = compiler->emit(kX86InstIdMov, m, x86::gpd(regIndex));
+      break;
+
+#if defined(ASMJIT_BUILD_X64)
+    case kVarTypeInt64:
+    case kVarTypeUInt64:
+      node = compiler->emit(kX86InstIdMov, m, x86::gpq(regIndex));
+      break;
+#endif // ASMJIT_BUILD_X64
+
+    case kX86VarTypeMm:
+      node = compiler->emit(kX86InstIdMovq, m, x86::mm(regIndex));
+      break;
+
+    case kX86VarTypeXmm:
+      node = compiler->emit(kX86InstIdMovdqa, m, x86::xmm(regIndex));
+      break;
+
+    case kX86VarTypeXmmSs:
+      node = compiler->emit(kX86InstIdMovss, m, x86::xmm(regIndex));
+      break;
+
+    case kX86VarTypeXmmSd:
+      node = compiler->emit(kX86InstIdMovsd, m, x86::xmm(regIndex));
+      break;
+
+    case kX86VarTypeXmmPs:
+      node = compiler->emit(kX86InstIdMovaps, m, x86::xmm(regIndex));
+      break;
+
+    case kX86VarTypeXmmPd:
+      node = compiler->emit(kX86InstIdMovapd, m, x86::xmm(regIndex));
+      break;
+
+    // Compiler doesn't manage FPU stack.
+    case kVarTypeFp32:
+    case kVarTypeFp64:
+    default:
+      ASMJIT_NOT_REACHED();
+  }
+
+  if (!_emitComments)
+    return;
+  node->setComment(compiler->_stringAllocator.sformat("[%s] %s", reason, vd->getName()));
+}
+
+// ============================================================================
+// [asmjit::X86Context - EmitMove]
+// ============================================================================
+
+void X86Context::emitMove(VarData* vd, uint32_t toRegIndex, uint32_t fromRegIndex, const char* reason) {
+  ASMJIT_ASSERT(toRegIndex != kInvalidReg);
+  ASMJIT_ASSERT(fromRegIndex != kInvalidReg);
+
+  X86Compiler* compiler = getCompiler();
+  HLNode* node = nullptr;
+
+  switch (vd->getType()) {
+    case kVarTypeInt8:
+    case kVarTypeUInt8:
+    case kVarTypeInt16:
+    case kVarTypeUInt16:
+    case kVarTypeInt32:
+    case kVarTypeUInt32:
+      node = compiler->emit(kX86InstIdMov, x86::gpd(toRegIndex), x86::gpd(fromRegIndex));
+      break;
+
+#if defined(ASMJIT_BUILD_X64)
+    case kVarTypeInt64:
+    case kVarTypeUInt64:
+      node = compiler->emit(kX86InstIdMov, x86::gpq(toRegIndex), x86::gpq(fromRegIndex));
+      break;
+#endif // ASMJIT_BUILD_X64
+
+    case kX86VarTypeMm:
+      node = compiler->emit(kX86InstIdMovq, x86::mm(toRegIndex), x86::mm(fromRegIndex));
+      break;
+
+    case kX86VarTypeXmm:
+      node = compiler->emit(kX86InstIdMovaps, x86::xmm(toRegIndex), x86::xmm(fromRegIndex));
+      break;
+
+    case kX86VarTypeXmmSs:
+      node = compiler->emit(kX86InstIdMovss, x86::xmm(toRegIndex), x86::xmm(fromRegIndex));
+      break;
+
+    case kX86VarTypeXmmSd:
+      node = compiler->emit(kX86InstIdMovsd, x86::xmm(toRegIndex), x86::xmm(fromRegIndex));
+      break;
+
+    case kX86VarTypeXmmPs:
+    case kX86VarTypeXmmPd:
+      node = compiler->emit(kX86InstIdMovaps, x86::xmm(toRegIndex), x86::xmm(fromRegIndex));
+      break;
+
+    case kVarTypeFp32:
+    case kVarTypeFp64:
+    default:
+      // Compiler doesn't manage FPU stack.
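+      // (Fp32/Fp64 values never live in allocable registers here, so a
+      // register-to-register move request for them indicates a
+      // register-allocator bug.)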
+      ASMJIT_NOT_REACHED();
+  }
+
+  if (!_emitComments)
+    return;
+  node->setComment(compiler->_stringAllocator.sformat("[%s] %s", reason, vd->getName()));
+}
+
+// ============================================================================
+// [asmjit::X86Context - EmitSwap]
+// ============================================================================
+
+void X86Context::emitSwapGp(VarData* aVd, VarData* bVd, uint32_t aIndex, uint32_t bIndex, const char* reason) {
+  ASMJIT_ASSERT(aIndex != kInvalidReg);
+  ASMJIT_ASSERT(bIndex != kInvalidReg);
+
+  X86Compiler* compiler = getCompiler();
+  HLNode* node = nullptr;
+
+#if defined(ASMJIT_BUILD_X64)
+  uint32_t vType = Utils::iMax(aVd->getType(), bVd->getType());
+  if (vType == kVarTypeInt64 || vType == kVarTypeUInt64) {
+    node = compiler->emit(kX86InstIdXchg, x86::gpq(aIndex), x86::gpq(bIndex));
+  }
+  else {
+#endif // ASMJIT_BUILD_X64
+    node = compiler->emit(kX86InstIdXchg, x86::gpd(aIndex), x86::gpd(bIndex));
+#if defined(ASMJIT_BUILD_X64)
+  }
+#endif // ASMJIT_BUILD_X64
+
+  if (!_emitComments)
+    return;
+  node->setComment(compiler->_stringAllocator.sformat("[%s] %s, %s", reason, aVd->getName(), bVd->getName()));
+}
+
+// ============================================================================
+// [asmjit::X86Context - EmitPushSequence / EmitPopSequence]
+// ============================================================================
+
+void X86Context::emitPushSequence(uint32_t regs) {
+  X86Compiler* compiler = getCompiler();
+  uint32_t i = 0;
+
+  X86GpReg gpReg(_zsp);
+  while (regs != 0) {
+    ASMJIT_ASSERT(i < _regCount.getGp());
+    if ((regs & 0x1) != 0)
+      compiler->emit(kX86InstIdPush, gpReg.setIndex(i));
+    i++;
+    regs >>= 1;
+  }
+}
+
+void X86Context::emitPopSequence(uint32_t regs) {
+  X86Compiler* compiler = getCompiler();
+
+  if (regs == 0)
+    return;
+
+  uint32_t i = static_cast<uint32_t>(_regCount.getGp());
+  uint32_t mask = 0x1 << static_cast<uint32_t>(i - 1);
+
+  X86GpReg gpReg(_zsp);
+  while (i) {
+    i--;
+    if ((regs & mask) != 0)
+      compiler->emit(kX86InstIdPop, gpReg.setIndex(i));
+    mask >>= 1;
+  }
+}
+
+// ============================================================================
+// [asmjit::X86Context - EmitConvertVarToVar]
+// ============================================================================
+
+void X86Context::emitConvertVarToVar(uint32_t dstType, uint32_t dstIndex, uint32_t srcType, uint32_t srcIndex) {
+  X86Compiler* compiler = getCompiler();
+
+  switch (dstType) {
+    case kVarTypeInt8:
+    case kVarTypeUInt8:
+    case kVarTypeInt16:
+    case kVarTypeUInt16:
+    case kVarTypeInt32:
+    case kVarTypeUInt32:
+    case kVarTypeInt64:
+    case kVarTypeUInt64:
+      break;
+
+    case kX86VarTypeXmmPs:
+      if (srcType == kX86VarTypeXmmPd || srcType == kX86VarTypeYmmPd) {
+        compiler->emit(kX86InstIdCvtpd2ps, x86::xmm(dstIndex), x86::xmm(srcIndex));
+        return;
+      }
+      ASMJIT_FALLTHROUGH;
+
+    case kX86VarTypeXmmSs:
+      if (srcType == kX86VarTypeXmmSd || srcType == kX86VarTypeXmmPd || srcType == kX86VarTypeYmmPd) {
+        compiler->emit(kX86InstIdCvtsd2ss, x86::xmm(dstIndex), x86::xmm(srcIndex));
+        return;
+      }
+
+      if (Utils::inInterval(srcType, _kVarTypeIntStart, _kVarTypeIntEnd)) {
+        // TODO: [COMPILER] Variable conversion not supported.
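+        // (An integer source would need a cvtsi2ss-style conversion, which
+        // is not synthesized yet, hence the hard stop below.)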
+        ASMJIT_NOT_REACHED();
+      }
+      break;
+
+    case kX86VarTypeXmmPd:
+      if (srcType == kX86VarTypeXmmPs || srcType == kX86VarTypeYmmPs) {
+        compiler->emit(kX86InstIdCvtps2pd, x86::xmm(dstIndex), x86::xmm(srcIndex));
+        return;
+      }
+      ASMJIT_FALLTHROUGH;
+
+    case kX86VarTypeXmmSd:
+      if (srcType == kX86VarTypeXmmSs || srcType == kX86VarTypeXmmPs || srcType == kX86VarTypeYmmPs) {
+        compiler->emit(kX86InstIdCvtss2sd, x86::xmm(dstIndex), x86::xmm(srcIndex));
+        return;
+      }
+
+      if (Utils::inInterval(srcType, _kVarTypeIntStart, _kVarTypeIntEnd)) {
+        // TODO: [COMPILER] Variable conversion not supported.
+        ASMJIT_NOT_REACHED();
+      }
+      break;
+  }
+}
+
+// ============================================================================
+// [asmjit::X86Context - EmitMoveVarOnStack / EmitMoveImmOnStack]
+// ============================================================================
+
+void X86Context::emitMoveVarOnStack(
+  uint32_t dstType, const X86Mem* dst,
+  uint32_t srcType, uint32_t srcIndex) {
+
+  ASMJIT_ASSERT(srcIndex != kInvalidReg);
+  X86Compiler* compiler = getCompiler();
+
+  X86Mem m0(*dst);
+  X86Reg r0, r1;
+
+  uint32_t regSize = compiler->getRegSize();
+  uint32_t instId;
+
+  switch (dstType) {
+    case kVarTypeInt8:
+    case kVarTypeUInt8:
+      // Move DWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt8, kVarTypeUInt64))
+        goto _MovGpD;
+
+      // Move DWORD (MMX).
+      if (Utils::inInterval(srcType, kX86VarTypeMm, kX86VarTypeMm))
+        goto _MovMmD;
+
+      // Move DWORD (XMM).
+      if (Utils::inInterval(srcType, kX86VarTypeXmm, kX86VarTypeXmmPd))
+        goto _MovXmmD;
+      break;
+
+    case kVarTypeInt16:
+    case kVarTypeUInt16:
+      // Extend BYTE->WORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt8, kVarTypeUInt8)) {
+        r1.setSize(1);
+        r1.setCode(kX86RegTypeGpbLo, srcIndex);
+
+        instId = (dstType == kVarTypeInt16 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
+        goto _ExtendMovGpD;
+      }
+
+      // Move DWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt16, kVarTypeUInt64))
+        goto _MovGpD;
+
+      // Move DWORD (MMX).
+      if (Utils::inInterval(srcType, kX86VarTypeMm, kX86VarTypeMm))
+        goto _MovMmD;
+
+      // Move DWORD (XMM).
+      if (Utils::inInterval(srcType, kX86VarTypeXmm, kX86VarTypeXmmPd))
+        goto _MovXmmD;
+      break;
+
+    case kVarTypeInt32:
+    case kVarTypeUInt32:
+      // Extend BYTE->DWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt8, kVarTypeUInt8)) {
+        r1.setSize(1);
+        r1.setCode(kX86RegTypeGpbLo, srcIndex);
+
+        instId = (dstType == kVarTypeInt32 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
+        goto _ExtendMovGpD;
+      }
+
+      // Extend WORD->DWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt16, kVarTypeUInt16)) {
+        r1.setSize(2);
+        r1.setCode(kX86RegTypeGpw, srcIndex);
+
+        instId = (dstType == kVarTypeInt32 && srcType == kVarTypeInt16) ? kX86InstIdMovsx : kX86InstIdMovzx;
+        goto _ExtendMovGpD;
+      }
+
+      // Move DWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt32, kVarTypeUInt64))
+        goto _MovGpD;
+
+      // Move DWORD (MMX).
+      if (Utils::inInterval(srcType, kX86VarTypeMm, kX86VarTypeMm))
+        goto _MovMmD;
+
+      // Move DWORD (XMM).
+      if (Utils::inInterval(srcType, kX86VarTypeXmm, kX86VarTypeXmmPd))
+        goto _MovXmmD;
+      break;
+
+    case kVarTypeInt64:
+    case kVarTypeUInt64:
+      // Extend BYTE->QWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt8, kVarTypeUInt8)) {
+        r1.setSize(1);
+        r1.setCode(kX86RegTypeGpbLo, srcIndex);
+
+        instId = (dstType == kVarTypeInt64 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
+        goto _ExtendMovGpXQ;
+      }
+
+      // Extend WORD->QWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt16, kVarTypeUInt16)) {
+        r1.setSize(2);
+        r1.setCode(kX86RegTypeGpw, srcIndex);
+
+        instId = (dstType == kVarTypeInt64 && srcType == kVarTypeInt16) ? kX86InstIdMovsx : kX86InstIdMovzx;
+        goto _ExtendMovGpXQ;
+      }
+
+      // Extend DWORD->QWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt32, kVarTypeUInt32)) {
+        r1.setSize(4);
+        r1.setCode(kX86RegTypeGpd, srcIndex);
+
+        instId = kX86InstIdMovsxd;
+        if (dstType == kVarTypeInt64 && srcType == kVarTypeInt32)
+          goto _ExtendMovGpXQ;
+        else
+          goto _ZeroExtendGpDQ;
+      }
+
+      // Move QWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt64, kVarTypeUInt64))
+        goto _MovGpQ;
+
+      // Move QWORD (MMX).
+      if (Utils::inInterval(srcType, kX86VarTypeMm, kX86VarTypeMm))
+        goto _MovMmQ;
+
+      // Move QWORD (XMM).
+      if (Utils::inInterval(srcType, kX86VarTypeXmm, kX86VarTypeXmmPd))
+        goto _MovXmmQ;
+      break;
+
+    case kX86VarTypeMm:
+      // Extend BYTE->QWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt8, kVarTypeUInt8)) {
+        r1.setSize(1);
+        r1.setCode(kX86RegTypeGpbLo, srcIndex);
+
+        instId = kX86InstIdMovzx;
+        goto _ExtendMovGpXQ;
+      }
+
+      // Extend WORD->QWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt16, kVarTypeUInt16)) {
+        r1.setSize(2);
+        r1.setCode(kX86RegTypeGpw, srcIndex);
+
+        instId = kX86InstIdMovzx;
+        goto _ExtendMovGpXQ;
+      }
+
+      // Extend DWORD->QWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt32, kVarTypeUInt32))
+        goto _ExtendMovGpDQ;
+
+      // Move QWORD (GP).
+      if (Utils::inInterval(srcType, kVarTypeInt64, kVarTypeUInt64))
+        goto _MovGpQ;
+
+      // Move QWORD (MMX).
+      if (Utils::inInterval(srcType, kX86VarTypeMm, kX86VarTypeMm))
+        goto _MovMmQ;
+
+      // Move QWORD (XMM).
+      if (Utils::inInterval(srcType, kX86VarTypeXmm, kX86VarTypeXmmPd))
+        goto _MovXmmQ;
+      break;
+
+    case kVarTypeFp32:
+    case kX86VarTypeXmmSs:
+      // Move FLOAT.
+      if (srcType == kX86VarTypeXmmSs || srcType == kX86VarTypeXmmPs || srcType == kX86VarTypeXmm)
+        goto _MovXmmD;
+
+      ASMJIT_NOT_REACHED();
+      break;
+
+    case kVarTypeFp64:
+    case kX86VarTypeXmmSd:
+      // Move DOUBLE.
+      if (srcType == kX86VarTypeXmmSd || srcType == kX86VarTypeXmmPd || srcType == kX86VarTypeXmm)
+        goto _MovXmmQ;
+
+      ASMJIT_NOT_REACHED();
+      break;
+
+    case kX86VarTypeXmm:
+      // TODO: [COMPILER].
+      ASMJIT_NOT_REACHED();
+      break;
+
+    case kX86VarTypeXmmPs:
+      // TODO: [COMPILER].
+      ASMJIT_NOT_REACHED();
+      break;
+
+    case kX86VarTypeXmmPd:
+      // TODO: [COMPILER].
+      ASMJIT_NOT_REACHED();
+      break;
+  }
+  return;
+
+  // Extend+Move Gp.
+_ExtendMovGpD:
+  m0.setSize(4);
+  r0.setSize(4);
+  r0.setCode(kX86RegTypeGpd, srcIndex);
+
+  compiler->emit(instId, r0, r1);
+  compiler->emit(kX86InstIdMov, m0, r0);
+  return;
+
+_ExtendMovGpXQ:
+  if (regSize == 8) {
+    m0.setSize(8);
+    r0.setSize(8);
+    r0.setCode(kX86RegTypeGpq, srcIndex);
+
+    compiler->emit(instId, r0, r1);
+    compiler->emit(kX86InstIdMov, m0, r0);
+  }
+  else {
+    m0.setSize(4);
+    r0.setSize(4);
+    r0.setCode(kX86RegTypeGpd, srcIndex);
+
+    compiler->emit(instId, r0, r1);
+
+_ExtendMovGpDQ:
+    compiler->emit(kX86InstIdMov, m0, r0);
+    m0.adjust(4);
+    compiler->emit(kX86InstIdAnd, m0, 0);
+  }
+  return;
+
+_ZeroExtendGpDQ:
+  m0.setSize(4);
+  r0.setSize(4);
+  r0.setCode(kX86RegTypeGpd, srcIndex);
+  goto _ExtendMovGpDQ;
+
+  // Move Gp.
+_MovGpD:
+  m0.setSize(4);
+  r0.setSize(4);
+  r0.setCode(kX86RegTypeGpd, srcIndex);
+  compiler->emit(kX86InstIdMov, m0, r0);
+  return;
+
+_MovGpQ:
+  m0.setSize(8);
+  r0.setSize(8);
+  r0.setCode(kX86RegTypeGpq, srcIndex);
+  compiler->emit(kX86InstIdMov, m0, r0);
+  return;
+
+  // Move Mm.
+_MovMmD:
+  m0.setSize(4);
+  r0.setSize(8);
+  r0.setCode(kX86RegTypeMm, srcIndex);
+  compiler->emit(kX86InstIdMovd, m0, r0);
+  return;
+
+_MovMmQ:
+  m0.setSize(8);
+  r0.setSize(8);
+  r0.setCode(kX86RegTypeMm, srcIndex);
+  compiler->emit(kX86InstIdMovq, m0, r0);
+  return;
+
+  // Move XMM.
+_MovXmmD:
+  m0.setSize(4);
+  r0.setSize(16);
+  r0.setCode(kX86RegTypeXmm, srcIndex);
+  compiler->emit(kX86InstIdMovss, m0, r0);
+  return;
+
+_MovXmmQ:
+  m0.setSize(8);
+  r0.setSize(16);
+  r0.setCode(kX86RegTypeXmm, srcIndex);
+  compiler->emit(kX86InstIdMovlps, m0, r0);
+}
+
+void X86Context::emitMoveImmOnStack(uint32_t dstType, const X86Mem* dst, const Imm* src) {
+  X86Compiler* compiler = getCompiler();
+
+  X86Mem mem(*dst);
+  Imm imm(*src);
+
+  uint32_t regSize = compiler->getRegSize();
+
+  // One stack entry is equal to the native register size. That means that if
+  // we want to move a 32-bit integer onto the stack in 64-bit mode, we need
+  // to extend it to a 64-bit integer.
+  mem.setSize(regSize);
+
+  switch (dstType) {
+    case kVarTypeInt8:
+    case kVarTypeUInt8:
+      imm.truncateTo8Bits();
+      goto _Move32;
+
+    case kVarTypeInt16:
+    case kVarTypeUInt16:
+      imm.truncateTo16Bits();
+      goto _Move32;
+
+    case kVarTypeInt32:
+    case kVarTypeUInt32:
+_Move32:
+      imm.truncateTo32Bits();
+      compiler->emit(kX86InstIdMov, mem, imm);
+      break;
+
+    case kVarTypeInt64:
+    case kVarTypeUInt64:
+_Move64:
+      if (regSize == 4) {
+        uint32_t hi = imm.getUInt32Hi();
+
+        // Lo-Part.
+        compiler->emit(kX86InstIdMov, mem, imm.truncateTo32Bits());
+        mem.adjust(regSize);
+
+        // Hi-Part.
+        compiler->emit(kX86InstIdMov, mem, imm.setUInt32(hi));
+      }
+      else {
+        compiler->emit(kX86InstIdMov, mem, imm);
+      }
+      break;
+
+    case kVarTypeFp32:
+      goto _Move32;
+
+    case kVarTypeFp64:
+      goto _Move64;
+
+    case kX86VarTypeMm:
+      goto _Move64;
+
+    case kX86VarTypeXmm:
+    case kX86VarTypeXmmSs:
+    case kX86VarTypeXmmPs:
+    case kX86VarTypeXmmSd:
+    case kX86VarTypeXmmPd:
+      if (regSize == 4) {
+        uint32_t hi = imm.getUInt32Hi();
+
+        // Lo part.
+        compiler->emit(kX86InstIdMov, mem, imm.truncateTo32Bits());
+        mem.adjust(regSize);
+
+        // Hi part.
+        compiler->emit(kX86InstIdMov, mem, imm.setUInt32(hi));
+        mem.adjust(regSize);
+
+        // Zero part.
+        compiler->emit(kX86InstIdMov, mem, imm.setUInt32(0));
+        mem.adjust(regSize);
+
+        compiler->emit(kX86InstIdMov, mem, imm);
+      }
+      else {
+        // Lo/Hi parts.
+        compiler->emit(kX86InstIdMov, mem, imm);
+        mem.adjust(regSize);
+
+        // Zero part.
+        compiler->emit(kX86InstIdMov, mem, imm.setUInt32(0));
+      }
+      break;
+
+    default:
+      ASMJIT_NOT_REACHED();
+      break;
+  }
+}
+
+// ============================================================================
+// [asmjit::X86Context - EmitMoveImmToReg]
+// ============================================================================
+
+void X86Context::emitMoveImmToReg(uint32_t dstType, uint32_t dstIndex, const Imm* src) {
+  ASMJIT_ASSERT(dstIndex != kInvalidReg);
+  X86Compiler* compiler = getCompiler();
+
+  X86Reg r0;
+  Imm imm(*src);
+
+  switch (dstType) {
+    case kVarTypeInt8:
+    case kVarTypeUInt8:
+      imm.truncateTo8Bits();
+      goto _Move32;
+
+    case kVarTypeInt16:
+    case kVarTypeUInt16:
+      imm.truncateTo16Bits();
+      goto _Move32;
+
+    case kVarTypeInt32:
+    case kVarTypeUInt32:
+_Move32Truncate:
+      imm.truncateTo32Bits();
+_Move32:
+      r0.setSize(4);
+      r0.setCode(kX86RegTypeGpd, dstIndex);
+      compiler->emit(kX86InstIdMov, r0, imm);
+      break;
+
+    case kVarTypeInt64:
+    case kVarTypeUInt64:
+      // A move to a GPD register will also clear the high DWORD of the GPQ
+      // register in 64-bit mode.
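+      // (Writes to a 32-bit GP register architecturally zero the upper half
+      // of the full 64-bit register, so an immediate that fits in an unsigned
+      // 32-bit value only needs the shorter 32-bit mov encoding.)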
+      if (imm.isUInt32())
+        goto _Move32Truncate;
+
+      r0.setSize(8);
+      r0.setCode(kX86RegTypeGpq, dstIndex);
+      compiler->emit(kX86InstIdMov, r0, imm);
+      break;
+
+    case kVarTypeFp32:
+    case kVarTypeFp64:
+      // Compiler doesn't manage FPU stack.
+      ASMJIT_NOT_REACHED();
+      break;
+
+    case kX86VarTypeMm:
+      // TODO: [COMPILER] EmitMoveImmToReg.
+      break;
+
+    case kX86VarTypeXmm:
+    case kX86VarTypeXmmSs:
+    case kX86VarTypeXmmSd:
+    case kX86VarTypeXmmPs:
+    case kX86VarTypeXmmPd:
+      // TODO: [COMPILER] EmitMoveImmToReg.
+      break;
+
+    default:
+      ASMJIT_NOT_REACHED();
+      break;
+  }
+}
+
+// ============================================================================
+// [asmjit::X86Context - Register Management]
+// ============================================================================
+
+#if defined(ASMJIT_DEBUG)
+template<int C>
+static ASMJIT_INLINE void X86Context_checkStateVars(X86Context* self) {
+  X86VarState* state = self->getState();
+  VarData** sVars = state->getListByClass(C);
+
+  uint32_t regIndex;
+  uint32_t regMask;
+  uint32_t regCount = self->_regCount.get(C);
+
+  uint32_t occupied = state->_occupied.get(C);
+  uint32_t modified = state->_modified.get(C);
+
+  for (regIndex = 0, regMask = 1; regIndex < regCount; regIndex++, regMask <<= 1) {
+    VarData* vd = sVars[regIndex];
+
+    if (vd == nullptr) {
+      ASMJIT_ASSERT((occupied & regMask) == 0);
+      ASMJIT_ASSERT((modified & regMask) == 0);
+    }
+    else {
+      ASMJIT_ASSERT((occupied & regMask) != 0);
+      ASMJIT_ASSERT((modified & regMask) == (static_cast<uint32_t>(vd->isModified()) << regIndex));
+
+      ASMJIT_ASSERT(vd->getClass() == C);
+      ASMJIT_ASSERT(vd->getState() == kVarStateReg);
+      ASMJIT_ASSERT(vd->getRegIndex() == regIndex);
+    }
+  }
+}
+
+void X86Context::_checkState() {
+  X86Context_checkStateVars<kX86RegClassGp >(this);
+  X86Context_checkStateVars<kX86RegClassMm >(this);
+  X86Context_checkStateVars<kX86RegClassXyz>(this);
+}
+#else
+void X86Context::_checkState() {}
+#endif // ASMJIT_DEBUG
+
+// ============================================================================
+// [asmjit::X86Context - State - Load]
+// ============================================================================
+
+template<int C>
+static ASMJIT_INLINE void X86Context_loadStateVars(X86Context* self, X86VarState* src) {
+  X86VarState* cur = self->getState();
+
+  VarData** cVars = cur->getListByClass(C);
+  VarData** sVars = src->getListByClass(C);
+
+  uint32_t regIndex;
+  uint32_t modified = src->_modified.get(C);
+  uint32_t regCount = self->_regCount.get(C);
+
+  for (regIndex = 0; regIndex < regCount; regIndex++, modified >>= 1) {
+    VarData* vd = sVars[regIndex];
+    cVars[regIndex] = vd;
+
+    if (vd == nullptr)
+      continue;
+
+    vd->setState(kVarStateReg);
+    vd->setRegIndex(regIndex);
+    vd->setModified(modified & 0x1);
+  }
+}
+
+void X86Context::loadState(VarState* src_) {
+  X86VarState* cur = getState();
+  X86VarState* src = static_cast<X86VarState*>(src_);
+
+  VarData** vdArray = _contextVd.getData();
+  uint32_t vdCount = static_cast<uint32_t>(_contextVd.getLength());
+
+  // Load allocated variables.
+  X86Context_loadStateVars<kX86RegClassGp >(this, src);
+  X86Context_loadStateVars<kX86RegClassMm >(this, src);
+  X86Context_loadStateVars<kX86RegClassXyz>(this, src);
+
+  // Load masks.
+  cur->_occupied = src->_occupied;
+  cur->_modified = src->_modified;
+
+  // Load states of other variables and clear their 'Modified' flags.
+  for (uint32_t i = 0; i < vdCount; i++) {
+    uint32_t vState = src->_cells[i].getState();
+
+    if (vState == kVarStateReg)
+      continue;
+
+    vdArray[i]->setState(vState);
+    vdArray[i]->setRegIndex(kInvalidReg);
+    vdArray[i]->setModified(false);
+  }
+
+  ASMJIT_X86_CHECK_STATE
+}
+
+// ============================================================================
+// [asmjit::X86Context - State - Save]
+// ============================================================================
+
+VarState* X86Context::saveState() {
+  VarData** vdArray = _contextVd.getData();
+  uint32_t vdCount = static_cast<uint32_t>(_contextVd.getLength());
+
+  size_t size = Utils::alignTo(
+    sizeof(X86VarState) + vdCount * sizeof(X86StateCell), sizeof(void*));
+
+  X86VarState* cur = getState();
+  X86VarState* dst = _zoneAllocator.allocT<X86VarState>(size);
+
+  if (dst == nullptr)
+    return nullptr;
+
+  // Store links.
+  ::memcpy(dst->_list, cur->_list, X86VarState::kAllCount * sizeof(VarData*));
+
+  // Store masks.
+  dst->_occupied = cur->_occupied;
+  dst->_modified = cur->_modified;
+
+  // Store cells.
+  for (uint32_t i = 0; i < vdCount; i++) {
+    VarData* vd = static_cast<VarData*>(vdArray[i]);
+    X86StateCell& cell = dst->_cells[i];
+
+    cell.reset();
+    cell.setState(vd->getState());
+  }
+
+  return dst;
+}
+
+// ============================================================================
+// [asmjit::X86Context - State - Switch]
+// ============================================================================
+
+template<int C>
+static ASMJIT_INLINE void X86Context_switchStateVars(X86Context* self, X86VarState* src) {
+  X86VarState* dst = self->getState();
+
+  VarData** dVars = dst->getListByClass(C);
+  VarData** sVars = src->getListByClass(C);
+
+  X86StateCell* cells = src->_cells;
+  uint32_t regCount = self->_regCount.get(C);
+  bool didWork;
+
+  do {
+    didWork = false;
+
+    for (uint32_t regIndex = 0, regMask = 0x1; regIndex < regCount; regIndex++, regMask <<= 1) {
+      VarData* dVd = dVars[regIndex];
+      VarData* sVd = sVars[regIndex];
+
+      if (dVd == sVd)
+        continue;
+
+      if (dVd != nullptr) {
+        const X86StateCell& cell = cells[dVd->getLocalId()];
+
+        if (cell.getState() != kVarStateReg) {
+          if (cell.getState() == kVarStateMem)
+            self->spill(dVd);
+          else
+            self->unuse(dVd);
+
+          dVd = nullptr;
+          didWork = true;
+
+          if (sVd == nullptr)
+            continue;
+        }
+      }
+
+      if (dVd == nullptr && sVd != nullptr) {
+_MoveOrLoad:
+        if (sVd->getRegIndex() != kInvalidReg)
+          self->move(sVd, regIndex);
+        else
+          self->load(sVd, regIndex);
+
+        didWork = true;
+        continue;
+      }
+
+      if (dVd != nullptr) {
+        const X86StateCell& cell = cells[dVd->getLocalId()];
+        if (sVd == nullptr) {
+          if (cell.getState() == kVarStateReg)
+            continue;
+
+          if (cell.getState() == kVarStateMem)
+            self->spill(dVd);
+          else
+            self->unuse(dVd);
+
+          didWork = true;
+          continue;
+        }
+        else {
+          if (cell.getState() == kVarStateReg) {
+            if (dVd->getRegIndex() != kInvalidReg && sVd->getRegIndex() != kInvalidReg) {
+              if (C == kX86RegClassGp) {
+                self->swapGp(dVd, sVd);
+              }
+              else {
+                self->spill(dVd);
+                self->move(sVd, regIndex);
+              }
+
+              didWork = true;
+              continue;
+            }
+            else {
+              didWork = true;
+              continue;
+            }
+          }
+
+          if (cell.getState() == kVarStateMem)
+            self->spill(dVd);
+          else
+            self->unuse(dVd);
+          goto _MoveOrLoad;
+        }
+      }
+    }
+  } while (didWork);
+
+  uint32_t dModified = dst->_modified.get(C);
+  uint32_t sModified = src->_modified.get(C);
+
+  if (dModified != sModified) {
+    for (uint32_t regIndex = 0, regMask = 0x1; regIndex < regCount; regIndex++, regMask <<= 1) {
+      VarData* vd = dVars[regIndex];
+
+      if (vd == nullptr)
+        continue;
+
+      if ((dModified & regMask) && !(sModified & regMask)) {
+        self->save(vd);
+        continue;
+      }
+
+      if (!(dModified & regMask) && (sModified & regMask)) {
+        self->modify(vd);
+        continue;
+      }
+    }
+  }
+}
+
+void X86Context::switchState(VarState* src_) {
+  ASMJIT_ASSERT(src_ != nullptr);
+
+  X86VarState* cur = getState();
+  X86VarState* src = static_cast<X86VarState*>(src_);
+
+  // Ignore if both states are equal.
+  if (cur == src)
+    return;
+
+  // Switch variables.
+  X86Context_switchStateVars<kX86RegClassGp >(this, src);
+  X86Context_switchStateVars<kX86RegClassMm >(this, src);
+  X86Context_switchStateVars<kX86RegClassXyz>(this, src);
+
+  // Calculate changed state.
+  VarData** vdArray = _contextVd.getData();
+  uint32_t vdCount = static_cast<uint32_t>(_contextVd.getLength());
+
+  X86StateCell* cells = src->_cells;
+  for (uint32_t i = 0; i < vdCount; i++) {
+    VarData* vd = static_cast<VarData*>(vdArray[i]);
+    const X86StateCell& cell = cells[i];
+    uint32_t vState = cell.getState();
+
+    if (vState != kVarStateReg) {
+      vd->setState(vState);
+      vd->setModified(false);
+    }
+  }
+
+  ASMJIT_X86_CHECK_STATE
+}
+
+// ============================================================================
+// [asmjit::X86Context - State - Intersect]
+// ============================================================================
+
+// The algorithm is actually not so smart, but tries to find an intersection of
+// `a` and `b` and tries to move/alloc a variable into that location if it's
+// possible. It also finds out which variables will be spilled/unused by `a`
+// and `b` and performs that action here. It may improve the switch state code
+// in certain cases, but doesn't necessarily do the best job possible.
+template<int C>
+static ASMJIT_INLINE void X86Context_intersectStateVars(X86Context* self, X86VarState* a, X86VarState* b) {
+  X86VarState* dst = self->getState();
+
+  VarData** dVars = dst->getListByClass(C);
+  VarData** aVars = a->getListByClass(C);
+  VarData** bVars = b->getListByClass(C);
+
+  X86StateCell* aCells = a->_cells;
+  X86StateCell* bCells = b->_cells;
+
+  uint32_t regCount = self->_regCount.get(C);
+  bool didWork;
+
+  // Similar to `switchStateVars()`, we iterate over and over until there is
+  // no work to be done.
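+  // (Each pass can spill or unuse a variable and thereby free its register,
+  // which may unblock a direct move or load on the next pass, so the loop
+  // runs to a fixed point.)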
+  do {
+    didWork = false;
+
+    for (uint32_t regIndex = 0, regMask = 0x1; regIndex < regCount; regIndex++, regMask <<= 1) {
+      VarData* dVd = dVars[regIndex];
+
+      VarData* aVd = aVars[regIndex];
+      VarData* bVd = bVars[regIndex];
+
+      if (dVd == aVd)
+        continue;
+
+      if (dVd != nullptr) {
+        const X86StateCell& aCell = aCells[dVd->getLocalId()];
+        const X86StateCell& bCell = bCells[dVd->getLocalId()];
+
+        if (aCell.getState() != kVarStateReg && bCell.getState() != kVarStateReg) {
+          if (aCell.getState() == kVarStateMem || bCell.getState() == kVarStateMem)
+            self->spill(dVd);
+          else
+            self->unuse(dVd);
+
+          dVd = nullptr;
+          didWork = true;
+
+          if (aVd == nullptr)
+            continue;
+        }
+      }
+
+      if (dVd == nullptr && aVd != nullptr) {
+        if (aVd->getRegIndex() != kInvalidReg)
+          self->move(aVd, regIndex);
+        else
+          self->load(aVd, regIndex);
+
+        didWork = true;
+        continue;
+      }
+
+      if (dVd != nullptr) {
+        const X86StateCell& aCell = aCells[dVd->getLocalId()];
+        const X86StateCell& bCell = bCells[dVd->getLocalId()];
+
+        if (aVd == nullptr) {
+          if (aCell.getState() == kVarStateReg || bCell.getState() == kVarStateReg)
+            continue;
+
+          if (aCell.getState() == kVarStateMem || bCell.getState() == kVarStateMem)
+            self->spill(dVd);
+          else
+            self->unuse(dVd);
+
+          didWork = true;
+          continue;
+        }
+        else if (C == kX86RegClassGp) {
+          if (aCell.getState() == kVarStateReg) {
+            if (dVd->getRegIndex() != kInvalidReg && aVd->getRegIndex() != kInvalidReg) {
+              self->swapGp(dVd, aVd);
+
+              didWork = true;
+              continue;
+            }
+          }
+        }
+      }
+    }
+  } while (didWork);
+
+  uint32_t dModified = dst->_modified.get(C);
+  uint32_t aModified = a->_modified.get(C);
+
+  if (dModified != aModified) {
+    for (uint32_t regIndex = 0, regMask = 0x1; regIndex < regCount; regIndex++, regMask <<= 1) {
+      VarData* vd = dVars[regIndex];
+
+      if (vd == nullptr)
+        continue;
+
+      const X86StateCell& aCell = aCells[vd->getLocalId()];
+      if ((dModified & regMask) && !(aModified & regMask) && aCell.getState() == kVarStateReg)
+        self->save(vd);
+    }
+  }
+}
+
+void X86Context::intersectStates(VarState* a_, VarState* b_) {
+  X86VarState* a = static_cast<X86VarState*>(a_);
+  X86VarState* b = static_cast<X86VarState*>(b_);
+
+  ASMJIT_ASSERT(a != nullptr);
+  ASMJIT_ASSERT(b != nullptr);
+
+  X86Context_intersectStateVars<kX86RegClassGp >(this, a, b);
+  X86Context_intersectStateVars<kX86RegClassMm >(this, a, b);
+  X86Context_intersectStateVars<kX86RegClassXyz>(this, a, b);
+
+  ASMJIT_X86_CHECK_STATE
+}
+
+// ============================================================================
+// [asmjit::X86Context - GetJccFlow / GetOppositeJccFlow]
+// ============================================================================
+
+//! \internal
+static ASMJIT_INLINE HLNode* X86Context_getJccFlow(HLJump* jNode) {
+  if (jNode->isTaken())
+    return jNode->getTarget();
+  else
+    return jNode->getNext();
+}
+
+//! \internal
+static ASMJIT_INLINE HLNode* X86Context_getOppositeJccFlow(HLJump* jNode) {
+  if (jNode->isTaken())
+    return jNode->getNext();
+  else
+    return jNode->getTarget();
+}
+
+// ============================================================================
+// [asmjit::X86Context - SingleVarInst]
+// ============================================================================
+
+//! \internal
+static void X86Context_prepareSingleVarInst(uint32_t instId, VarAttr* va) {
+  switch (instId) {
+    // - andn     reg, reg ; Set all bits in reg to 0.
+    // - xor/pxor reg, reg ; Set all bits in reg to 0.
+    // - sub/psub reg, reg ; Set all bits in reg to 0.
+    // - pcmpgt   reg, reg ; Set all bits in reg to 0.
+    // - pcmpeq   reg, reg ; Set all bits in reg to 1.
+    case kX86InstIdPandn     :
+    case kX86InstIdXor       : case kX86InstIdXorpd     : case kX86InstIdXorps     : case kX86InstIdPxor      :
+    case kX86InstIdSub       :
+    case kX86InstIdPsubb     : case kX86InstIdPsubw     : case kX86InstIdPsubd     : case kX86InstIdPsubq     :
+    case kX86InstIdPsubsb    : case kX86InstIdPsubsw    : case kX86InstIdPsubusb   : case kX86InstIdPsubusw   :
+    case kX86InstIdPcmpeqb   : case kX86InstIdPcmpeqw   : case kX86InstIdPcmpeqd   : case kX86InstIdPcmpeqq   :
+    case kX86InstIdPcmpgtb   : case kX86InstIdPcmpgtw   : case kX86InstIdPcmpgtd   : case kX86InstIdPcmpgtq   :
+      va->andNotFlags(kVarAttrRReg);
+      break;
+
+    // - and      reg, reg ; Nop.
+    // - or       reg, reg ; Nop.
+    // - xchg     reg, reg ; Nop.
+    case kX86InstIdAnd       : case kX86InstIdAndpd     : case kX86InstIdAndps     : case kX86InstIdPand      :
+    case kX86InstIdOr        : case kX86InstIdOrpd      : case kX86InstIdOrps      : case kX86InstIdPor       :
+    case kX86InstIdXchg      :
+      va->andNotFlags(kVarAttrWReg);
+      break;
+  }
+}
+
+// ============================================================================
+// [asmjit::X86Context - Helpers]
+// ============================================================================
+
+//! \internal
+//!
+//! Get mask of all registers actually used to pass function arguments.
+static ASMJIT_INLINE X86RegMask X86Context_getUsedArgs(X86Context* self, X86CallNode* node, X86FuncDecl* decl) {
+  X86RegMask regs;
+  regs.reset();
+
+  uint32_t i;
+  uint32_t argCount = decl->getNumArgs();
+
+  for (i = 0; i < argCount; i++) {
+    const FuncInOut& arg = decl->getArg(i);
+    if (!arg.hasRegIndex())
+      continue;
+    regs.or_(x86VarTypeToClass(arg.getVarType()), Utils::mask(arg.getRegIndex()));
+  }
+
+  return regs;
+}
+
+// ============================================================================
+// [asmjit::X86Context - SArg Insertion]
+// ============================================================================
+
+struct SArgData {
+  VarData* sVd;
+  VarData* cVd;
+  HLCallArg* sArg;
+  uint32_t aType;
+};
+
+#define SARG(dst, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24) \
+  (s0  <<  0) | (s1  <<  1) | (s2  <<  2) | (s3  <<  3) | (s4  <<  4) | (s5  <<  5) | (s6  <<  6) | (s7  <<  7) | \
+  (s8  <<  8) | (s9  <<  9) | (s10 << 10) | (s11 << 11) | (s12 << 12) | (s13 << 13) | (s14 << 14) | (s15 << 15) | \
+  (s16 << 16) | (s17 << 17) | (s18 << 18) | (s19 << 19) | (s20 << 20) | (s21 << 21) | (s22 << 22) | (s23 << 23) | \
+  (s24 << 24)
+#define A 0 // Auto-convert (doesn't need conversion step).
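+// Each SARG row below belongs to one destination (argument) type; bit `s` set
+// to 1 means that converting from source type `s` needs an explicit
+// conversion node, while `A` (0) marks pairs the plain move/extend path
+// already handles.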
+static const uint32_t X86Context_sArgConvTable[kX86VarTypeCount] = {
+  // dst <- | i8| u8|i16|u16|i32|u32|i64|u64| iP| uP|f32|f64|mmx| k |xmm|xSs|xPs|xSd|xPd|ymm|yPs|yPd|zmm|zPs|zPd|
+  //--------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+  SARG(i8   , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ),
+  SARG(u8   , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ),
+  SARG(i16  , A , A , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ),
+  SARG(u16  , A , A , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ),
+  SARG(i32  , A , A , A , A , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ),
+  SARG(u32  , A , A , A , A , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ),
+  SARG(i64  , A , A , A , A , A , A , 0 , 0 , A , A , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ),
+  SARG(u64  , A , A , A , A , A , A , 0 , 0 , A , A , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ),
+  SARG(iPtr , A , A , A , A , A , A , A , A , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ),
+  SARG(uPtr , A , A , A , A , A , A , A , A , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ),
+  SARG(f32  , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , A , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 0 , 1 ),
+  SARG(f64  , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , A , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 , 0 , 1 , 0 ),
+  SARG(mmx  , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ),
+  SARG(k    , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ),
+  SARG(xmm  , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ),
+  SARG(xSs  , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 0 , 1 ),
+  SARG(xPs  , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 0 , 1 ),
+  SARG(xSd  , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 , 0 , 1 , 0 ),
+  SARG(xPd  , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 , 0 , 1 , 0 ),
+  SARG(ymm  , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ),
+  SARG(yPs  , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 0 , 1 ),
+  SARG(yPd  , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 , 0 , 1 , 0 ),
+  SARG(zmm  , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ),
+  SARG(zPs  , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 0 , 1 ),
+  SARG(zPd  , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 , 0 , 1 , 0 )
+};
+#undef A
+#undef SARG
+
+static ASMJIT_INLINE bool X86Context_mustConvertSArg(X86Context* self, uint32_t aType, uint32_t sType) {
+  return (X86Context_sArgConvTable[aType] & (1 << sType)) != 0;
+}
+
+static ASMJIT_INLINE uint32_t X86Context_typeOfConvertedSArg(X86Context* self, uint32_t aType, uint32_t sType) {
+  ASMJIT_ASSERT(X86Context_mustConvertSArg(self, aType, sType));
+
+  if (Utils::inInterval(aType, _kVarTypeIntStart, _kVarTypeIntEnd))
+    return aType;
+
+  if (aType == kVarTypeFp32) return kX86VarTypeXmmSs;
+  if (aType == kVarTypeFp64) return kX86VarTypeXmmSd;
+
+  return aType;
+}
+
+static ASMJIT_INLINE Error X86Context_insertHLCallArg(
+  X86Context* self, X86CallNode* call,
+  VarData* sVd, const uint32_t* gaRegs,
+  const FuncInOut& arg, uint32_t argIndex,
+  SArgData* sArgList, uint32_t& sArgCount) {
+
+  X86Compiler* compiler = self->getCompiler();
+  uint32_t i;
+
+  uint32_t aType = arg.getVarType();
+  uint32_t sType = sVd->getType();
+
+  // First locate or create sArgBase.
+  for (i = 0; i < sArgCount; i++) {
+    if (sArgList[i].sVd == sVd && sArgList[i].cVd == nullptr)
+      break;
+  }
+
+  SArgData* sArgData = &sArgList[i];
+
+  if (i == sArgCount) {
+    sArgData->sVd = sVd;
+    sArgData->cVd = nullptr;
+    sArgData->sArg = nullptr;
+    sArgData->aType = 0xFF;
+    sArgCount++;
+  }
+
+  const VarInfo& sInfo = _x86VarInfo[sType];
+  uint32_t sClass = sInfo.getRegClass();
+
+  if (X86Context_mustConvertSArg(self, aType, sType)) {
+    uint32_t cType = X86Context_typeOfConvertedSArg(self, aType, sType);
+
+    const VarInfo& cInfo = _x86VarInfo[cType];
+    uint32_t cClass = cInfo.getRegClass();
+
+    while (++i < sArgCount) {
+      sArgData = &sArgList[i];
+      if (sArgData->sVd != sVd)
+        break;
+
+      if (sArgData->cVd->getType() != cType || sArgData->aType != aType)
+        continue;
+
+      sArgData->sArg->_args |= Utils::mask(argIndex);
+      return kErrorOk;
+    }
+
+    VarData* cVd = compiler->_newVd(cInfo, nullptr);
+    if (cVd == nullptr)
+      return kErrorNoHeapMemory;
+
+    HLCallArg* sArg = compiler->newNode<HLCallArg>(call, sVd, cVd);
+    if (sArg == nullptr)
+      return kErrorNoHeapMemory;
+
+    X86VarMap* map = self->newVarMap(2);
+    if (map == nullptr)
+      return kErrorNoHeapMemory;
+
+    ASMJIT_PROPAGATE_ERROR(self->_registerContextVar(cVd));
+    ASMJIT_PROPAGATE_ERROR(self->_registerContextVar(sVd));
+
+    map->_vaCount = 2;
+    map->_count.reset();
+    map->_count.add(sClass);
+    map->_count.add(cClass);
+
+    map->_start.reset();
+    map->_inRegs.reset();
+    map->_outRegs.reset();
+    map->_clobberedRegs.reset();
+
+    if (sClass <= cClass) {
+      map->_list[0].setup(sVd, kVarAttrRReg, 0, gaRegs[sClass]);
+      map->_list[1].setup(cVd, kVarAttrWReg, 0, gaRegs[cClass]);
+      map->_start.set(cClass, sClass != cClass);
+    }
+    else {
+      map->_list[0].setup(cVd, kVarAttrWReg, 0, gaRegs[cClass]);
+      map->_list[1].setup(sVd, kVarAttrRReg, 0, gaRegs[sClass]);
+      map->_start.set(sClass, 1);
+    }
+
+    sArg->setMap(map);
+    sArg->_args |= Utils::mask(argIndex);
+
+    compiler->addNodeBefore(sArg, call);
+    ::memmove(sArgData + 1, sArgData, (sArgCount - i) * sizeof(SArgData));
+
+    sArgData->sVd = sVd;
+    sArgData->cVd = cVd;
+    sArgData->sArg = sArg;
+    sArgData->aType = aType;
+
+    sArgCount++;
+    return kErrorOk;
+  }
+  else {
+    HLCallArg* sArg = sArgData->sArg;
+    ASMJIT_PROPAGATE_ERROR(self->_registerContextVar(sVd));
+
+    if (sArg == nullptr) {
+      sArg = compiler->newNode<HLCallArg>(call, sVd, (VarData*)nullptr);
+      if (sArg == nullptr)
+        return kErrorNoHeapMemory;
+
+      X86VarMap* map = self->newVarMap(1);
+      if (map == nullptr)
+        return kErrorNoHeapMemory;
+
+      map->_vaCount = 1;
+      map->_count.reset();
+      map->_count.add(sClass);
+      map->_start.reset();
+      map->_inRegs.reset();
+      map->_outRegs.reset();
+      map->_clobberedRegs.reset();
+      map->_list[0].setup(sVd, kVarAttrRReg, 0, gaRegs[sClass]);
+
+      sArg->setMap(map);
+      sArgData->sArg = sArg;
+
+      compiler->addNodeBefore(sArg, call);
+    }
+
+    sArg->_args |= Utils::mask(argIndex);
+    return kErrorOk;
+  }
+}
+
============================================================================ +// [asmjit::X86Context - Fetch] +// ============================================================================ + +//! \internal +//! +//! Prepare the given function `func`. +//! +//! For each node: +//! - Create and assign groupId and flowId. +//! - Collect all variables and merge them to vaList. +Error X86Context::fetch() { + ASMJIT_TLOG("[F] ======= Fetch (Begin)\n"); + + X86Compiler* compiler = getCompiler(); + X86FuncNode* func = getFunc(); + + uint32_t arch = compiler->getArch(); + + HLNode* node_ = func; + HLNode* next = nullptr; + HLNode* stop = getStop(); + + uint32_t flowId = 0; + + VarAttr vaTmpList[80]; + SArgData sArgList[80]; + + PodList::Link* jLink = nullptr; + + // Function flags. + func->clearFuncFlags( + kFuncFlagIsNaked | + kFuncFlagX86Emms | + kFuncFlagX86SFence | + kFuncFlagX86LFence ); + + if (func->getHint(kFuncHintNaked ) != 0) func->addFuncFlags(kFuncFlagIsNaked); + if (func->getHint(kFuncHintCompact ) != 0) func->addFuncFlags(kFuncFlagX86Leave); + if (func->getHint(kFuncHintX86Emms ) != 0) func->addFuncFlags(kFuncFlagX86Emms); + if (func->getHint(kFuncHintX86SFence) != 0) func->addFuncFlags(kFuncFlagX86SFence); + if (func->getHint(kFuncHintX86LFence) != 0) func->addFuncFlags(kFuncFlagX86LFence); + + // Global allocable registers. + uint32_t* gaRegs = _gaRegs; + + if (!func->hasFuncFlag(kFuncFlagIsNaked)) + gaRegs[kX86RegClassGp] &= ~Utils::mask(kX86RegIndexBp); + + // Allowed index registers (GP/XMM/YMM). + const uint32_t indexMask = Utils::bits(_regCount.getGp()) & ~(Utils::mask(4, 12)); + + // -------------------------------------------------------------------------- + // [VI Macros] + // -------------------------------------------------------------------------- + +#define VI_BEGIN() \ + do { \ + uint32_t vaCount = 0; \ + X86RegCount regCount; \ + \ + X86RegMask inRegs; \ + X86RegMask outRegs; \ + X86RegMask clobberedRegs; \ + \ + regCount.reset(); \ + inRegs.reset(); \ + outRegs.reset(); \ + clobberedRegs.reset() + +#define VI_END(_Node_) \ + if (vaCount == 0 && clobberedRegs.isEmpty()) \ + break; \ + \ + X86RegCount _vaIndex; \ + _vaIndex.indexFromRegCount(regCount); \ + \ + X86VarMap* _map = newVarMap(vaCount); \ + if (_map == nullptr) \ + goto _NoMemory; \ + \ + _map->_vaCount = vaCount; \ + _map->_count = regCount; \ + _map->_start = _vaIndex; \ + \ + _map->_inRegs = inRegs; \ + _map->_outRegs = outRegs; \ + _map->_clobberedRegs = clobberedRegs; \ + \ + VarAttr* _va = vaTmpList; \ + while (vaCount) { \ + VarData* _vd = _va->getVd(); \ + \ + uint32_t _class = _vd->getClass(); \ + uint32_t _index = _vaIndex.get(_class); \ + \ + _vaIndex.add(_class); \ + \ + if (_va->_inRegs) \ + _va->_allocableRegs = _va->_inRegs; \ + else if (_va->_outRegIndex != kInvalidReg) \ + _va->_allocableRegs = Utils::mask(_va->_outRegIndex); \ + else \ + _va->_allocableRegs &= ~inRegs.get(_class); \ + \ + _vd->_va = nullptr; \ + _map->getVa(_index)[0] = _va[0]; \ + \ + _va++; \ + vaCount--; \ + } \ + \ + _Node_->setMap(_map); \ + } while (0) + +#define VI_ADD_VAR(_Vd_, _Va_, _Flags_, _NewAllocable_) \ + do { \ + ASMJIT_ASSERT(_Vd_->_va == nullptr); \ + \ + _Va_ = &vaTmpList[vaCount++]; \ + _Va_->setup(_Vd_, _Flags_, 0, _NewAllocable_); \ + _Va_->addVarCount(1); \ + _Vd_->setVa(_Va_); \ + \ + if (_registerContextVar(_Vd_) != kErrorOk) \ + goto _NoMemory; \ + regCount.add(_Vd_->getClass()); \ + } while (0) + +#define VI_MERGE_VAR(_Vd_, _Va_, _Flags_, _NewAllocable_) \ + do { \ + _Va_ = _Vd_->getVa(); \ + \ 
+ if (_Va_ == nullptr) { \ + _Va_ = &vaTmpList[vaCount++]; \ + _Va_->setup(_Vd_, 0, 0, _NewAllocable_); \ + _Vd_->setVa(_Va_); \ + \ + if (_registerContextVar(_Vd_) != kErrorOk) \ + goto _NoMemory; \ + regCount.add(_Vd_->getClass()); \ + } \ + \ + _Va_->orFlags(_Flags_); \ + _Va_->addVarCount(1); \ + } while (0) + + // -------------------------------------------------------------------------- + // [Loop] + // -------------------------------------------------------------------------- + + do { +_Do: + while (node_->isFetched()) { +_NextGroup: + if (jLink == nullptr) + jLink = _jccList.getFirst(); + else + jLink = jLink->getNext(); + + if (jLink == nullptr) + goto _Done; + node_ = X86Context_getOppositeJccFlow(static_cast(jLink->getValue())); + } + + flowId++; + + next = node_->getNext(); + node_->setFlowId(flowId); + + ASMJIT_TSEC({ + this->_traceNode(this, node_, "[F] "); + }); + + switch (node_->getType()) { + // ---------------------------------------------------------------------- + // [Align/Embed] + // ---------------------------------------------------------------------- + + case HLNode::kTypeAlign: + case HLNode::kTypeData: + break; + + // ---------------------------------------------------------------------- + // [Hint] + // ---------------------------------------------------------------------- + + case HLNode::kTypeHint: { + HLHint* node = static_cast(node_); + VI_BEGIN(); + + if (node->getHint() == kVarHintAlloc) { + uint32_t remain[_kX86RegClassManagedCount]; + HLHint* cur = node; + + remain[kX86RegClassGp ] = _regCount.getGp() - 1 - func->hasFuncFlag(kFuncFlagIsNaked); + remain[kX86RegClassMm ] = _regCount.getMm(); + remain[kX86RegClassK ] = _regCount.getK(); + remain[kX86RegClassXyz] = _regCount.getXyz(); + + // Merge as many alloc-hints as possible. + for (;;) { + VarData* vd = static_cast(cur->getVd()); + VarAttr* va = vd->getVa(); + + uint32_t regClass = vd->getClass(); + uint32_t regIndex = cur->getValue(); + uint32_t regMask = 0; + + // We handle both kInvalidReg and kInvalidValue. 
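+          // Both are larger than any valid register index, so the single
+          // `regIndex < kInvalidReg` comparison below rejects either one and
+          // leaves regMask empty when no concrete register was hinted.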
+ if (regIndex < kInvalidReg) + regMask = Utils::mask(regIndex); + + if (va == nullptr) { + if (inRegs.has(regClass, regMask)) + break; + if (remain[regClass] == 0) + break; + VI_ADD_VAR(vd, va, kVarAttrRReg, gaRegs[regClass]); + + if (regMask != 0) { + inRegs.xor_(regClass, regMask); + va->setInRegs(regMask); + va->setInRegIndex(regIndex); + } + + remain[regClass]--; + } + else if (regMask != 0) { + if (inRegs.has(regClass, regMask) && va->getInRegs() != regMask) + break; + + inRegs.xor_(regClass, va->getInRegs() | regMask); + va->setInRegs(regMask); + va->setInRegIndex(regIndex); + } + + if (cur != node) + compiler->removeNode(cur); + + cur = static_cast(node->getNext()); + if (cur == nullptr || cur->getType() != HLNode::kTypeHint || cur->getHint() != kVarHintAlloc) + break; + } + + next = node->getNext(); + } + else { + VarData* vd = static_cast(node->getVd()); + VarAttr* va; + + uint32_t flags = 0; + + switch (node->getHint()) { + case kVarHintSpill: + flags = kVarAttrRMem | kVarAttrSpill; + break; + case kVarHintSave: + flags = kVarAttrRMem; + break; + case kVarHintSaveAndUnuse: + flags = kVarAttrRMem | kVarAttrUnuse; + break; + case kVarHintUnuse: + flags = kVarAttrUnuse; + break; + } + + VI_ADD_VAR(vd, va, flags, 0); + } + + VI_END(node_); + break; + } + + // ---------------------------------------------------------------------- + // [Target] + // ---------------------------------------------------------------------- + + case HLNode::kTypeLabel: { + if (node_ == func->getExitNode()) { + ASMJIT_PROPAGATE_ERROR(addReturningNode(node_)); + goto _NextGroup; + } + break; + } + + // ---------------------------------------------------------------------- + // [Inst] + // ---------------------------------------------------------------------- + + case HLNode::kTypeInst: { + HLInst* node = static_cast(node_); + + uint32_t instId = node->getInstId(); + uint32_t flags = node->getFlags(); + + Operand* opList = node->getOpList(); + uint32_t opCount = node->getOpCount(); + + if (opCount) { + const X86InstExtendedInfo& extendedInfo = _x86InstInfo[instId].getExtendedInfo(); + const X86SpecialInst* special = nullptr; + VI_BEGIN(); + + // Collect instruction flags and merge all 'VarAttr's. + if (extendedInfo.isFp()) + flags |= HLNode::kFlagIsFp; + + if (extendedInfo.isSpecial() && (special = X86SpecialInst_get(instId, opList, opCount)) != nullptr) + flags |= HLNode::kFlagIsSpecial; + + uint32_t gpAllowedMask = 0xFFFFFFFF; + + for (uint32_t i = 0; i < opCount; i++) { + Operand* op = &opList[i]; + VarData* vd; + VarAttr* va; + + if (op->isVar()) { + vd = compiler->getVdById(op->getId()); + VI_MERGE_VAR(vd, va, 0, gaRegs[vd->getClass()] & gpAllowedMask); + + if (static_cast(op)->isGpb()) { + va->orFlags(static_cast(op)->isGpbLo() ? kVarAttrX86GpbLo : kVarAttrX86GpbHi); + if (arch == kArchX86) { + // If a byte register is accessed in 32-bit mode we have to limit + // all allocable registers for that variable to eax/ebx/ecx/edx. + // Other variables are not affected. + va->_allocableRegs &= 0x0F; + } + else { + // It's fine if lo-byte register is accessed in 64-bit mode; + // however, hi-byte has to be checked and if it's used all + // registers (GP/XMM) could be only allocated in the lower eight + // half. To do that, we patch 'allocableRegs' of all variables + // we collected until now and change the allocable restriction + // for variables that come after. 
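+                // Illustrative example: if a variable is used as its hi-byte
+                // (ah/bh/ch/dh), the instruction cannot be encoded with a REX
+                // prefix, so that variable is limited to eax/ecx/edx/ebx
+                // (mask 0x0F) and every other Gp variable in the instruction
+                // to the first eight registers (mask 0xFF).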
+                if (static_cast<X86Reg*>(op)->isGpbHi()) {
+                  va->_allocableRegs &= 0x0F;
+
+                  if (gpAllowedMask != 0xFF) {
+                    for (uint32_t j = 0; j < i; j++)
+                      vaTmpList[j]._allocableRegs &= vaTmpList[j].hasFlag(kVarAttrX86GpbHi) ? 0x0F : 0xFF;
+                    gpAllowedMask = 0xFF;
+                  }
+                }
+              }
+            }
+
+            if (special != nullptr) {
+              uint32_t inReg = special[i].inReg;
+              uint32_t outReg = special[i].outReg;
+              uint32_t c;
+
+              if (static_cast<X86Reg*>(op)->isGp())
+                c = kX86RegClassGp;
+              else
+                c = kX86RegClassXyz;
+
+              if (inReg != kInvalidReg) {
+                uint32_t mask = Utils::mask(inReg);
+                inRegs.or_(c, mask);
+                va->addInRegs(mask);
+              }
+
+              if (outReg != kInvalidReg) {
+                uint32_t mask = Utils::mask(outReg);
+                outRegs.or_(c, mask);
+                va->setOutRegIndex(outReg);
+              }
+
+              va->orFlags(special[i].flags);
+            }
+            else {
+              uint32_t inFlags = kVarAttrRReg;
+              uint32_t outFlags = kVarAttrWReg;
+              uint32_t combinedFlags;
+
+              if (i == 0) {
+                // Read/Write is usually the combination of the first operand.
+                combinedFlags = inFlags | outFlags;
+
+                if (node->getOptions() & kInstOptionOverwrite) {
+                  // Manually forcing write-only.
+                  combinedFlags = outFlags;
+                }
+                else if (extendedInfo.isWO()) {
+                  // Write-only instruction.
+                  uint32_t movSize = extendedInfo.getWriteSize();
+                  uint32_t varSize = vd->getSize();
+
+                  // Exception - if the source operand is a memory location,
+                  // promote the move size to 16 bytes.
+                  if (extendedInfo.isZeroIfMem() && opList[1].isMem())
+                    movSize = 16;
+
+                  if (static_cast<X86Reg*>(op)->isGp()) {
+                    uint32_t opSize = static_cast<X86Reg*>(op)->getSize();
+
+                    // Move size is zero when it should be determined
+                    // from the destination register.
+                    if (movSize == 0)
+                      movSize = opSize;
+
+                    // Handle the case that a 32-bit operation in 64-bit mode
+                    // always clears the rest of the destination register and
+                    // the case that the move size is actually greater than or
+                    // equal to the size of the variable.
+                    if (movSize >= 4 || movSize >= varSize)
+                      combinedFlags = outFlags;
+                  }
+                  else if (movSize >= varSize) {
+                    // If the move size is greater than or equal to the size of
+                    // the variable there is nothing to do, because the move
+                    // will overwrite the variable in all cases.
+                    combinedFlags = outFlags;
+                  }
+                }
+                else if (extendedInfo.isRO()) {
+                  // Comparison/Test instructions don't modify any operand.
+                  combinedFlags = inFlags;
+                }
+                else if (instId == kX86InstIdImul && opCount == 3) {
+                  // Imul.
+                  combinedFlags = outFlags;
+                }
+              }
+              else {
+                // Read-Only is usually the combination of the second/third/fourth operands.
+                combinedFlags = inFlags;
+
+                // Idiv is a special instruction, never handled here.
+                ASMJIT_ASSERT(instId != kX86InstIdIdiv);
+
+                // Xchg/Xadd/Imul.
+                if (extendedInfo.isXchg() || (instId == kX86InstIdImul && opCount == 3 && i == 1))
+                  combinedFlags = inFlags | outFlags;
+              }
+              va->orFlags(combinedFlags);
+            }
+          }
+          else if (op->isMem()) {
+            X86Mem* m = static_cast<X86Mem*>(op);
+            node->setMemOpIndex(i);
+
+            if (OperandUtil::isVarId(m->getBase()) && m->isBaseIndexType()) {
+              vd = compiler->getVdById(m->getBase());
+              if (!vd->isStack()) {
+                VI_MERGE_VAR(vd, va, 0, gaRegs[vd->getClass()] & gpAllowedMask);
+                if (m->getMemType() == kMemTypeBaseIndex) {
+                  va->orFlags(kVarAttrRReg);
+                }
+                else {
+                  uint32_t inFlags = kVarAttrRMem;
+                  uint32_t outFlags = kVarAttrWMem;
+                  uint32_t combinedFlags;
+
+                  if (i == 0) {
+                    // Default for the first operand.
+                    combinedFlags = inFlags | outFlags;
+
+                    if (extendedInfo.isWO()) {
+                      // Move to memory - setting the right flags is as
+                      // important as when moving to a register; it's just a
+                      // bit simpler since there are no special cases.
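+                      // E.g. a store that covers the whole variable can be
+                      // treated as write-only (kVarAttrWMem), while a
+                      // narrower store must keep the read flag as well.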
+ uint32_t movSize = Utils::iMax(extendedInfo.getWriteSize(), m->getSize()); + uint32_t varSize = vd->getSize(); + + if (movSize >= varSize) + combinedFlags = outFlags; + } + else if (extendedInfo.isRO()) { + // Comparison/Test instructions don't modify any operand. + combinedFlags = inFlags; + } + } + else { + // Default for the second operand. + combinedFlags = inFlags; + + // Handle Xchg instruction (modifies both operands). + if (extendedInfo.isXchg()) + combinedFlags = inFlags | outFlags; + } + + va->orFlags(combinedFlags); + } + } + } + + if (OperandUtil::isVarId(m->getIndex())) { + // Restrict allocation to all registers except ESP/RSP/R12. + vd = compiler->getVdById(m->getIndex()); + VI_MERGE_VAR(vd, va, 0, gaRegs[kX86RegClassGp] & gpAllowedMask); + va->andAllocableRegs(indexMask); + va->orFlags(kVarAttrRReg); + } + } + } + + node->setFlags(flags); + if (vaCount) { + // Handle instructions which result in zeros/ones or nop if used with the + // same destination and source operand. + if (vaCount == 1 && opCount >= 2 && opList[0].isVar() && opList[1].isVar() && !node->hasMemOp()) + X86Context_prepareSingleVarInst(instId, &vaTmpList[0]); + } + + VI_END(node_); + } + + // Handle conditional/unconditional jump. + if (node->isJmpOrJcc()) { + HLJump* jNode = static_cast(node); + HLLabel* jTarget = jNode->getTarget(); + + // If this jump is unconditional we put next node to unreachable node + // list so we can eliminate possible dead code. We have to do this in + // all cases since we are unable to translate without fetch() step. + // + // We also advance our node pointer to the target node to simulate + // natural flow of the function. + if (jNode->isJmp()) { + if (!next->isFetched()) + ASMJIT_PROPAGATE_ERROR(addUnreachableNode(next)); + + // Jump not followed. + if (jTarget == nullptr) { + ASMJIT_PROPAGATE_ERROR(addReturningNode(jNode)); + goto _NextGroup; + } + + node_ = jTarget; + goto _Do; + } + else { + // Jump not followed. + if (jTarget == nullptr) + break; + + if (jTarget->isFetched()) { + uint32_t jTargetFlowId = jTarget->getFlowId(); + + // Update HLNode::kFlagIsTaken flag to true if this is a + // conditional backward jump. This behavior can be overridden + // by using `kInstOptionTaken` when the instruction is created. + if (!jNode->isTaken() && opCount == 1 && jTargetFlowId <= flowId) { + jNode->orFlags(HLNode::kFlagIsTaken); + } + } + else if (next->isFetched()) { + node_ = jTarget; + goto _Do; + } + else { + ASMJIT_PROPAGATE_ERROR(addJccNode(jNode)); + node_ = X86Context_getJccFlow(jNode); + goto _Do; + } + } + } + break; + } + + // ---------------------------------------------------------------------- + // [Func] + // ---------------------------------------------------------------------- + + case HLNode::kTypeFunc: { + ASMJIT_ASSERT(node_ == func); + X86FuncDecl* decl = func->getDecl(); + + VI_BEGIN(); + for (uint32_t i = 0, argCount = decl->getNumArgs(); i < argCount; i++) { + const FuncInOut& arg = decl->getArg(i); + + VarData* vd = func->getArg(i); + VarAttr* va; + + if (vd == nullptr) + continue; + + // Overlapped function arguments. 
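+        // (the same variable bound to more than one argument slot); this is
+        // not supported and fails with kErrorOverlappedArgs below.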
+ if (vd->getVa() != nullptr) + return compiler->setLastError(kErrorOverlappedArgs); + VI_ADD_VAR(vd, va, 0, 0); + + uint32_t aType = arg.getVarType(); + uint32_t vType = vd->getType(); + + if (arg.hasRegIndex()) { + if (x86VarTypeToClass(aType) == vd->getClass()) { + va->orFlags(kVarAttrWReg); + va->setOutRegIndex(arg.getRegIndex()); + } + else { + va->orFlags(kVarAttrWConv); + } + } + else { + if ((x86VarTypeToClass(aType) == vd->getClass()) || + (vType == kX86VarTypeXmmSs && aType == kVarTypeFp32) || + (vType == kX86VarTypeXmmSd && aType == kVarTypeFp64)) { + va->orFlags(kVarAttrWMem); + } + else { + // TODO: [COMPILER] Not implemented. + ASMJIT_ASSERT(!"Implemented"); + } + } + } + VI_END(node_); + break; + } + + // ---------------------------------------------------------------------- + // [End] + // ---------------------------------------------------------------------- + + case HLNode::kTypeSentinel: { + ASMJIT_PROPAGATE_ERROR(addReturningNode(node_)); + goto _NextGroup; + } + + // ---------------------------------------------------------------------- + // [Ret] + // ---------------------------------------------------------------------- + + case HLNode::kTypeRet: { + HLRet* node = static_cast(node_); + ASMJIT_PROPAGATE_ERROR(addReturningNode(node)); + + X86FuncDecl* decl = func->getDecl(); + if (decl->hasRet()) { + const FuncInOut& ret = decl->getRet(0); + uint32_t retClass = x86VarTypeToClass(ret.getVarType()); + + VI_BEGIN(); + for (uint32_t i = 0; i < 2; i++) { + Operand* op = &node->_ret[i]; + + if (op->isVar()) { + VarData* vd = compiler->getVdById(op->getId()); + VarAttr* va; + + VI_MERGE_VAR(vd, va, 0, 0); + + if (retClass == vd->getClass()) { + // TODO: [COMPILER] Fix HLRet fetch. + va->orFlags(kVarAttrRReg); + va->setInRegs(i == 0 ? Utils::mask(kX86RegIndexAx) : Utils::mask(kX86RegIndexDx)); + inRegs.or_(retClass, va->getInRegs()); + } + else if (retClass == kX86RegClassFp) { + uint32_t fldFlag = ret.getVarType() == kVarTypeFp32 ? kVarAttrX86Fld4 : kVarAttrX86Fld8; + va->orFlags(kVarAttrRMem | fldFlag); + } + else { + // TODO: Fix possible other return type conversions. + ASMJIT_NOT_REACHED(); + } + } + } + VI_END(node_); + } + + if (!next->isFetched()) + ASMJIT_PROPAGATE_ERROR(addUnreachableNode(next)); + goto _NextGroup; + } + + // ---------------------------------------------------------------------- + // [Call] + // ---------------------------------------------------------------------- + + case HLNode::kTypeCall: { + X86CallNode* node = static_cast(node_); + X86FuncDecl* decl = node->getDecl(); + + Operand* target = &node->_target; + Operand* args = node->_args; + Operand* rets = node->_ret; + + func->addFuncFlags(kFuncFlagIsCaller); + func->mergeCallStackSize(node->_x86Decl.getArgStackSize()); + node->_usedArgs = X86Context_getUsedArgs(this, node, decl); + + uint32_t i; + uint32_t argCount = decl->getNumArgs(); + uint32_t sArgCount = 0; + uint32_t gpAllocableMask = gaRegs[kX86RegClassGp] & ~node->_usedArgs.get(kX86RegClassGp); + + VarData* vd; + VarAttr* va; + + VI_BEGIN(); + + // Function-call operand. 
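+      // When the target lives in a variable or a memory operand it has to be
+      // materialized in a Gp register that no argument is pinned to, which is
+      // what gpAllocableMask computed above guarantees.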
+ if (target->isVar()) { + vd = compiler->getVdById(target->getId()); + VI_MERGE_VAR(vd, va, 0, 0); + + va->orFlags(kVarAttrRReg | kVarAttrRCall); + if (va->getInRegs() == 0) + va->addAllocableRegs(gpAllocableMask); + } + else if (target->isMem()) { + X86Mem* m = static_cast(target); + + if (OperandUtil::isVarId(m->getBase()) && m->isBaseIndexType()) { + vd = compiler->getVdById(m->getBase()); + if (!vd->isStack()) { + VI_MERGE_VAR(vd, va, 0, 0); + if (m->getMemType() == kMemTypeBaseIndex) { + va->orFlags(kVarAttrRReg | kVarAttrRCall); + if (va->getInRegs() == 0) + va->addAllocableRegs(gpAllocableMask); + } + else { + va->orFlags(kVarAttrRMem | kVarAttrRCall); + } + } + } + + if (OperandUtil::isVarId(m->getIndex())) { + // Restrict allocation to all registers except ESP/RSP/R12. + vd = compiler->getVdById(m->getIndex()); + VI_MERGE_VAR(vd, va, 0, 0); + + va->orFlags(kVarAttrRReg | kVarAttrRCall); + if ((va->getInRegs() & ~indexMask) == 0) + va->andAllocableRegs(gpAllocableMask & indexMask); + } + } + + // Function-call arguments. + for (i = 0; i < argCount; i++) { + Operand* op = &args[i]; + if (!op->isVar()) + continue; + + vd = compiler->getVdById(op->getId()); + const FuncInOut& arg = decl->getArg(i); + + if (arg.hasRegIndex()) { + VI_MERGE_VAR(vd, va, 0, 0); + + uint32_t argType = arg.getVarType(); + uint32_t argClass = x86VarTypeToClass(argType); + + if (vd->getClass() == argClass) { + va->addInRegs(Utils::mask(arg.getRegIndex())); + va->orFlags(kVarAttrRReg | kVarAttrRFunc); + } + else { + va->orFlags(kVarAttrRConv | kVarAttrRFunc); + } + } + // If this is a stack-based argument we insert HLCallArg instead of + // using VarAttr. It improves the code, because the argument can be + // moved onto stack as soon as it is ready and the register used by + // the variable can be reused for something else. It is also much + // easier to handle argument conversions, because there will be at + // most only one node per conversion. + else { + if (X86Context_insertHLCallArg(this, node, vd, gaRegs, arg, i, sArgList, sArgCount) != kErrorOk) + goto _NoMemory; + } + } + + // Function-call return(s). + for (i = 0; i < 2; i++) { + Operand* op = &rets[i]; + if (!op->isVar()) + continue; + + const FuncInOut& ret = decl->getRet(i); + if (ret.hasRegIndex()) { + uint32_t retType = ret.getVarType(); + uint32_t retClass = x86VarTypeToClass(retType); + + vd = compiler->getVdById(op->getId()); + VI_MERGE_VAR(vd, va, 0, 0); + + if (vd->getClass() == retClass) { + va->setOutRegIndex(ret.getRegIndex()); + va->orFlags(kVarAttrWReg | kVarAttrWFunc); + } + else { + va->orFlags(kVarAttrWConv | kVarAttrWFunc); + } + } + } + + // Init clobbered. + clobberedRegs.set(kX86RegClassGp , Utils::bits(_regCount.getGp()) & (~decl->getPreserved(kX86RegClassGp ))); + clobberedRegs.set(kX86RegClassMm , Utils::bits(_regCount.getMm()) & (~decl->getPreserved(kX86RegClassMm ))); + clobberedRegs.set(kX86RegClassK , Utils::bits(_regCount.getK()) & (~decl->getPreserved(kX86RegClassK ))); + clobberedRegs.set(kX86RegClassXyz, Utils::bits(_regCount.getXyz()) & (~decl->getPreserved(kX86RegClassXyz))); + + VI_END(node_); + break; + } + + default: + break; + } + + node_ = next; + } while (node_ != stop); + +_Done: + // Mark exit label and end node as fetched, otherwise they can be removed by + // `removeUnreachableCode()`, which would lead to crash in some later step. 
+ node_ = func->getEnd(); + if (!node_->isFetched()) { + func->getExitNode()->setFlowId(++flowId); + node_->setFlowId(++flowId); + } + + ASMJIT_TLOG("[F] ======= Fetch (Done)\n"); + return kErrorOk; + + // -------------------------------------------------------------------------- + // [Failure] + // -------------------------------------------------------------------------- + +_NoMemory: + ASMJIT_TLOG("[F] ======= Fetch (Out of Memory)\n"); + return compiler->setLastError(kErrorNoHeapMemory); +} + +// ============================================================================ +// [asmjit::X86Context - Annotate] +// ============================================================================ + +Error X86Context::annotate() { +#if !defined(ASMJIT_DISABLE_LOGGER) + HLFunc* func = getFunc(); + + HLNode* node_ = func; + HLNode* end = func->getEnd(); + + Zone& sa = _compiler->_stringAllocator; + StringBuilderTmp<128> sb; + + uint32_t maxLen = 0; + while (node_ != end) { + if (node_->getComment() == nullptr) { + if (node_->getType() == HLNode::kTypeInst) { + HLInst* node = static_cast(node_); + X86Context_annotateInstruction(this, sb, node->getInstId(), node->getOpList(), node->getOpCount()); + + node_->setComment(static_cast(sa.dup(sb.getData(), sb.getLength() + 1))); + maxLen = Utils::iMax(maxLen, static_cast(sb.getLength())); + + sb.clear(); + } + } + + node_ = node_->getNext(); + } + _annotationLength = maxLen + 1; +#endif // !ASMJIT_DISABLE_LOGGER + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::X86BaseAlloc] +// ============================================================================ + +struct X86BaseAlloc { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE X86BaseAlloc(X86Context* context) { + _context = context; + _compiler = context->getCompiler(); + } + ASMJIT_INLINE ~X86BaseAlloc() {} + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! Get the context. + ASMJIT_INLINE X86Context* getContext() const { return _context; } + //! Get the current state (always the same instance as X86Context::_x86State). + ASMJIT_INLINE X86VarState* getState() const { return _context->getState(); } + + //! Get the node. + ASMJIT_INLINE HLNode* getNode() const { return _node; } + + //! Get VarAttr list (all). + ASMJIT_INLINE VarAttr* getVaList() const { return _vaList[0]; } + //! Get VarAttr list (per class). + ASMJIT_INLINE VarAttr* getVaListByClass(uint32_t rc) const { return _vaList[rc]; } + + //! Get VarAttr count (all). + ASMJIT_INLINE uint32_t getVaCount() const { return _vaCount; } + //! Get VarAttr count (per class). + ASMJIT_INLINE uint32_t getVaCountByClass(uint32_t rc) const { return _count.get(rc); } + + //! Get whether all variables of class `c` are done. + ASMJIT_INLINE bool isVaDone(uint32_t rc) const { return _done.get(rc) == _count.get(rc); } + + //! Get how many variables have been allocated. + ASMJIT_INLINE uint32_t getVaDone(uint32_t rc) const { return _done.get(rc); } + //! Add to the count of variables allocated. + ASMJIT_INLINE void addVaDone(uint32_t rc, uint32_t n = 1) { _done.add(rc, n); } + + //! Get number of allocable registers per class. 
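+  //! (This is the global, per-class mask of registers the allocator is
+  //! allowed to use at all, as maintained in X86Context::_gaRegs.)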
+ ASMJIT_INLINE uint32_t getGaRegs(uint32_t rc) const { + return _context->_gaRegs[rc]; + } + + // -------------------------------------------------------------------------- + // [Init / Cleanup] + // -------------------------------------------------------------------------- + +protected: + // Just to prevent calling these methods by X86Context::translate(). + + ASMJIT_INLINE void init(HLNode* node, X86VarMap* map); + ASMJIT_INLINE void cleanup(); + + // -------------------------------------------------------------------------- + // [Unuse] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void unuseBefore(); + + template + ASMJIT_INLINE void unuseAfter(); + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! Context. + X86Context* _context; + //! Compiler. + X86Compiler* _compiler; + + //! Node. + HLNode* _node; + + //! Variable map. + X86VarMap* _map; + //! VarAttr list (per register class). + VarAttr* _vaList[_kX86RegClassManagedCount]; + + //! Count of all VarAttr's. + uint32_t _vaCount; + + //! VarAttr's total counter. + X86RegCount _count; + //! VarAttr's done counter. + X86RegCount _done; +}; + +// ============================================================================ +// [asmjit::X86BaseAlloc - Init / Cleanup] +// ============================================================================ + +ASMJIT_INLINE void X86BaseAlloc::init(HLNode* node, X86VarMap* map) { + _node = node; + _map = map; + + // We have to set the correct cursor in case any instruction is emitted + // during the allocation phase; it has to be emitted before the current + // instruction. + _compiler->_setCursor(node->getPrev()); + + // Setup the lists of variables. + { + VarAttr* va = map->getVaList(); + _vaList[kX86RegClassGp ] = va; + _vaList[kX86RegClassMm ] = va + map->getVaStart(kX86RegClassMm ); + _vaList[kX86RegClassK ] = va + map->getVaStart(kX86RegClassK ); + _vaList[kX86RegClassXyz] = va + map->getVaStart(kX86RegClassXyz); + } + + // Setup counters. + _vaCount = map->getVaCount(); + + _count = map->_count; + _done.reset(); + + // Connect Vd->Va. + for (uint32_t i = 0; i < _vaCount; i++) { + VarAttr* va = &_vaList[0][i]; + VarData* vd = va->getVd(); + + vd->setVa(va); + } +} + +ASMJIT_INLINE void X86BaseAlloc::cleanup() { + // Disconnect Vd->Va. 
+  for (uint32_t i = 0; i < _vaCount; i++) {
+    VarAttr* va = &_vaList[0][i];
+    VarData* vd = va->getVd();
+
+    vd->setVa(nullptr);
+  }
+}
+
+// ============================================================================
+// [asmjit::X86BaseAlloc - Unuse]
+// ============================================================================
+
+template<int C>
+ASMJIT_INLINE void X86BaseAlloc::unuseBefore() {
+  VarAttr* list = getVaListByClass(C);
+  uint32_t count = getVaCountByClass(C);
+
+  const uint32_t checkFlags =
+    kVarAttrXReg  |
+    kVarAttrRMem  |
+    kVarAttrRFunc |
+    kVarAttrRCall |
+    kVarAttrRConv ;
+
+  for (uint32_t i = 0; i < count; i++) {
+    VarAttr* va = &list[i];
+
+    if ((va->getFlags() & checkFlags) == kVarAttrWReg) {
+      _context->unuse(va->getVd());
+    }
+  }
+}
+
+template<int C>
+ASMJIT_INLINE void X86BaseAlloc::unuseAfter() {
+  VarAttr* list = getVaListByClass(C);
+  uint32_t count = getVaCountByClass(C);
+
+  for (uint32_t i = 0; i < count; i++) {
+    VarAttr* va = &list[i];
+
+    if (va->getFlags() & kVarAttrUnuse)
+      _context->unuse(va->getVd());
+  }
+}
+
+// ============================================================================
+// [asmjit::X86VarAlloc]
+// ============================================================================
+
+//! \internal
+//!
+//! Register allocator context (asm instructions).
+struct X86VarAlloc : public X86BaseAlloc {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86VarAlloc(X86Context* context) : X86BaseAlloc(context) {}
+  ASMJIT_INLINE ~X86VarAlloc() {}
+
+  // --------------------------------------------------------------------------
+  // [Run]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE Error run(HLNode* node);
+
+  // --------------------------------------------------------------------------
+  // [Init / Cleanup]
+  // --------------------------------------------------------------------------
+
+protected:
+  // Just to prevent calling these methods from X86Context::translate().
+
+  ASMJIT_INLINE void init(HLNode* node, X86VarMap* map);
+  ASMJIT_INLINE void cleanup();
+
+  // --------------------------------------------------------------------------
+  // [Plan / Spill / Alloc]
+  // --------------------------------------------------------------------------
+
+  template<int C>
+  ASMJIT_INLINE void plan();
+
+  template<int C>
+  ASMJIT_INLINE void spill();
+
+  template<int C>
+  ASMJIT_INLINE void alloc();
+
+  // --------------------------------------------------------------------------
+  // [GuessAlloc / GuessSpill]
+  // --------------------------------------------------------------------------
+
+  //! Guess which register is the best candidate for 'vd' from
+  //! 'allocableRegs'.
+  //!
+  //! The guess is based on looking ahead and inspecting register allocator
+  //! instructions. The main reason is to prevent allocation to a register
+  //! which is needed by the next instruction(s). The look-ahead tries to go
+  //! as far as possible; once the remaining register mask would become zero,
+  //! the last non-zero mask (called 'safeRegs') is returned.
+  template<int C>
+  ASMJIT_INLINE uint32_t guessAlloc(VarData* vd, uint32_t allocableRegs);
+
+  //! Guess whether to move the given 'vd' instead of spill.
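+  //! Returns a subset of 'allocableRegs' the variable could be moved to, or
+  //! zero when a plain spill is the better choice (the implementation below
+  //! currently always returns zero and simply spills).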
+  template<int C>
+  ASMJIT_INLINE uint32_t guessSpill(VarData* vd, uint32_t allocableRegs);
+
+  // --------------------------------------------------------------------------
+  // [Modified]
+  // --------------------------------------------------------------------------
+
+  template<int C>
+  ASMJIT_INLINE void modified();
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Will alloc to these registers.
+  X86RegMask _willAlloc;
+  //! Will spill these registers.
+  X86RegMask _willSpill;
+};
+
+// ============================================================================
+// [asmjit::X86VarAlloc - Run]
+// ============================================================================
+
+ASMJIT_INLINE Error X86VarAlloc::run(HLNode* node_) {
+  // Initialize.
+  X86VarMap* map = node_->getMap();
+  if (map == nullptr)
+    return kErrorOk;
+
+  // Initialize the allocator; connect Vd->Va.
+  init(node_, map);
+
+  // Unuse overwritten variables.
+  unuseBefore<kX86RegClassGp>();
+  unuseBefore<kX86RegClassMm>();
+  unuseBefore<kX86RegClassXyz>();
+
+  // Plan the allocation. Planner assigns input/output registers for each
+  // variable and decides whether to allocate it in register or stack.
+  plan<kX86RegClassGp>();
+  plan<kX86RegClassMm>();
+  plan<kX86RegClassXyz>();
+
+  // Spill all variables marked by plan().
+  spill<kX86RegClassGp>();
+  spill<kX86RegClassMm>();
+  spill<kX86RegClassXyz>();
+
+  // Alloc all variables marked by plan().
+  alloc<kX86RegClassGp>();
+  alloc<kX86RegClassMm>();
+  alloc<kX86RegClassXyz>();
+
+  // Translate node operands.
+  if (node_->getType() == HLNode::kTypeInst) {
+    HLInst* node = static_cast<HLInst*>(node_);
+    ASMJIT_PROPAGATE_ERROR(X86Context_translateOperands(_context, node->getOpList(), node->getOpCount()));
+  }
+  else if (node_->getType() == HLNode::kTypeCallArg) {
+    HLCallArg* node = static_cast<HLCallArg*>(node_);
+
+    X86CallNode* call = static_cast<X86CallNode*>(node->getCall());
+    X86FuncDecl* decl = call->getDecl();
+
+    uint32_t argIndex = 0;
+    uint32_t argMask = node->_args;
+
+    VarData* sVd = node->getSVd();
+    VarData* cVd = node->getCVd();
+
+    // Convert first.
+    ASMJIT_ASSERT(sVd->getRegIndex() != kInvalidReg);
+
+    if (cVd != nullptr) {
+      ASMJIT_ASSERT(cVd->getRegIndex() != kInvalidReg);
+      _context->emitConvertVarToVar(
+        cVd->getType(), cVd->getRegIndex(),
+        sVd->getType(), sVd->getRegIndex());
+      sVd = cVd;
+    }
+
+    while (argMask != 0) {
+      if (argMask & 0x1) {
+        FuncInOut& arg = decl->getArg(argIndex);
+        ASMJIT_ASSERT(arg.hasStackOffset());
+
+        X86Mem dst = x86::ptr(_context->_zsp, -static_cast<int>(_context->getRegSize()) + arg.getStackOffset());
+        _context->emitMoveVarOnStack(arg.getVarType(), &dst, sVd->getType(), sVd->getRegIndex());
+      }
+
+      argIndex++;
+      argMask >>= 1;
+    }
+  }
+
+  // Mark variables as modified.
+  modified<kX86RegClassGp>();
+  modified<kX86RegClassMm>();
+  modified<kX86RegClassXyz>();
+
+  // Cleanup; disconnect Vd->Va.
+  cleanup();
+
+  // Update clobbered mask.
+  _context->_clobberedRegs.or_(_willAlloc);
+  _context->_clobberedRegs.or_(map->_clobberedRegs);
+
+  // Unuse.
+  unuseAfter<kX86RegClassGp>();
+  unuseAfter<kX86RegClassMm>();
+  unuseAfter<kX86RegClassXyz>();
+
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::X86VarAlloc - Init / Cleanup]
+// ============================================================================
+
+ASMJIT_INLINE void X86VarAlloc::init(HLNode* node, X86VarMap* map) {
+  X86BaseAlloc::init(node, map);
+
+  // These will block planner from assigning them during planning. Planner will
+  // add more registers when assigning registers to variables that don't need
+  // any specific register.
+ _willAlloc = map->_inRegs; + _willAlloc.or_(map->_outRegs); + _willSpill.reset(); +} + +ASMJIT_INLINE void X86VarAlloc::cleanup() { + X86BaseAlloc::cleanup(); +} + +// ============================================================================ +// [asmjit::X86VarAlloc - Plan / Spill / Alloc] +// ============================================================================ + +template +ASMJIT_INLINE void X86VarAlloc::plan() { + if (isVaDone(C)) + return; + + uint32_t i; + uint32_t willAlloc = _willAlloc.get(C); + uint32_t willFree = 0; + + VarAttr* list = getVaListByClass(C); + uint32_t count = getVaCountByClass(C); + X86VarState* state = getState(); + + // Calculate 'willAlloc' and 'willFree' masks based on mandatory masks. + for (i = 0; i < count; i++) { + VarAttr* va = &list[i]; + VarData* vd = va->getVd(); + + uint32_t vaFlags = va->getFlags(); + uint32_t regIndex = vd->getRegIndex(); + uint32_t regMask = (regIndex != kInvalidReg) ? Utils::mask(regIndex) : 0; + + if ((vaFlags & kVarAttrXReg) != 0) { + // Planning register allocation. First check whether the variable is + // already allocated in register and if it can stay allocated there. + // + // The following conditions may happen: + // + // a) Allocated register is one of the mandatoryRegs. + // b) Allocated register is one of the allocableRegs. + uint32_t mandatoryRegs = va->getInRegs(); + uint32_t allocableRegs = va->getAllocableRegs(); + + ASMJIT_TLOG("[RA-PLAN] %s (%s)\n", + vd->getName(), + (vaFlags & kVarAttrXReg) == kVarAttrWReg ? "R-Reg" : "X-Reg"); + + ASMJIT_TLOG("[RA-PLAN] RegMask=%08X Mandatory=%08X Allocable=%08X\n", + regMask, mandatoryRegs, allocableRegs); + + if (regMask != 0) { + // Special path for planning output-only registers. + if ((vaFlags & kVarAttrXReg) == kVarAttrWReg) { + uint32_t outRegIndex = va->getOutRegIndex(); + mandatoryRegs = (outRegIndex != kInvalidReg) ? Utils::mask(outRegIndex) : 0; + + if ((mandatoryRegs | allocableRegs) & regMask) { + va->setOutRegIndex(regIndex); + va->orFlags(kVarAttrAllocWDone); + + if (mandatoryRegs & regMask) { + // Case 'a' - 'willAlloc' contains initially all inRegs from all VarAttr's. + ASMJIT_ASSERT((willAlloc & regMask) != 0); + } + else { + // Case 'b'. + va->setOutRegIndex(regIndex); + willAlloc |= regMask; + } + + ASMJIT_TLOG("[RA-PLAN] WillAlloc\n"); + addVaDone(C); + + continue; + } + } + else { + if ((mandatoryRegs | allocableRegs) & regMask) { + va->setInRegIndex(regIndex); + va->orFlags(kVarAttrAllocRDone); + + if (mandatoryRegs & regMask) { + // Case 'a' - 'willAlloc' contains initially all inRegs from all VarAttr's. + ASMJIT_ASSERT((willAlloc & regMask) != 0); + } + else { + // Case 'b'. + va->addInRegs(regMask); + willAlloc |= regMask; + } + + ASMJIT_TLOG("[RA-PLAN] WillAlloc\n"); + addVaDone(C); + + continue; + } + } + + // Trace it here so we don't pollute log by `WillFree` of zero regMask. + ASMJIT_TLOG("[RA-PLAN] WillFree\n"); + } + + // Variable is not allocated or allocated in register that doesn't + // match inRegs or allocableRegs. The next step is to pick the best + // register for this variable. If `inRegs` contains any register the + // decision is simple - we have to follow, in other case will use + // the advantage of `guessAlloc()` to find a register (or registers) + // by looking ahead. But the best way to find a good register is not + // here since now we have no information about the registers that + // will be freed. 
So instead of finding a register here, we just mark
+      // the current register (if the variable is allocated) as `willFree` so
+      // the planner can use this information in the second step to plan the
+      // allocation as a whole.
+      willFree |= regMask;
+      continue;
+    }
+    else {
+      // Memory access - if the variable is allocated it has to be freed.
+      ASMJIT_TLOG("[RA-PLAN] %s (Memory)\n", vd->getName());
+
+      if (regMask != 0) {
+        ASMJIT_TLOG("[RA-PLAN] WillFree\n");
+        willFree |= regMask;
+        continue;
+      }
+      else {
+        ASMJIT_TLOG("[RA-PLAN] Done\n");
+        va->orFlags(kVarAttrAllocRDone);
+        addVaDone(C);
+        continue;
+      }
+    }
+  }
+
+  // Occupied registers without 'willFree' registers; this is basically the
+  // set of registers we can use to allocate variables without inRegs
+  // specified.
+  uint32_t occupied = state->_occupied.get(C) & ~willFree;
+  uint32_t willSpill = 0;
+
+  // Find the best registers for variables that are not allocated yet.
+  for (i = 0; i < count; i++) {
+    VarAttr* va = &list[i];
+    VarData* vd = va->getVd();
+
+    uint32_t vaFlags = va->getFlags();
+
+    if ((vaFlags & kVarAttrXReg) != 0) {
+      if ((vaFlags & kVarAttrXReg) == kVarAttrWReg) {
+        if (vaFlags & kVarAttrAllocWDone)
+          continue;
+
+        // Skip all registers that have an assigned outRegIndex. Spill if occupied.
+        if (va->hasOutRegIndex()) {
+          uint32_t outRegs = Utils::mask(va->getOutRegIndex());
+          willSpill |= occupied & outRegs;
+          continue;
+        }
+      }
+      else {
+        if (vaFlags & kVarAttrAllocRDone)
+          continue;
+
+        // Skip all registers that have an assigned inRegIndex, which indicates
+        // that the register to allocate in is already known.
+        if (va->hasInRegIndex()) {
+          uint32_t inRegs = va->getInRegs();
+          willSpill |= occupied & inRegs;
+          continue;
+        }
+      }
+
+      uint32_t m = va->getInRegs();
+      if (va->hasOutRegIndex())
+        m |= Utils::mask(va->getOutRegIndex());
+
+      m = va->getAllocableRegs() & ~(willAlloc ^ m);
+      m = guessAlloc<C>(vd, m);
+      ASMJIT_ASSERT(m != 0);
+
+      uint32_t candidateRegs = m & ~occupied;
+      uint32_t homeMask = vd->getHomeMask();
+
+      uint32_t regIndex;
+      uint32_t regMask;
+
+      if (candidateRegs == 0) {
+        candidateRegs = m & occupied & ~state->_modified.get(C);
+        if (candidateRegs == 0)
+          candidateRegs = m;
+      }
+
+      // printf("CANDIDATE: %s %08X\n", vd->getName(), homeMask);
+      if (candidateRegs & homeMask)
+        candidateRegs &= homeMask;
+
+      regIndex = Utils::findFirstBit(candidateRegs);
+      regMask = Utils::mask(regIndex);
+
+      if ((vaFlags & kVarAttrXReg) == kVarAttrWReg) {
+        va->setOutRegIndex(regIndex);
+      }
+      else {
+        va->setInRegIndex(regIndex);
+        va->setInRegs(regMask);
+      }
+
+      willAlloc |= regMask;
+      willSpill |= regMask & occupied;
+      willFree &= ~regMask;
+      occupied |= regMask;
+
+      continue;
+    }
+    else if ((vaFlags & kVarAttrXMem) != 0) {
+      uint32_t regIndex = vd->getRegIndex();
+      if (regIndex != kInvalidReg && (vaFlags & kVarAttrXMem) != kVarAttrWMem) {
+        willSpill |= Utils::mask(regIndex);
+      }
+    }
+  }
+
+  // Set the calculated masks back to the allocator; needed by spill() and alloc().
+  _willSpill.set(C, willSpill);
+  _willAlloc.set(C, willAlloc);
+}
+
+template<int C>
+ASMJIT_INLINE void X86VarAlloc::spill() {
+  uint32_t m = _willSpill.get(C);
+  uint32_t i = static_cast<uint32_t>(0) - 1;
+
+  if (m == 0)
+    return;
+
+  X86VarState* state = getState();
+  VarData** sVars = state->getListByClass(C);
+
+  // Available registers for deciding whether a move has any benefit over a spill.
+  uint32_t availableRegs = getGaRegs(C) & ~(state->_occupied.get(C) | m | _willAlloc.get(C));
+
+  do {
+    // We always advance one more to destroy the bit that we have found.
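+    // E.g. for m == 0b10100: findFirstBit(m) == 2, so bitIndex == 3; `i`
+    // becomes 2 and m becomes 0b10. The next pass yields i == 4 and m == 0,
+    // visiting exactly the set bits (registers 2 and 4) before the loop ends.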
+ uint32_t bitIndex = Utils::findFirstBit(m) + 1; + + i += bitIndex; + m >>= bitIndex; + + VarData* vd = sVars[i]; + ASMJIT_ASSERT(vd != nullptr); + + VarAttr* va = vd->getVa(); + ASMJIT_ASSERT(va == nullptr || !va->hasFlag(kVarAttrXReg)); + + if (vd->isModified() && availableRegs) { + // Don't check for alternatives if the variable has to be spilled. + if (va == nullptr || !va->hasFlag(kVarAttrSpill)) { + uint32_t altRegs = guessSpill(vd, availableRegs); + + if (altRegs != 0) { + uint32_t regIndex = Utils::findFirstBit(altRegs); + uint32_t regMask = Utils::mask(regIndex); + + _context->move(vd, regIndex); + availableRegs ^= regMask; + continue; + } + } + } + + _context->spill(vd); + } while (m != 0); +} + +template +ASMJIT_INLINE void X86VarAlloc::alloc() { + if (isVaDone(C)) + return; + + uint32_t i; + bool didWork; + + VarAttr* list = getVaListByClass(C); + uint32_t count = getVaCountByClass(C); + + // Alloc 'in' regs. + do { + didWork = false; + for (i = 0; i < count; i++) { + VarAttr* aVa = &list[i]; + VarData* aVd = aVa->getVd(); + + if ((aVa->getFlags() & (kVarAttrRReg | kVarAttrAllocRDone)) != kVarAttrRReg) + continue; + + uint32_t aIndex = aVd->getRegIndex(); + uint32_t bIndex = aVa->getInRegIndex(); + + // Shouldn't be the same. + ASMJIT_ASSERT(aIndex != bIndex); + + VarData* bVd = getState()->getListByClass(C)[bIndex]; + if (bVd != nullptr) { + // Gp registers only - Swap two registers if we can solve two + // allocation tasks by a single 'xchg' instruction, swapping + // two registers required by the instruction/node or one register + // required with another non-required. + if (C == kX86RegClassGp && aIndex != kInvalidReg) { + VarAttr* bVa = bVd->getVa(); + _context->swapGp(aVd, bVd); + + aVa->orFlags(kVarAttrAllocRDone); + addVaDone(C); + + // Doublehit, two registers allocated by a single swap. + if (bVa != nullptr && bVa->getInRegIndex() == aIndex) { + bVa->orFlags(kVarAttrAllocRDone); + addVaDone(C); + } + + didWork = true; + continue; + } + } + else if (aIndex != kInvalidReg) { + _context->move(aVd, bIndex); + + aVa->orFlags(kVarAttrAllocRDone); + addVaDone(C); + + didWork = true; + continue; + } + else { + _context->alloc(aVd, bIndex); + + aVa->orFlags(kVarAttrAllocRDone); + addVaDone(C); + + didWork = true; + continue; + } + } + } while (didWork); + + // Alloc 'out' regs. + for (i = 0; i < count; i++) { + VarAttr* va = &list[i]; + VarData* vd = va->getVd(); + + if ((va->getFlags() & (kVarAttrXReg | kVarAttrAllocWDone)) != kVarAttrWReg) + continue; + + uint32_t regIndex = va->getOutRegIndex(); + ASMJIT_ASSERT(regIndex != kInvalidReg); + + if (vd->getRegIndex() != regIndex) { + ASMJIT_ASSERT(getState()->getListByClass(C)[regIndex] == nullptr); + _context->attach(vd, regIndex, false); + } + + va->orFlags(kVarAttrAllocWDone); + addVaDone(C); + } +} + +// ============================================================================ +// [asmjit::X86VarAlloc - GuessAlloc / GuessSpill] +// ============================================================================ + +#if 0 +// TODO: This works, but should be improved a bit. The idea is to follow code +// flow and to restrict the possible registers where to allocate as much as +// possible so we won't allocate to a register which is home of some variable +// that's gonna be used together with `vd`. The previous implementation didn't +// care about it and produced suboptimal results even in code which didn't +// require any allocs & spills. 
+enum { kMaxGuessFlow = 10 }; + +struct GuessFlowData { + ASMJIT_INLINE void init(HLNode* node, uint32_t counter, uint32_t safeRegs) { + _node = node; + _counter = counter; + _safeRegs = safeRegs; + } + + //! Node to start. + HLNode* _node; + //! Number of instructions processed from the beginning. + uint32_t _counter; + //! Safe registers, which can be used for the allocation. + uint32_t _safeRegs; +}; + +template +ASMJIT_INLINE uint32_t X86VarAlloc::guessAlloc(VarData* vd, uint32_t allocableRegs) { + ASMJIT_TLOG("[RA-GUESS] === %s (Input=%08X) ===\n", vd->getName(), allocableRegs); + ASMJIT_ASSERT(allocableRegs != 0); + + return allocableRegs; + + // Stop now if there is only one bit (register) set in `allocableRegs` mask. + uint32_t safeRegs = allocableRegs; + if (Utils::isPowerOf2(safeRegs)) + return safeRegs; + + uint32_t counter = 0; + uint32_t maxInst = _compiler->getMaxLookAhead(); + + uint32_t localId = vd->getLocalId(); + uint32_t localToken = _compiler->_generateUniqueToken(); + + uint32_t gfIndex = 0; + GuessFlowData gfArray[kMaxGuessFlow]; + + HLNode* node = _node; + + // Mark this node and also exit node, it will terminate the loop if encountered. + node->setTokenId(localToken); + _context->getFunc()->getExitNode()->setTokenId(localToken); + + // TODO: I don't like this jump, maybe some refactor would help to eliminate it. + goto _Advance; + + // Look ahead and calculate mask of special registers on both - input/output. + for (;;) { + do { + ASMJIT_TSEC({ + _context->_traceNode(_context, node, " "); + }); + + // Terminate if we have seen this node already. + if (node->hasTokenId(localToken)) + break; + + node->setTokenId(localToken); + counter++; + + // Terminate if the variable is dead here. + if (node->hasLiveness() && !node->getLiveness()->getBit(localId)) { + ASMJIT_TLOG("[RA-GUESS] %s (Terminating, Not alive here)\n", vd->getName()); + break; + } + + if (node->hasState()) { + // If this node contains a state, we have to consider only the state + // and then we can terminate safely - this happens if we jumped to a + // label that is backward (i.e. start of the loop). If we survived + // the liveness check it means that the variable is actually used. + X86VarState* state = node->getState(); + uint32_t homeRegs = 0; + uint32_t tempRegs = 0; + + VarData** vdArray = state->getListByClass(C); + uint32_t vdCount = _compiler->getRegCount().get(C); + + for (uint32_t vdIndex = 0; vdIndex < vdCount; vdIndex++) { + if (vdArray[vdIndex] != nullptr) + tempRegs |= Utils::mask(vdIndex); + + if (vdArray[vdIndex] == vd) + homeRegs = Utils::mask(vdIndex); + } + + tempRegs = safeRegs & ~tempRegs; + if (!tempRegs) + goto _Done; + safeRegs = tempRegs; + + tempRegs = safeRegs & homeRegs; + if (!tempRegs) + goto _Done; + safeRegs = tempRegs; + + goto _Done; + } + else { + // Process the current node if it has any variables associated in. + X86VarMap* map = node->getMap(); + if (map != nullptr) { + VarAttr* vaList = map->getVaListByClass(C); + uint32_t vaCount = map->getVaCountByClass(C); + + uint32_t homeRegs = 0; + uint32_t tempRegs = safeRegs; + bool found = false; + + for (uint32_t vaIndex = 0; vaIndex < vaCount; vaIndex++) { + VarAttr* va = &vaList[vaIndex]; + + if (va->getVd() == vd) { + found = true; + + // Terminate if the variable is overwritten here. 
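+            // (no read flag among kVarAttrRAll means the old value is dead
+            // here, so earlier nodes cannot constrain the choice any further).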
+ if (!(va->getFlags() & kVarAttrRAll)) + goto _Done; + + uint32_t mask = va->getAllocableRegs(); + if (mask != 0) { + tempRegs &= mask; + if (!tempRegs) + goto _Done; + safeRegs = tempRegs; + } + + mask = va->getInRegs(); + if (mask != 0) { + tempRegs &= mask; + if (!tempRegs) + goto _Done; + + safeRegs = tempRegs; + goto _Done; + } + } + else { + // It happens often that one variable is used across many blocks of + // assembly code. It can sometimes cause one variable to be allocated + // in a different register, which can cause state switch to generate + // moves in case of jumps and state intersections. We try to prevent + // this case by also considering variables' home registers. + homeRegs |= va->getVd()->getHomeMask(); + } + } + + tempRegs &= ~(map->_outRegs.get(C) | map->_clobberedRegs.get(C)); + if (!found) + tempRegs &= ~map->_inRegs.get(C); + + if (!tempRegs) + goto _Done; + safeRegs = tempRegs; + + if (homeRegs) { + tempRegs = safeRegs & ~homeRegs; + if (!tempRegs) + goto _Done; + safeRegs = tempRegs; + } + } + } + +_Advance: + // Terminate if this is a return node. + if (node->hasFlag(HLNode::kFlagIsRet)) + goto _Done; + + // Advance on non-conditional jump. + if (node->hasFlag(HLNode::kFlagIsJmp)) { + // Stop on a jump that is not followed. + node = static_cast(node)->getTarget(); + if (node == nullptr) + break; + continue; + } + + // Split flow on a conditional jump. + if (node->hasFlag(HLNode::kFlagIsJcc)) { + // Put the next node on the stack and follow the target if possible. + HLNode* next = node->getNext(); + if (next != nullptr && gfIndex < kMaxGuessFlow) + gfArray[gfIndex++].init(next, counter, safeRegs); + + node = static_cast(node)->getTarget(); + if (node == nullptr) + break; + continue; + } + + node = node->getNext(); + ASMJIT_ASSERT(node != nullptr); + } while (counter < maxInst); + +_Done: + for (;;) { + if (gfIndex == 0) + goto _Ret; + + GuessFlowData* data = &gfArray[--gfIndex]; + node = data->_node; + counter = data->_counter; + + uint32_t tempRegs = safeRegs & data->_safeRegs; + if (!tempRegs) + continue; + + safeRegs = tempRegs; + break; + } + } + +_Ret: + ASMJIT_TLOG("[RA-GUESS] === %s (Output=%08X) ===\n", vd->getName(), safeRegs); + return safeRegs; +} +#endif + +template +ASMJIT_INLINE uint32_t X86VarAlloc::guessAlloc(VarData* vd, uint32_t allocableRegs) { + ASMJIT_ASSERT(allocableRegs != 0); + + // Stop now if there is only one bit (register) set in `allocableRegs` mask. + if (Utils::isPowerOf2(allocableRegs)) + return allocableRegs; + + uint32_t localId = vd->getLocalId(); + uint32_t safeRegs = allocableRegs; + + uint32_t i; + uint32_t maxLookAhead = _compiler->getMaxLookAhead(); + + // Look ahead and calculate mask of special registers on both - input/output. + HLNode* node = _node; + for (i = 0; i < maxLookAhead; i++) { + BitArray* liveness = node->getLiveness(); + + // If the variable becomes dead it doesn't make sense to continue. + if (liveness != nullptr && !liveness->getBit(localId)) + break; + + // Stop on `HLSentinel` and `HLRet`. + if (node->hasFlag(HLNode::kFlagIsRet)) + break; + + // Stop on conditional jump, we don't follow them. + if (node->hasFlag(HLNode::kFlagIsJcc)) + break; + + // Advance on non-conditional jump. + if (node->hasFlag(HLNode::kFlagIsJmp)) { + node = static_cast(node)->getTarget(); + // Stop on jump that is not followed. 
+      if (node == nullptr)
+        break;
+    }
+
+    node = node->getNext();
+    ASMJIT_ASSERT(node != nullptr);
+
+    X86VarMap* map = node->getMap();
+    if (map != nullptr) {
+      VarAttr* va = map->findVaByClass(C, vd);
+      uint32_t mask;
+
+      if (va != nullptr) {
+        // If the variable is overwritten it doesn't make sense to continue.
+        if (!(va->getFlags() & kVarAttrRAll))
+          break;
+
+        mask = va->getAllocableRegs();
+        if (mask != 0) {
+          allocableRegs &= mask;
+          if (allocableRegs == 0)
+            break;
+          safeRegs = allocableRegs;
+        }
+
+        mask = va->getInRegs();
+        if (mask != 0) {
+          allocableRegs &= mask;
+          if (allocableRegs == 0)
+            break;
+          safeRegs = allocableRegs;
+          break;
+        }
+
+        allocableRegs &= ~(map->_outRegs.get(C) | map->_clobberedRegs.get(C));
+        if (allocableRegs == 0)
+          break;
+      }
+      else {
+        allocableRegs &= ~(map->_inRegs.get(C) | map->_outRegs.get(C) | map->_clobberedRegs.get(C));
+        if (allocableRegs == 0)
+          break;
+      }
+
+      safeRegs = allocableRegs;
+    }
+  }
+
+  return safeRegs;
+}
+
+template<int C>
+ASMJIT_INLINE uint32_t X86VarAlloc::guessSpill(VarData* vd, uint32_t allocableRegs) {
+  ASMJIT_ASSERT(allocableRegs != 0);
+
+  return 0;
+}
+
+// ============================================================================
+// [asmjit::X86VarAlloc - Modified]
+// ============================================================================
+
+template<int C>
+ASMJIT_INLINE void X86VarAlloc::modified() {
+  VarAttr* list = getVaListByClass(C);
+  uint32_t count = getVaCountByClass(C);
+
+  for (uint32_t i = 0; i < count; i++) {
+    VarAttr* va = &list[i];
+
+    if (va->hasFlag(kVarAttrWReg)) {
+      VarData* vd = va->getVd();
+
+      uint32_t regIndex = vd->getRegIndex();
+      uint32_t regMask = Utils::mask(regIndex);
+
+      vd->setModified(true);
+      _context->_x86State._modified.or_(C, regMask);
+    }
+  }
+}
+
+// ============================================================================
+// [asmjit::X86CallAlloc]
+// ============================================================================
+
+//! \internal
+//!
+//! Register allocator context (function call).
+struct X86CallAlloc : public X86BaseAlloc {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86CallAlloc(X86Context* context) : X86BaseAlloc(context) {}
+  ASMJIT_INLINE ~X86CallAlloc() {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get the node.
+  ASMJIT_INLINE X86CallNode* getNode() const { return static_cast<X86CallNode*>(_node); }
+
+  // --------------------------------------------------------------------------
+  // [Run]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE Error run(X86CallNode* node);
+
+  // --------------------------------------------------------------------------
+  // [Init / Cleanup]
+  // --------------------------------------------------------------------------
+
+protected:
+  // Just to prevent calling these methods from X86Context::translate().
+ ASMJIT_INLINE void init(X86CallNode* node, X86VarMap* map); + ASMJIT_INLINE void cleanup(); + + // -------------------------------------------------------------------------- + // [Plan / Alloc / Spill / Move] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void plan(); + + template + ASMJIT_INLINE void spill(); + + template + ASMJIT_INLINE void alloc(); + + // -------------------------------------------------------------------------- + // [AllocImmsOnStack] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE void allocImmsOnStack(); + + // -------------------------------------------------------------------------- + // [Duplicate] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void duplicate(); + + // -------------------------------------------------------------------------- + // [GuessAlloc / GuessSpill] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE uint32_t guessAlloc(VarData* vd, uint32_t allocableRegs); + + template + ASMJIT_INLINE uint32_t guessSpill(VarData* vd, uint32_t allocableRegs); + + // -------------------------------------------------------------------------- + // [Save] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void save(); + + // -------------------------------------------------------------------------- + // [Clobber] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void clobber(); + + // -------------------------------------------------------------------------- + // [Ret] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE void ret(); + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! Will alloc to these registers. + X86RegMask _willAlloc; + //! Will spill these registers. + X86RegMask _willSpill; +}; + +// ============================================================================ +// [asmjit::X86CallAlloc - Run] +// ============================================================================ + +ASMJIT_INLINE Error X86CallAlloc::run(X86CallNode* node) { + // Initialize. + X86VarMap* map = node->getMap(); + if (map == nullptr) + return kErrorOk; + + // Initialize the allocator; prepare basics and connect Vd->Va. + init(node, map); + + // Plan register allocation. Planner is only able to assign one register per + // variable. If any variable is used multiple times it will be handled later. + plan(); + plan(); + plan(); + + // Spill. + spill(); + spill(); + spill(); + + // Alloc. + alloc(); + alloc(); + alloc(); + + // Unuse clobbered registers that are not used to pass function arguments and + // save variables used to pass function arguments that will be reused later on. + save(); + save(); + save(); + + // Allocate immediates in registers and on the stack. + allocImmsOnStack(); + + // Duplicate. + duplicate(); + duplicate(); + duplicate(); + + // Translate call operand. + ASMJIT_PROPAGATE_ERROR(X86Context_translateOperands(_context, &node->_target, 1)); + + // To emit instructions after call. + _compiler->_setCursor(node); + + // If the callee pops stack it has to be manually adjusted back. 
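+  // E.g. under a callee-pops convention such as __stdcall on x86, the callee's
+  // `ret N` already removed the argument area, so the `sub esp, N` emitted
+  // below restores the caller's pre-call stack layout.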
+  X86FuncDecl* decl = node->getDecl();
+  if (decl->getCalleePopsStack() && decl->getArgStackSize() != 0) {
+    _compiler->emit(kX86InstIdSub, _context->_zsp, static_cast<int>(decl->getArgStackSize()));
+  }
+
+  // Clobber.
+  clobber<kX86RegClassGp>();
+  clobber<kX86RegClassMm>();
+  clobber<kX86RegClassXyz>();
+
+  // Return.
+  ret();
+
+  // Unuse.
+  unuseAfter<kX86RegClassGp>();
+  unuseAfter<kX86RegClassMm>();
+  unuseAfter<kX86RegClassXyz>();
+
+  // Cleanup; disconnect Vd->Va.
+  cleanup();
+
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::X86CallAlloc - Init / Cleanup]
+// ============================================================================
+
+ASMJIT_INLINE void X86CallAlloc::init(X86CallNode* node, X86VarMap* map) {
+  X86BaseAlloc::init(node, map);
+
+  // Create mask of all registers that will be used to pass function arguments.
+  _willAlloc = node->_usedArgs;
+  _willSpill.reset();
+}
+
+ASMJIT_INLINE void X86CallAlloc::cleanup() {
+  X86BaseAlloc::cleanup();
+}
+
+// ============================================================================
+// [asmjit::X86CallAlloc - Plan / Spill / Alloc]
+// ============================================================================
+
+template<int C>
+ASMJIT_INLINE void X86CallAlloc::plan() {
+  uint32_t i;
+  uint32_t clobbered = _map->_clobberedRegs.get(C);
+
+  uint32_t willAlloc = _willAlloc.get(C);
+  uint32_t willFree = clobbered & ~willAlloc;
+
+  VarAttr* list = getVaListByClass(C);
+  uint32_t count = getVaCountByClass(C);
+
+  X86VarState* state = getState();
+
+  // Calculate 'willAlloc' and 'willFree' masks based on mandatory masks.
+  for (i = 0; i < count; i++) {
+    VarAttr* va = &list[i];
+    VarData* vd = va->getVd();
+
+    uint32_t vaFlags = va->getFlags();
+    uint32_t regIndex = vd->getRegIndex();
+    uint32_t regMask = (regIndex != kInvalidReg) ? Utils::mask(regIndex) : 0;
+
+    if ((vaFlags & kVarAttrRReg) != 0) {
+      // Planning register allocation. First check whether the variable is
+      // already allocated in a register and whether it can stay there.
+      // Function arguments are passed either in a specific register or on
+      // the stack, so we mostly care about mandatory registers.
+      uint32_t inRegs = va->getInRegs();
+
+      if (inRegs == 0) {
+        inRegs = va->getAllocableRegs();
+      }
+
+      // Optimize situation where the variable has to be allocated in a
+      // mandatory register, but it's already allocated in a register that
+      // is not clobbered (i.e. it will survive the function call).
+      if ((regMask & inRegs) != 0 || ((regMask & ~clobbered) != 0 && (vaFlags & kVarAttrUnuse) == 0)) {
+        va->setInRegIndex(regIndex);
+        va->orFlags(kVarAttrAllocRDone);
+        addVaDone(C);
+      }
+      else {
+        willFree |= regMask;
+      }
+    }
+    else {
+      // Memory access - if the variable is allocated it has to be freed.
+      if (regMask != 0) {
+        willFree |= regMask;
+      }
+      else {
+        va->orFlags(kVarAttrAllocRDone);
+        addVaDone(C);
+      }
+    }
+  }
+
+  // Occupied registers without 'willFree' registers; contains basically
+  // all the registers we can use to allocate variables without inRegs
+  // specified.
+  uint32_t occupied = state->_occupied.get(C) & ~willFree;
+  uint32_t willSpill = 0;
+
+  // Find the best registers for variables that are not allocated yet. Only
+  // useful for Gp registers used as call operand.
+  for (i = 0; i < count; i++) {
+    VarAttr* va = &list[i];
+    VarData* vd = va->getVd();
+
+    uint32_t vaFlags = va->getFlags();
+    if ((vaFlags & kVarAttrAllocRDone) != 0 || (vaFlags & kVarAttrRReg) == 0)
+      continue;
+
+    // All registers except Gp used by call itself must have inRegIndex.
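+    // (Register masks here are plain bitfields: bit N set means physical
+    // register N is in the set, e.g. on x86 a Gp mask of 0x07 is
+    // EAX|ECX|EDX. Utils::mask(i) builds `1u << i` and
+    // Utils::findFirstBit(m) returns the lowest set bit.)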
+ uint32_t m = va->getInRegs(); + if (C != kX86RegClassGp || m) { + ASMJIT_ASSERT(m != 0); + va->setInRegIndex(Utils::findFirstBit(m)); + willSpill |= occupied & m; + continue; + } + + m = va->getAllocableRegs() & ~(willAlloc ^ m); + m = guessAlloc(vd, m); + ASMJIT_ASSERT(m != 0); + + uint32_t candidateRegs = m & ~occupied; + if (candidateRegs == 0) { + candidateRegs = m & occupied & ~state->_modified.get(C); + if (candidateRegs == 0) + candidateRegs = m; + } + + if (!(vaFlags & (kVarAttrWReg | kVarAttrUnuse)) && (candidateRegs & ~clobbered)) + candidateRegs &= ~clobbered; + + uint32_t regIndex = Utils::findFirstBit(candidateRegs); + uint32_t regMask = Utils::mask(regIndex); + + va->setInRegIndex(regIndex); + va->setInRegs(regMask); + + willAlloc |= regMask; + willSpill |= regMask & occupied; + willFree &= ~regMask; + + occupied |= regMask; + continue; + } + + // Set calculated masks back to the allocator; needed by spill() and alloc(). + _willSpill.set(C, willSpill); + _willAlloc.set(C, willAlloc); +} + +template +ASMJIT_INLINE void X86CallAlloc::spill() { + uint32_t m = _willSpill.get(C); + uint32_t i = static_cast(0) - 1; + + if (m == 0) + return; + + X86VarState* state = getState(); + VarData** sVars = state->getListByClass(C); + + // Available registers for decision if move has any benefit over spill. + uint32_t availableRegs = getGaRegs(C) & ~(state->_occupied.get(C) | m | _willAlloc.get(C)); + + do { + // We always advance one more to destroy the bit that we have found. + uint32_t bitIndex = Utils::findFirstBit(m) + 1; + + i += bitIndex; + m >>= bitIndex; + + VarData* vd = sVars[i]; + ASMJIT_ASSERT(vd != nullptr); + ASMJIT_ASSERT(vd->getVa() == nullptr); + + if (vd->isModified() && availableRegs) { + uint32_t available = guessSpill(vd, availableRegs); + if (available != 0) { + uint32_t regIndex = Utils::findFirstBit(available); + uint32_t regMask = Utils::mask(regIndex); + + _context->move(vd, regIndex); + availableRegs ^= regMask; + continue; + } + } + + _context->spill(vd); + } while (m != 0); +} + +template +ASMJIT_INLINE void X86CallAlloc::alloc() { + if (isVaDone(C)) + return; + + VarAttr* list = getVaListByClass(C); + uint32_t count = getVaCountByClass(C); + + X86VarState* state = getState(); + VarData** sVars = state->getListByClass(C); + + uint32_t i; + bool didWork; + + do { + didWork = false; + for (i = 0; i < count; i++) { + VarAttr* aVa = &list[i]; + VarData* aVd = aVa->getVd(); + + if ((aVa->getFlags() & (kVarAttrRReg | kVarAttrAllocRDone)) != kVarAttrRReg) + continue; + + uint32_t aIndex = aVd->getRegIndex(); + uint32_t bIndex = aVa->getInRegIndex(); + + // Shouldn't be the same. + ASMJIT_ASSERT(aIndex != bIndex); + + VarData* bVd = getState()->getListByClass(C)[bIndex]; + if (bVd != nullptr) { + VarAttr* bVa = bVd->getVa(); + + // Gp registers only - Swap two registers if we can solve two + // allocation tasks by a single 'xchg' instruction, swapping + // two registers required by the instruction/node or one register + // required with another non-required. + if (C == kX86RegClassGp) { + _context->swapGp(aVd, bVd); + + aVa->orFlags(kVarAttrAllocRDone); + addVaDone(C); + + // Doublehit, two registers allocated by a single swap. 
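+          // (E.g. if EAX holds `a`, which must move to ECX, and ECX holds
+          // `b`, which must move to EAX, a single `xchg eax, ecx` satisfies
+          // both constraints at once instead of spilling one variable first.)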
+ if (bVa != nullptr && bVa->getInRegIndex() == aIndex) { + bVa->orFlags(kVarAttrAllocRDone); + addVaDone(C); + } + + didWork = true; + continue; + } + } + else if (aIndex != kInvalidReg) { + _context->move(aVd, bIndex); + _context->_clobberedRegs.or_(C, Utils::mask(bIndex)); + + aVa->orFlags(kVarAttrAllocRDone); + addVaDone(C); + + didWork = true; + continue; + } + else { + _context->alloc(aVd, bIndex); + _context->_clobberedRegs.or_(C, Utils::mask(bIndex)); + + aVa->orFlags(kVarAttrAllocRDone); + addVaDone(C); + + didWork = true; + continue; + } + } + } while (didWork); +} + +// ============================================================================ +// [asmjit::X86CallAlloc - AllocImmsOnStack] +// ============================================================================ + +ASMJIT_INLINE void X86CallAlloc::allocImmsOnStack() { + X86CallNode* node = getNode(); + X86FuncDecl* decl = node->getDecl(); + + uint32_t argCount = decl->getNumArgs(); + Operand* args = node->_args; + + for (uint32_t i = 0; i < argCount; i++) { + Operand& op = args[i]; + + if (!op.isImm()) + continue; + + const Imm& imm = static_cast(op); + const FuncInOut& arg = decl->getArg(i); + uint32_t varType = arg.getVarType(); + + if (arg.hasStackOffset()) { + X86Mem dst = x86::ptr(_context->_zsp, -static_cast(_context->getRegSize()) + arg.getStackOffset()); + _context->emitMoveImmOnStack(varType, &dst, &imm); + } + else { + _context->emitMoveImmToReg(varType, arg.getRegIndex(), &imm); + } + } +} + +// ============================================================================ +// [asmjit::X86CallAlloc - Duplicate] +// ============================================================================ + +template +ASMJIT_INLINE void X86CallAlloc::duplicate() { + VarAttr* list = getVaListByClass(C); + uint32_t count = getVaCountByClass(C); + + for (uint32_t i = 0; i < count; i++) { + VarAttr* va = &list[i]; + if (!va->hasFlag(kVarAttrRReg)) + continue; + + uint32_t inRegs = va->getInRegs(); + if (!inRegs) + continue; + + VarData* vd = va->getVd(); + uint32_t regIndex = vd->getRegIndex(); + + ASMJIT_ASSERT(regIndex != kInvalidReg); + + inRegs &= ~Utils::mask(regIndex); + if (!inRegs) + continue; + + for (uint32_t dupIndex = 0; inRegs != 0; dupIndex++, inRegs >>= 1) { + if (inRegs & 0x1) { + _context->emitMove(vd, dupIndex, regIndex, "Duplicate"); + _context->_clobberedRegs.or_(C, Utils::mask(dupIndex)); + } + } + } +} + +// ============================================================================ +// [asmjit::X86CallAlloc - GuessAlloc / GuessSpill] +// ============================================================================ + +template +ASMJIT_INLINE uint32_t X86CallAlloc::guessAlloc(VarData* vd, uint32_t allocableRegs) { + ASMJIT_ASSERT(allocableRegs != 0); + + // Stop now if there is only one bit (register) set in 'allocableRegs' mask. + if (Utils::isPowerOf2(allocableRegs)) + return allocableRegs; + + uint32_t i; + uint32_t safeRegs = allocableRegs; + uint32_t maxLookAhead = _compiler->getMaxLookAhead(); + + // Look ahead and calculate mask of special registers on both - input/output. + HLNode* node = _node; + for (i = 0; i < maxLookAhead; i++) { + // Stop on 'HLRet' and 'HLSentinel. + if (node->hasFlag(HLNode::kFlagIsRet)) + break; + + // Stop on conditional jump, we don't follow them. + if (node->hasFlag(HLNode::kFlagIsJcc)) + break; + + // Advance on non-conditional jump. + if (node->hasFlag(HLNode::kFlagIsJmp)) { + node = static_cast(node)->getTarget(); + // Stop on jump that is not followed. 
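+    // (The look-ahead follows one straight-line path only: unconditional
+    // jumps are followed, conditional ones end the scan, so the result is a
+    // heuristic, not a full data-flow analysis.)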
+ if (node == nullptr) + break; + } + + node = node->getNext(); + ASMJIT_ASSERT(node != nullptr); + + X86VarMap* map = node->getMap(); + if (map != nullptr) { + VarAttr* va = map->findVaByClass(C, vd); + if (va != nullptr) { + uint32_t inRegs = va->getInRegs(); + if (inRegs != 0) { + safeRegs = allocableRegs; + allocableRegs &= inRegs; + + if (allocableRegs == 0) + goto _UseSafeRegs; + else + return allocableRegs; + } + } + + safeRegs = allocableRegs; + allocableRegs &= ~(map->_inRegs.get(C) | map->_outRegs.get(C) | map->_clobberedRegs.get(C)); + + if (allocableRegs == 0) + break; + } + } + +_UseSafeRegs: + return safeRegs; +} + +template +ASMJIT_INLINE uint32_t X86CallAlloc::guessSpill(VarData* vd, uint32_t allocableRegs) { + ASMJIT_ASSERT(allocableRegs != 0); + + return 0; +} + +// ============================================================================ +// [asmjit::X86CallAlloc - Save] +// ============================================================================ + +template +ASMJIT_INLINE void X86CallAlloc::save() { + X86VarState* state = getState(); + VarData** sVars = state->getListByClass(C); + + uint32_t i; + uint32_t affected = _map->_clobberedRegs.get(C) & state->_occupied.get(C) & state->_modified.get(C); + + for (i = 0; affected != 0; i++, affected >>= 1) { + if (affected & 0x1) { + VarData* vd = sVars[i]; + ASMJIT_ASSERT(vd != nullptr); + ASMJIT_ASSERT(vd->isModified()); + + VarAttr* va = vd->getVa(); + if (va == nullptr || (va->getFlags() & (kVarAttrWReg | kVarAttrUnuse)) == 0) { + _context->save(vd); + } + } + } +} + +// ============================================================================ +// [asmjit::X86CallAlloc - Clobber] +// ============================================================================ + +template +ASMJIT_INLINE void X86CallAlloc::clobber() { + X86VarState* state = getState(); + VarData** sVars = state->getListByClass(C); + + uint32_t i; + uint32_t affected = _map->_clobberedRegs.get(C) & state->_occupied.get(C); + + for (i = 0; affected != 0; i++, affected >>= 1) { + if (affected & 0x1) { + VarData* vd = sVars[i]; + ASMJIT_ASSERT(vd != nullptr); + + VarAttr* va = vd->getVa(); + uint32_t vdState = kVarStateNone; + + if (!vd->isModified() || (va != nullptr && (va->getFlags() & (kVarAttrWAll | kVarAttrUnuse)) != 0)) { + vdState = kVarStateMem; + } + + _context->unuse(vd, vdState); + } + } +} + +// ============================================================================ +// [asmjit::X86CallAlloc - Ret] +// ============================================================================ + +ASMJIT_INLINE void X86CallAlloc::ret() { + X86CallNode* node = getNode(); + X86FuncDecl* decl = node->getDecl(); + + uint32_t i; + Operand* rets = node->_ret; + + for (i = 0; i < 2; i++) { + const FuncInOut& ret = decl->getRet(i); + Operand* op = &rets[i]; + + if (!ret.hasRegIndex() || !op->isVar()) + continue; + + VarData* vd = _compiler->getVdById(op->getId()); + uint32_t vf = _x86VarInfo[vd->getType()].getFlags(); + uint32_t regIndex = ret.getRegIndex(); + + switch (vd->getClass()) { + case kX86RegClassGp: + ASMJIT_ASSERT(x86VarTypeToClass(ret.getVarType()) == vd->getClass()); + + _context->unuse(vd); + _context->attach(vd, regIndex, true); + break; + + case kX86RegClassMm: + ASMJIT_ASSERT(x86VarTypeToClass(ret.getVarType()) == vd->getClass()); + + _context->unuse(vd); + _context->attach(vd, regIndex, true); + break; + + case kX86RegClassXyz: + if (ret.getVarType() == kVarTypeFp32 || ret.getVarType() == kVarTypeFp64) { + X86Mem m = _context->getVarMem(vd); + 
m.setSize( + (vf & VarInfo::kFlagSP) ? 4 : + (vf & VarInfo::kFlagDP) ? 8 : + (ret.getVarType() == kVarTypeFp32) ? 4 : 8); + + _context->unuse(vd, kVarStateMem); + _compiler->fstp(m); + } + else { + ASMJIT_ASSERT(x86VarTypeToClass(ret.getVarType()) == vd->getClass()); + + _context->unuse(vd); + _context->attach(vd, regIndex, true); + } + break; + } + } +} + +// ============================================================================ +// [asmjit::X86Context - TranslateOperands] +// ============================================================================ + +//! \internal +static Error X86Context_translateOperands(X86Context* self, Operand* opList, uint32_t opCount) { + X86Compiler* compiler = self->getCompiler(); + uint32_t hasGpdBase = compiler->getRegSize() == 4; + + // Translate variables into registers. + for (uint32_t i = 0; i < opCount; i++) { + Operand* op = &opList[i]; + + if (op->isVar()) { + VarData* vd = compiler->getVdById(op->getId()); + ASMJIT_ASSERT(vd != nullptr); + ASMJIT_ASSERT(vd->getRegIndex() != kInvalidReg); + + op->_vreg.op = Operand::kTypeReg; + op->_vreg.index = vd->getRegIndex(); + } + else if (op->isMem()) { + X86Mem* m = static_cast(op); + + if (m->isBaseIndexType() && OperandUtil::isVarId(m->getBase())) { + VarData* vd = compiler->getVdById(m->getBase()); + + if (m->getMemType() == kMemTypeBaseIndex) { + ASMJIT_ASSERT(vd->getRegIndex() != kInvalidReg); + op->_vmem.base = vd->getRegIndex(); + } + else { + if (!vd->isMemArg()) + self->getVarCell(vd); + + // Offset will be patched later by X86Context_patchFuncMem(). + m->setGpdBase(hasGpdBase); + m->adjust(vd->isMemArg() ? self->_argActualDisp : self->_varActualDisp); + } + } + + if (OperandUtil::isVarId(m->getIndex())) { + VarData* vd = compiler->getVdById(m->getIndex()); + ASMJIT_ASSERT(vd->getRegIndex() != kInvalidReg); + ASMJIT_ASSERT(vd->getRegIndex() != kX86RegIndexR12); + op->_vmem.index = vd->getRegIndex(); + } + } + } + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::X86Context - TranslatePrologEpilog] +// ============================================================================ + +//! \internal +static Error X86Context_initFunc(X86Context* self, X86FuncNode* func) { + X86Compiler* compiler = self->getCompiler(); + X86FuncDecl* decl = func->getDecl(); + + X86RegMask& clobberedRegs = self->_clobberedRegs; + uint32_t regSize = compiler->getRegSize(); + + // Setup "Save-Restore" registers. + func->_saveRestoreRegs.set(kX86RegClassGp , clobberedRegs.get(kX86RegClassGp ) & decl->getPreserved(kX86RegClassGp )); + func->_saveRestoreRegs.set(kX86RegClassMm , clobberedRegs.get(kX86RegClassMm ) & decl->getPreserved(kX86RegClassMm )); + func->_saveRestoreRegs.set(kX86RegClassK , 0); + func->_saveRestoreRegs.set(kX86RegClassXyz, clobberedRegs.get(kX86RegClassXyz) & decl->getPreserved(kX86RegClassXyz)); + + ASMJIT_ASSERT(!func->_saveRestoreRegs.has(kX86RegClassGp, Utils::mask(kX86RegIndexSp))); + + // Setup required stack alignment and kFuncFlagIsStackMisaligned. + { + uint32_t requiredStackAlignment = Utils::iMax(self->_memMaxAlign, self->getRegSize()); + + if (requiredStackAlignment < 16) { + // Require 16-byte alignment if 8-byte vars are used. 
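+      // (Illustrative rationale: an aligned `movaps [esp+N], xmm0` faults on
+      // a stack that is only 4-byte aligned, and 8-byte vars keep their
+      // natural alignment only if the frame guarantees it, hence the bump
+      // to 16.)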
+ if (self->_mem8ByteVarsUsed) + requiredStackAlignment = 16; + else if (func->_saveRestoreRegs.get(kX86RegClassMm) || func->_saveRestoreRegs.get(kX86RegClassXyz)) + requiredStackAlignment = 16; + else if (Utils::inInterval(func->getRequiredStackAlignment(), 8, 16)) + requiredStackAlignment = 16; + } + + if (func->getRequiredStackAlignment() < requiredStackAlignment) + func->setRequiredStackAlignment(requiredStackAlignment); + + func->updateRequiredStackAlignment(); + } + + // Adjust stack pointer if function is caller. + if (func->isCaller()) { + func->addFuncFlags(kFuncFlagIsStackAdjusted); + func->_callStackSize = Utils::alignTo(func->getCallStackSize(), func->getRequiredStackAlignment()); + } + + // Adjust stack pointer if manual stack alignment is needed. + if (func->isStackMisaligned() && func->isNaked()) { + // Get a memory cell where the original stack frame will be stored. + VarCell* cell = self->_newStackCell(regSize, regSize); + if (cell == nullptr) + return self->getLastError(); // The error has already been set. + + func->addFuncFlags(kFuncFlagIsStackAdjusted); + self->_stackFrameCell = cell; + + if (decl->getArgStackSize() > 0) { + func->addFuncFlags(kFuncFlagX86MoveArgs); + func->setExtraStackSize(decl->getArgStackSize()); + } + + // Get temporary register which will be used to align the stack frame. + uint32_t fRegMask = Utils::bits(self->_regCount.getGp()); + uint32_t stackFrameCopyRegs; + + fRegMask &= ~(decl->getUsed(kX86RegClassGp) | Utils::mask(kX86RegIndexSp)); + stackFrameCopyRegs = fRegMask; + + // Try to remove modified registers from the mask. + uint32_t tRegMask = fRegMask & ~self->getClobberedRegs(kX86RegClassGp); + if (tRegMask != 0) + fRegMask = tRegMask; + + // Try to remove preserved registers from the mask. + tRegMask = fRegMask & ~decl->getPreserved(kX86RegClassGp); + if (tRegMask != 0) + fRegMask = tRegMask; + + ASMJIT_ASSERT(fRegMask != 0); + + uint32_t fRegIndex = Utils::findFirstBit(fRegMask); + func->_stackFrameRegIndex = static_cast(fRegIndex); + + // We have to save the register on the stack (it will be the part of prolog + // and epilog), however we shouldn't save it twice, so we will remove it + // from '_saveRestoreRegs' in case that it is preserved. + fRegMask = Utils::mask(fRegIndex); + if ((fRegMask & decl->getPreserved(kX86RegClassGp)) != 0) { + func->_saveRestoreRegs.andNot(kX86RegClassGp, fRegMask); + func->_isStackFrameRegPreserved = true; + } + + if (func->hasFuncFlag(kFuncFlagX86MoveArgs)) { + uint32_t maxRegs = (func->getArgStackSize() + regSize - 1) / regSize; + stackFrameCopyRegs &= ~fRegMask; + + tRegMask = stackFrameCopyRegs & self->getClobberedRegs(kX86RegClassGp); + uint32_t tRegCnt = Utils::bitCount(tRegMask); + + if (tRegCnt > 1 || (tRegCnt > 0 && tRegCnt <= maxRegs)) + stackFrameCopyRegs = tRegMask; + else + stackFrameCopyRegs = Utils::keepNOnesFromRight(stackFrameCopyRegs, Utils::iMin(maxRegs, 2)); + + func->_saveRestoreRegs.or_(kX86RegClassGp, stackFrameCopyRegs & decl->getPreserved(kX86RegClassGp)); + Utils::indexNOnesFromRight(func->_stackFrameCopyGpIndex, stackFrameCopyRegs, maxRegs); + } + } + // If function is not naked we generate standard "EBP/RBP" stack frame. + else if (!func->isNaked()) { + uint32_t fRegIndex = kX86RegIndexBp; + + func->_stackFrameRegIndex = static_cast(fRegIndex); + func->_isStackFrameRegPreserved = true; + } + + ASMJIT_PROPAGATE_ERROR(self->resolveCellOffsets()); + + // Adjust stack pointer if requested memory can't fit into "Red Zone" or "Spill Zone". 
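+  // (E.g. the SysV x86-64 ABI defines a 128-byte red zone below RSP that a
+  // leaf function may use without moving RSP; if all locals fit inside it,
+  // no `sub rsp, ...` adjustment is needed at all.)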
+ if (self->_memAllTotal > Utils::iMax(func->getRedZoneSize(), func->getSpillZoneSize())) { + func->addFuncFlags(kFuncFlagIsStackAdjusted); + } + + // Setup stack size used to save preserved registers. + { + uint32_t memGpSize = Utils::bitCount(func->_saveRestoreRegs.get(kX86RegClassGp )) * regSize; + uint32_t memMmSize = Utils::bitCount(func->_saveRestoreRegs.get(kX86RegClassMm )) * 8; + uint32_t memXmmSize = Utils::bitCount(func->_saveRestoreRegs.get(kX86RegClassXyz)) * 16; + + func->_pushPopStackSize = memGpSize; + func->_moveStackSize = memXmmSize + Utils::alignTo(memMmSize, 16); + } + + // Setup adjusted stack size. + if (func->isStackMisaligned()) { + func->_alignStackSize = 0; + } + else { + // If function is aligned, the RETURN address is stored in the aligned + // [ZSP - PtrSize] which makes current ZSP unaligned. + int32_t v = static_cast(regSize); + + // If we have to store function frame pointer we have to count it as well, + // because it is the first thing pushed on the stack. + if (func->hasStackFrameReg() && func->isStackFrameRegPreserved()) + v += regSize; + + // Count push/pop sequence. + v += func->getPushPopStackSize(); + + // Count save/restore sequence for XMM registers (should be already aligned). + v += func->getMoveStackSize(); + + // Maximum memory required to call all functions within this function. + v += func->getCallStackSize(); + + // Calculate the final offset to keep stack alignment. + func->_alignStackSize = Utils::alignDiff(v, func->getRequiredStackAlignment()); + } + + // Memory stack size. + func->_memStackSize = self->_memAllTotal; + func->_alignedMemStackSize = Utils::alignTo(func->_memStackSize, func->getRequiredStackAlignment()); + + if (func->isNaked()) { + self->_argBaseReg = kX86RegIndexSp; + + if (func->isStackAdjusted()) { + if (func->isStackMisaligned()) { + self->_argBaseOffset = static_cast( + func->getCallStackSize() + + func->getAlignedMemStackSize() + + func->getMoveStackSize() + + func->getAlignStackSize()); + self->_argBaseOffset -= regSize; + } + else { + self->_argBaseOffset = static_cast( + func->getCallStackSize() + + func->getAlignedMemStackSize() + + func->getMoveStackSize() + + func->getPushPopStackSize() + + func->getExtraStackSize() + + func->getAlignStackSize()); + } + } + else { + self->_argBaseOffset = func->getPushPopStackSize(); + } + } + else { + self->_argBaseReg = kX86RegIndexBp; + // Caused by "push zbp". + self->_argBaseOffset = regSize; + } + + self->_varBaseReg = kX86RegIndexSp; + self->_varBaseOffset = func->getCallStackSize(); + + if (!func->isStackAdjusted()) { + self->_varBaseOffset = -static_cast( + func->_alignStackSize + + func->_alignedMemStackSize + + func->_moveStackSize); + } + + return kErrorOk; +} + +//! 
\internal +static Error X86Context_patchFuncMem(X86Context* self, X86FuncNode* func, HLNode* stop) { + X86Compiler* compiler = self->getCompiler(); + HLNode* node = func; + + do { + if (node->getType() == HLNode::kTypeInst) { + HLInst* iNode = static_cast(node); + + if (iNode->hasMemOp()) { + X86Mem* m = iNode->getMemOp(); + + if (m->getMemType() == kMemTypeStackIndex && OperandUtil::isVarId(m->getBase())) { + VarData* vd = compiler->getVdById(m->getBase()); + ASMJIT_ASSERT(vd != nullptr); + + if (vd->isMemArg()) { + m->_vmem.base = self->_argBaseReg; + m->_vmem.displacement += self->_argBaseOffset + vd->getMemOffset(); + } + else { + VarCell* cell = vd->getMemCell(); + ASMJIT_ASSERT(cell != nullptr); + + m->_vmem.base = self->_varBaseReg; + m->_vmem.displacement += self->_varBaseOffset + cell->getOffset(); + } + } + } + } + + node = node->getNext(); + } while (node != stop); + + return kErrorOk; +} + +//! \internal +static Error X86Context_translatePrologEpilog(X86Context* self, X86FuncNode* func) { + X86Compiler* compiler = self->getCompiler(); + X86FuncDecl* decl = func->getDecl(); + + uint32_t regSize = compiler->getRegSize(); + + int32_t stackSize = static_cast( + func->getAlignStackSize() + + func->getCallStackSize() + + func->getAlignedMemStackSize() + + func->getMoveStackSize() + + func->getExtraStackSize()); + int32_t stackAlignment = func->getRequiredStackAlignment(); + + int32_t stackBase; + int32_t stackPtr; + + if (func->isStackAdjusted()) { + stackBase = static_cast( + func->getCallStackSize() + + func->getAlignedMemStackSize()); + } + else { + stackBase = -static_cast( + func->getAlignedMemStackSize() + + func->getAlignStackSize() + + func->getExtraStackSize()); + } + + uint32_t i, mask; + uint32_t regsGp = func->getSaveRestoreRegs(kX86RegClassGp ); + uint32_t regsMm = func->getSaveRestoreRegs(kX86RegClassMm ); + uint32_t regsXmm = func->getSaveRestoreRegs(kX86RegClassXyz); + + bool earlyPushPop = false; + bool useLeaEpilog = false; + + X86GpReg gpReg(self->_zsp); + X86GpReg fpReg(self->_zbp); + + X86Mem fpOffset; + + // -------------------------------------------------------------------------- + // [Prolog] + // -------------------------------------------------------------------------- + + compiler->_setCursor(func->getEntryNode()); + + // Entry. + if (func->isNaked()) { + if (func->isStackMisaligned()) { + fpReg.setIndex(func->getStackFrameRegIndex()); + fpOffset = x86::ptr(self->_zsp, self->_varBaseOffset + static_cast(self->_stackFrameCell->getOffset())); + + earlyPushPop = true; + self->emitPushSequence(regsGp); + + if (func->isStackFrameRegPreserved()) + compiler->emit(kX86InstIdPush, fpReg); + + compiler->emit(kX86InstIdMov, fpReg, self->_zsp); + } + } + else { + compiler->emit(kX86InstIdPush, fpReg); + compiler->emit(kX86InstIdMov, fpReg, self->_zsp); + } + + if (!earlyPushPop) { + self->emitPushSequence(regsGp); + if (func->isStackMisaligned() && regsGp != 0) + useLeaEpilog = true; + } + + // Adjust stack pointer. + if (func->isStackAdjusted()) { + stackBase = static_cast(func->getAlignedMemStackSize() + func->getCallStackSize()); + + if (stackSize) + compiler->emit(kX86InstIdSub, self->_zsp, stackSize); + + if (func->isStackMisaligned()) + compiler->emit(kX86InstIdAnd, self->_zsp, -stackAlignment); + + if (func->isStackMisaligned() && func->isNaked()) + compiler->emit(kX86InstIdMov, fpOffset, fpReg); + } + else { + stackBase = -static_cast(func->getAlignStackSize() + func->getMoveStackSize()); + } + + // Save XMM/MMX/GP (Mov). 
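+  // (Sketch of a typical non-naked prolog produced by the code above,
+  // assuming one clobbered preserved XMM register and `stackSize` bytes of
+  // locals:
+  //
+  //   push ebp
+  //   mov ebp, esp
+  //   sub esp, stackSize
+  //   movaps [esp + stackBase], xmm6   ; emitted by the loop below
+  //
+  // The epilog later mirrors this sequence in reverse.)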
+ stackPtr = stackBase; + for (i = 0, mask = regsXmm; mask != 0; i++, mask >>= 1) { + if (mask & 0x1) { + compiler->emit(kX86InstIdMovaps, x86::oword_ptr(self->_zsp, stackPtr), x86::xmm(i)); + stackPtr += 16; + } + } + + for (i = 0, mask = regsMm; mask != 0; i++, mask >>= 1) { + if (mask & 0x1) { + compiler->emit(kX86InstIdMovq, x86::qword_ptr(self->_zsp, stackPtr), x86::mm(i)); + stackPtr += 8; + } + } + + // -------------------------------------------------------------------------- + // [Move-Args] + // -------------------------------------------------------------------------- + + if (func->hasFuncFlag(kFuncFlagX86MoveArgs)) { + uint32_t argStackPos = 0; + uint32_t argStackSize = decl->getArgStackSize(); + + uint32_t moveIndex = 0; + uint32_t moveCount = (argStackSize + regSize - 1) / regSize; + + X86GpReg r[8]; + uint32_t numRegs = 0; + + for (i = 0; i < ASMJIT_ARRAY_SIZE(func->_stackFrameCopyGpIndex); i++) + if (func->_stackFrameCopyGpIndex[i] != kInvalidReg) + r[numRegs++] = gpReg.setIndex(func->_stackFrameCopyGpIndex[i]); + ASMJIT_ASSERT(numRegs > 0); + + int32_t dSrc = func->getPushPopStackSize() + regSize; + int32_t dDst = func->getAlignStackSize() + + func->getCallStackSize() + + func->getAlignedMemStackSize() + + func->getMoveStackSize(); + + if (func->isStackFrameRegPreserved()) + dSrc += regSize; + + X86Mem mSrc = x86::ptr(fpReg, dSrc); + X86Mem mDst = x86::ptr(self->_zsp, dDst); + + while (moveIndex < moveCount) { + uint32_t numMovs = Utils::iMin(moveCount - moveIndex, numRegs); + + for (i = 0; i < numMovs; i++) + compiler->emit(kX86InstIdMov, r[i], mSrc.adjusted((moveIndex + i) * regSize)); + for (i = 0; i < numMovs; i++) + compiler->emit(kX86InstIdMov, mDst.adjusted((moveIndex + i) * regSize), r[i]); + + argStackPos += numMovs * regSize; + moveIndex += numMovs; + } + } + + // -------------------------------------------------------------------------- + // [Epilog] + // -------------------------------------------------------------------------- + + compiler->_setCursor(func->getExitNode()); + + // Restore XMM/MMX/GP (Mov). + stackPtr = stackBase; + for (i = 0, mask = regsXmm; mask != 0; i++, mask >>= 1) { + if (mask & 0x1) { + compiler->emit(kX86InstIdMovaps, x86::xmm(i), x86::oword_ptr(self->_zsp, stackPtr)); + stackPtr += 16; + } + } + + for (i = 0, mask = regsMm; mask != 0; i++, mask >>= 1) { + if (mask & 0x1) { + compiler->emit(kX86InstIdMovq, x86::mm(i), x86::qword_ptr(self->_zsp, stackPtr)); + stackPtr += 8; + } + } + + // Adjust stack. + if (useLeaEpilog) { + compiler->emit(kX86InstIdLea, self->_zsp, x86::ptr(fpReg, -static_cast(func->getPushPopStackSize()))); + } + else if (!func->isStackMisaligned()) { + if (func->isStackAdjusted() && stackSize != 0) + compiler->emit(kX86InstIdAdd, self->_zsp, stackSize); + } + + // Restore Gp (Push/Pop). + if (!earlyPushPop) + self->emitPopSequence(regsGp); + + // Emms. + if (func->hasFuncFlag(kFuncFlagX86Emms)) + compiler->emit(kX86InstIdEmms); + + // MFence/SFence/LFence. + if (func->hasFuncFlag(kFuncFlagX86SFence) & func->hasFuncFlag(kFuncFlagX86LFence)) + compiler->emit(kX86InstIdMfence); + else if (func->hasFuncFlag(kFuncFlagX86SFence)) + compiler->emit(kX86InstIdSfence); + else if (func->hasFuncFlag(kFuncFlagX86LFence)) + compiler->emit(kX86InstIdLfence); + + // Leave. 
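+  // (`leave` is the one-instruction form of `mov esp, ebp` + `pop ebp`; the
+  // explicit pair below is used when kFuncFlagX86Leave is not set.)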
+ if (func->isNaked()) { + if (func->isStackMisaligned()) { + compiler->emit(kX86InstIdMov, self->_zsp, fpOffset); + + if (func->isStackFrameRegPreserved()) + compiler->emit(kX86InstIdPop, fpReg); + + if (earlyPushPop) + self->emitPopSequence(regsGp); + } + } + else { + if (useLeaEpilog) { + compiler->emit(kX86InstIdPop, fpReg); + } + else if (func->hasFuncFlag(kFuncFlagX86Leave)) { + compiler->emit(kX86InstIdLeave); + } + else { + compiler->emit(kX86InstIdMov, self->_zsp, fpReg); + compiler->emit(kX86InstIdPop, fpReg); + } + } + + // Emit return. + if (decl->getCalleePopsStack()) + compiler->emit(kX86InstIdRet, static_cast(decl->getArgStackSize())); + else + compiler->emit(kX86InstIdRet); + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::X86Context - Translate - Jump] +// ============================================================================ + +//! \internal +static void X86Context_translateJump(X86Context* self, HLJump* jNode, HLLabel* jTarget) { + X86Compiler* compiler = self->getCompiler(); + HLNode* extNode = self->getExtraBlock(); + + compiler->_setCursor(extNode); + self->switchState(jTarget->getState()); + + // If one or more instruction has been added during switchState() it will be + // moved at the end of the function body. + if (compiler->getCursor() != extNode) { + // TODO: Can fail. + HLLabel* jTrampolineTarget = compiler->newLabelNode(); + + // Add the jump to the target. + compiler->jmp(jTarget->getLabel()); + + // Add the trampoline-label we jump to change the state. + extNode = compiler->setCursor(extNode); + compiler->addNode(jTrampolineTarget); + + // Finally, patch the jump target. + ASMJIT_ASSERT(jNode->getOpCount() > 0); + jNode->_opList[0] = jTrampolineTarget->getLabel(); + jNode->_target = jTrampolineTarget; + } + + // Store the `extNode` and load the state back. + self->setExtraBlock(extNode); + self->loadState(jNode->_state); +} + +// ============================================================================ +// [asmjit::X86Context - Translate - Ret] +// ============================================================================ + +static Error X86Context_translateRet(X86Context* self, HLRet* rNode, HLLabel* exitTarget) { + X86Compiler* compiler = self->getCompiler(); + HLNode* node = rNode->getNext(); + + // 32-bit mode requires to push floating point return value(s), handle it + // here as it's a special case. + X86VarMap* map = rNode->getMap(); + if (map != nullptr) { + VarAttr* vaList = map->getVaList(); + uint32_t vaCount = map->getVaCount(); + + for (uint32_t i = 0; i < vaCount; i++) { + VarAttr& va = vaList[i]; + if (va.hasFlag(kVarAttrX86Fld4 | kVarAttrX86Fld8)) { + VarData* vd = va.getVd(); + X86Mem m(self->getVarMem(vd)); + + uint32_t flags = _x86VarInfo[vd->getType()].getFlags(); + m.setSize( + (flags & VarInfo::kFlagSP) ? 4 : + (flags & VarInfo::kFlagDP) ? 8 : + va.hasFlag(kVarAttrX86Fld4) ? 4 : 8); + + compiler->fld(m); + } + } + } + + // Decide whether to `jmp` or not in case we are next to the return label. + while (node != nullptr) { + switch (node->getType()) { + // If we have found an exit label we just return, there is no need to + // emit jump to that. + case HLNode::kTypeLabel: + if (static_cast(node) == exitTarget) + return kErrorOk; + goto _EmitRet; + + case HLNode::kTypeData: + case HLNode::kTypeInst: + case HLNode::kTypeCall: + case HLNode::kTypeRet: + goto _EmitRet; + + // Continue iterating. 
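+      // (Comments, aligns and hints produce no machine code, so the scan can
+      // look through them to see whether the exit label directly follows and
+      // the `jmp` can be omitted.)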
+ case HLNode::kTypeComment: + case HLNode::kTypeAlign: + case HLNode::kTypeHint: + break; + + // Invalid node to be here. + case HLNode::kTypeFunc: + return self->getCompiler()->setLastError(kErrorInvalidState); + + // We can't go forward from here. + case HLNode::kTypeSentinel: + return kErrorOk; + } + + node = node->getNext(); + } + +_EmitRet: + { + compiler->_setCursor(rNode); + compiler->jmp(exitTarget->getLabel()); + } + return kErrorOk; +} + +// ============================================================================ +// [asmjit::X86Context - Translate - Func] +// ============================================================================ + +Error X86Context::translate() { + ASMJIT_TLOG("[T] ======= Translate (Begin)\n"); + + X86Compiler* compiler = getCompiler(); + X86FuncNode* func = getFunc(); + + // Register allocator contexts. + X86VarAlloc vAlloc(this); + X86CallAlloc cAlloc(this); + + // Flow. + HLNode* node_ = func; + HLNode* next = nullptr; + HLNode* stop = getStop(); + + PodList::Link* jLink = _jccList.getFirst(); + + for (;;) { + while (node_->isTranslated()) { + // Switch state if we went to the already translated node. + if (node_->getType() == HLNode::kTypeLabel) { + HLLabel* node = static_cast(node_); + compiler->_setCursor(node->getPrev()); + switchState(node->getState()); + } + +_NextGroup: + if (jLink == nullptr) { + goto _Done; + } + else { + node_ = jLink->getValue(); + jLink = jLink->getNext(); + + HLNode* jFlow = X86Context_getOppositeJccFlow(static_cast(node_)); + loadState(node_->getState()); + + if (jFlow->getState()) { + X86Context_translateJump(this, + static_cast(node_), + static_cast(jFlow)); + + node_ = jFlow; + if (node_->isTranslated()) + goto _NextGroup; + } + else { + node_ = jFlow; + } + + break; + } + } + + next = node_->getNext(); + node_->orFlags(HLNode::kFlagIsTranslated); + + ASMJIT_TSEC({ + this->_traceNode(this, node_, "[T] "); + }); + + switch (node_->getType()) { + // ---------------------------------------------------------------------- + // [Align / Embed] + // ---------------------------------------------------------------------- + + case HLNode::kTypeAlign: + case HLNode::kTypeData: + break; + + // ---------------------------------------------------------------------- + // [Target] + // ---------------------------------------------------------------------- + + case HLNode::kTypeLabel: { + HLLabel* node = static_cast(node_); + ASMJIT_ASSERT(!node->hasState()); + node->setState(saveState()); + break; + } + + // ---------------------------------------------------------------------- + // [Inst/Call/SArg/Ret] + // ---------------------------------------------------------------------- + + case HLNode::kTypeInst: + case HLNode::kTypeCall: + case HLNode::kTypeCallArg: + // Update VarAttr's unuse flags based on liveness of the next node. 
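+      // (E.g. if a variable is read here but its bit is clear in the next
+      // node's liveness set, it dies at this instruction and its register
+      // becomes reusable as soon as this node is allocated.)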
+ if (!node_->isJcc()) { + X86VarMap* map = static_cast(node_->getMap()); + BitArray* liveness; + + if (map != nullptr && next != nullptr && (liveness = next->getLiveness()) != nullptr) { + VarAttr* vaList = map->getVaList(); + uint32_t vaCount = map->getVaCount(); + + for (uint32_t i = 0; i < vaCount; i++) { + VarAttr* va = &vaList[i]; + VarData* vd = va->getVd(); + + if (!liveness->getBit(vd->getLocalId())) + va->orFlags(kVarAttrUnuse); + } + } + } + + if (node_->getType() == HLNode::kTypeCall) { + ASMJIT_PROPAGATE_ERROR(cAlloc.run(static_cast(node_))); + break; + } + ASMJIT_FALLTHROUGH; + + case HLNode::kTypeHint: + case HLNode::kTypeRet: { + ASMJIT_PROPAGATE_ERROR(vAlloc.run(node_)); + + // Handle conditional/unconditional jump. + if (node_->isJmpOrJcc()) { + HLJump* node = static_cast(node_); + HLLabel* jTarget = node->getTarget(); + + // Target not followed. + if (jTarget == nullptr) { + if (node->isJmp()) + goto _NextGroup; + else + break; + } + + if (node->isJmp()) { + if (jTarget->hasState()) { + compiler->_setCursor(node->getPrev()); + switchState(jTarget->getState()); + + goto _NextGroup; + } + else { + next = jTarget; + } + } + else { + HLNode* jNext = node->getNext(); + + if (jTarget->isTranslated()) { + if (jNext->isTranslated()) { + ASMJIT_ASSERT(jNext->getType() == HLNode::kTypeLabel); + compiler->_setCursor(node->getPrev()); + intersectStates(jTarget->getState(), jNext->getState()); + } + + VarState* savedState = saveState(); + node->setState(savedState); + + X86Context_translateJump(this, node, jTarget); + next = jNext; + } + else if (jNext->isTranslated()) { + ASMJIT_ASSERT(jNext->getType() == HLNode::kTypeLabel); + + VarState* savedState = saveState(); + node->setState(savedState); + + compiler->_setCursor(node); + switchState(static_cast(jNext)->getState()); + next = jTarget; + } + else { + node->setState(saveState()); + next = X86Context_getJccFlow(node); + } + } + } + else if (node_->isRet()) { + ASMJIT_PROPAGATE_ERROR( + X86Context_translateRet(this, static_cast(node_), func->getExitNode())); + } + break; + } + + // ---------------------------------------------------------------------- + // [Func] + // ---------------------------------------------------------------------- + + case HLNode::kTypeFunc: { + ASMJIT_ASSERT(node_ == func); + + X86FuncDecl* decl = func->getDecl(); + X86VarMap* map = func->getMap(); + + if (map != nullptr) { + uint32_t i; + uint32_t argCount = func->_x86Decl.getNumArgs(); + + for (i = 0; i < argCount; i++) { + const FuncInOut& arg = decl->getArg(i); + + VarData* vd = func->getArg(i); + if (vd == nullptr) + continue; + + VarAttr* va = map->findVa(vd); + ASMJIT_ASSERT(va != nullptr); + + if (va->getFlags() & kVarAttrUnuse) + continue; + + uint32_t regIndex = va->getOutRegIndex(); + if (regIndex != kInvalidReg && (va->getFlags() & kVarAttrWConv) == 0) { + switch (vd->getClass()) { + case kX86RegClassGp : attach(vd, regIndex, true); break; + case kX86RegClassMm : attach(vd, regIndex, true); break; + case kX86RegClassXyz: attach(vd, regIndex, true); break; + } + } + else if (va->hasFlag(kVarAttrWConv)) { + // TODO: [COMPILER] Function Argument Conversion. 
+ ASMJIT_NOT_REACHED(); + } + else { + vd->_isMemArg = true; + vd->setMemOffset(arg.getStackOffset()); + vd->setState(kVarStateMem); + } + } + } + break; + } + + // ---------------------------------------------------------------------- + // [End] + // ---------------------------------------------------------------------- + + case HLNode::kTypeSentinel: { + goto _NextGroup; + } + + default: + break; + } + + if (next == stop) + goto _NextGroup; + node_ = next; + } + +_Done: + ASMJIT_PROPAGATE_ERROR(X86Context_initFunc(this, func)); + ASMJIT_PROPAGATE_ERROR(X86Context_patchFuncMem(this, func, stop)); + ASMJIT_PROPAGATE_ERROR(X86Context_translatePrologEpilog(this, func)); + + ASMJIT_TLOG("[T] ======= Translate (End)\n"); + return kErrorOk; +} + +// ============================================================================ +// [asmjit::X86Context - Serialize] +// ============================================================================ + +Error X86Context::serialize(Assembler* assembler_, HLNode* start, HLNode* stop) { + X86Assembler* assembler = static_cast(assembler_); + HLNode* node_ = start; + +#if !defined(ASMJIT_DISABLE_LOGGER) + Logger* logger = assembler->getLogger(); +#endif // !ASMJIT_DISABLE_LOGGER + + do { +#if !defined(ASMJIT_DISABLE_LOGGER) + if (logger) { + _stringBuilder.clear(); + formatInlineComment(_stringBuilder, node_); + assembler->_comment = _stringBuilder.getData(); + } +#endif // !ASMJIT_DISABLE_LOGGER + + switch (node_->getType()) { + case HLNode::kTypeAlign: { + HLAlign* node = static_cast(node_); + assembler->align(node->getAlignMode(), node->getOffset()); + break; + } + + case HLNode::kTypeData: { + HLData* node = static_cast(node_); + assembler->embed(node->getData(), node->getSize()); + break; + } + + case HLNode::kTypeComment: { +#if !defined(ASMJIT_DISABLE_LOGGER) + HLComment* node = static_cast(node_); + if (logger) + logger->logFormat(Logger::kStyleComment, + "%s; %s\n", logger->getIndentation(), node->getComment()); +#endif // !ASMJIT_DISABLE_LOGGER + break; + } + + case HLNode::kTypeHint: { + break; + } + + case HLNode::kTypeLabel: { + HLLabel* node = static_cast(node_); + assembler->bind(node->getLabel()); + break; + } + + case HLNode::kTypeInst: { + HLInst* node = static_cast(node_); + + uint32_t instId = node->getInstId(); + uint32_t opCount = node->getOpCount(); + + const Operand* opList = node->getOpList(); + assembler->_instOptions = node->getOptions(); + + const Operand* o0 = &noOperand; + const Operand* o1 = &noOperand; + const Operand* o2 = &noOperand; + const Operand* o3 = &noOperand; + + if (node->isSpecial()) { + switch (instId) { + case kX86InstIdCpuid: + break; + + case kX86InstIdCbw: + case kX86InstIdCdq: + case kX86InstIdCdqe: + case kX86InstIdCwd: + case kX86InstIdCwde: + case kX86InstIdCqo: + break; + + case kX86InstIdCmpxchg: + o0 = &opList[1]; + o1 = &opList[2]; + break; + + case kX86InstIdCmpxchg8b: + case kX86InstIdCmpxchg16b: + o0 = &opList[4]; + break; + + case kX86InstIdDaa: + case kX86InstIdDas: + break; + + case kX86InstIdImul: + case kX86InstIdMul: + case kX86InstIdIdiv: + case kX86InstIdDiv: + // Assume "Mul/Div dst_hi (implicit), dst_lo (implicit), src (explicit)". 
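+          // (E.g. `mul ecx` implicitly reads EAX and writes EDX:EAX; only
+          // the explicit source operand, opList[2], is handed to the
+          // assembler.)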
+ ASMJIT_ASSERT(opCount == 3); + o0 = &opList[2]; + break; + + case kX86InstIdMovPtr: + break; + + case kX86InstIdLahf: + case kX86InstIdSahf: + break; + + case kX86InstIdMaskmovq: + case kX86InstIdMaskmovdqu: + o0 = &opList[1]; + o1 = &opList[2]; + break; + + case kX86InstIdEnter: + o0 = &opList[0]; + o1 = &opList[1]; + break; + + case kX86InstIdLeave: + break; + + case kX86InstIdRet: + if (opCount > 0) + o0 = &opList[0]; + break; + + case kX86InstIdMonitor: + case kX86InstIdMwait: + break; + + case kX86InstIdPop: + o0 = &opList[0]; + break; + + case kX86InstIdPopa: + case kX86InstIdPopf: + break; + + case kX86InstIdPush: + o0 = &opList[0]; + break; + + case kX86InstIdPusha: + case kX86InstIdPushf: + break; + + case kX86InstIdRcl: + case kX86InstIdRcr: + case kX86InstIdRol: + case kX86InstIdRor: + case kX86InstIdSal: + case kX86InstIdSar: + case kX86InstIdShl: + case kX86InstIdShr: + o0 = &opList[0]; + o1 = &x86::cl; + break; + + case kX86InstIdShld: + case kX86InstIdShrd: + o0 = &opList[0]; + o1 = &opList[1]; + o2 = &x86::cl; + break; + + case kX86InstIdRdtsc: + case kX86InstIdRdtscp: + break; + + case kX86InstIdRepLodsB: case kX86InstIdRepLodsD: case kX86InstIdRepLodsQ: case kX86InstIdRepLodsW: + case kX86InstIdRepMovsB: case kX86InstIdRepMovsD: case kX86InstIdRepMovsQ: case kX86InstIdRepMovsW: + case kX86InstIdRepStosB: case kX86InstIdRepStosD: case kX86InstIdRepStosQ: case kX86InstIdRepStosW: + case kX86InstIdRepeCmpsB: case kX86InstIdRepeCmpsD: case kX86InstIdRepeCmpsQ: case kX86InstIdRepeCmpsW: + case kX86InstIdRepeScasB: case kX86InstIdRepeScasD: case kX86InstIdRepeScasQ: case kX86InstIdRepeScasW: + case kX86InstIdRepneCmpsB: case kX86InstIdRepneCmpsD: case kX86InstIdRepneCmpsQ: case kX86InstIdRepneCmpsW: + case kX86InstIdRepneScasB: case kX86InstIdRepneScasD: case kX86InstIdRepneScasQ: case kX86InstIdRepneScasW: + break; + + case kX86InstIdXrstor: + case kX86InstIdXrstor64: + case kX86InstIdXsave: + case kX86InstIdXsave64: + case kX86InstIdXsaveopt: + case kX86InstIdXsaveopt64: + o0 = &opList[0]; + break; + + case kX86InstIdXgetbv: + case kX86InstIdXsetbv: + break; + + default: + ASMJIT_NOT_REACHED(); + } + } + else { + if (opCount > 0) o0 = &opList[0]; + if (opCount > 1) o1 = &opList[1]; + if (opCount > 2) o2 = &opList[2]; + if (opCount > 3) o3 = &opList[3]; + } + + // Should call _emit() directly as 4 operand form is the main form. + assembler->emit(instId, *o0, *o1, *o2, *o3); + break; + } + + // Function scope and return is translated to another nodes, no special + // handling is required at this point. + case HLNode::kTypeFunc: + case HLNode::kTypeSentinel: + case HLNode::kTypeRet: { + break; + } + + // Function call adds nodes before and after, but it's required to emit + // the call instruction by itself. + case HLNode::kTypeCall: { + X86CallNode* node = static_cast(node_); + assembler->emit(kX86InstIdCall, node->_target, noOperand, noOperand); + break; + } + + default: + break; + } + + node_ = node_->getNext(); + } while (node_ != stop); + + return kErrorOk; +} + +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" + +// [Guard] +#endif // !ASMJIT_DISABLE_COMPILER && (ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64) diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86compilercontext_p.h b/DynamicHooks/thirdparty/AsmJit/x86/x86compilercontext_p.h new file mode 100644 index 0000000..3b86fd3 --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/x86/x86compilercontext_p.h @@ -0,0 +1,726 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. 
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_X86_X86COMPILERCONTEXT_P_H
+#define _ASMJIT_X86_X86COMPILERCONTEXT_P_H
+
+#include "../build.h"
+#if !defined(ASMJIT_DISABLE_COMPILER)
+
+// [Dependencies]
+#include "../base/compiler.h"
+#include "../base/compilercontext_p.h"
+#include "../base/utils.h"
+#include "../x86/x86assembler.h"
+#include "../x86/x86compiler.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+//! \addtogroup asmjit_x86
+//! \{
+
+// ============================================================================
+// [asmjit::X86VarMap]
+// ============================================================================
+
+struct X86VarMap : public VarMap {
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get variable-attributes list as VarAttr data.
+  ASMJIT_INLINE VarAttr* getVaList() const {
+    return const_cast<VarAttr*>(_list);
+  }
+
+  //! Get variable-attributes list as VarAttr data (by class).
+  ASMJIT_INLINE VarAttr* getVaListByClass(uint32_t rc) const {
+    return const_cast<VarAttr*>(_list) + _start.get(rc);
+  }
+
+  //! Get position of variables (by class).
+  ASMJIT_INLINE uint32_t getVaStart(uint32_t rc) const {
+    return _start.get(rc);
+  }
+
+  //! Get count of variables (by class).
+  ASMJIT_INLINE uint32_t getVaCountByClass(uint32_t rc) const {
+    return _count.get(rc);
+  }
+
+  //! Get VarAttr at `index`.
+  ASMJIT_INLINE VarAttr* getVa(uint32_t index) const {
+    ASMJIT_ASSERT(index < _vaCount);
+    return getVaList() + index;
+  }
+
+  //! Get VarAttr of class `rc` at `index`.
+  ASMJIT_INLINE VarAttr* getVaByClass(uint32_t rc, uint32_t index) const {
+    ASMJIT_ASSERT(index < _count._regs[rc]);
+    return getVaListByClass(rc) + index;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Utils]
+  // --------------------------------------------------------------------------
+
+  //! Find VarAttr.
+  ASMJIT_INLINE VarAttr* findVa(VarData* vd) const {
+    VarAttr* list = getVaList();
+    uint32_t count = getVaCount();
+
+    for (uint32_t i = 0; i < count; i++)
+      if (list[i].getVd() == vd)
+        return &list[i];
+
+    return nullptr;
+  }
+
+  //! Find VarAttr (by class).
+  ASMJIT_INLINE VarAttr* findVaByClass(uint32_t rc, VarData* vd) const {
+    VarAttr* list = getVaListByClass(rc);
+    uint32_t count = getVaCountByClass(rc);
+
+    for (uint32_t i = 0; i < count; i++)
+      if (list[i].getVd() == vd)
+        return &list[i];
+
+    return nullptr;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Special registers on input.
+  //!
+  //! Special register(s) restricted to one or more physical register. If there
+  //! is more than one special register it means that we have to duplicate the
+  //! variable content to all of them (it means that the same variable was used
+  //! by two or more operands). We forget about duplicates after the register
+  //! allocation finishes and mark all duplicates as non-assigned.
+  X86RegMask _inRegs;
+
+  //! Special registers on output.
+  //!
+  //! Special register(s) used on output. Each variable can have only one
+  //! special register on the output, 'X86VarMap' contains all registers from
+  //! all 'VarAttr's.
+  X86RegMask _outRegs;
+
+  //! Clobbered registers (by a function call).
+  X86RegMask _clobberedRegs;
+
+  //! Start indexes of variables per register class.
+  X86RegCount _start;
+  //! Count of variables per register class.
+  X86RegCount _count;
+
+  //! VarAttr list.
+  VarAttr _list[1];
+};
+
+// ============================================================================
+// [asmjit::X86StateCell]
+// ============================================================================
+
+//! X86/X64 state-cell.
+union X86StateCell {
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE uint32_t getState() const {
+    return _state;
+  }
+
+  ASMJIT_INLINE void setState(uint32_t state) {
+    _state = static_cast<uint8_t>(state);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void reset() { _packed = 0; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  uint8_t _packed;
+
+  struct {
+    uint8_t _state : 2;
+    uint8_t _unused : 6;
+  };
+};
+
+// ============================================================================
+// [asmjit::X86VarState]
+// ============================================================================
+
+//! X86/X64 state.
+struct X86VarState : VarState {
+  enum {
+    //! Base index of GP registers.
+    kGpIndex = 0,
+    //! Count of GP registers.
+    kGpCount = 16,
+
+    //! Base index of MMX registers.
+    kMmIndex = kGpIndex + kGpCount,
+    //! Count of MMX registers.
+    kMmCount = 8,
+
+    //! Base index of XMM registers.
+    kXmmIndex = kMmIndex + kMmCount,
+    //! Count of XMM registers.
+    kXmmCount = 16,
+
+    //! Count of all registers in `X86VarState`.
+    kAllCount = kXmmIndex + kXmmCount
+  };
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE VarData** getList() {
+    return _list;
+  }
+
+  ASMJIT_INLINE VarData** getListByClass(uint32_t rc) {
+    switch (rc) {
+      case kX86RegClassGp : return _listGp;
+      case kX86RegClassMm : return _listMm;
+      case kX86RegClassXyz: return _listXmm;
+
+      default:
+        return nullptr;
+    }
+  }
+
+  // --------------------------------------------------------------------------
+  // [Clear]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void reset(size_t numCells) {
+    ::memset(this, 0, kAllCount * sizeof(VarData*) +
+                      2 * sizeof(X86RegMask) +
+                      numCells * sizeof(X86StateCell));
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  union {
+    //! List of all allocated variables in one array.
+    VarData* _list[kAllCount];
+
+    struct {
+      //! Allocated GP registers.
+      VarData* _listGp[kGpCount];
+      //! Allocated MMX registers.
+      VarData* _listMm[kMmCount];
+      //! Allocated XMM registers.
+      VarData* _listXmm[kXmmCount];
+    };
+  };
+
+  //! Occupied registers (mask).
+  X86RegMask _occupied;
+  //! Modified registers (mask).
+  X86RegMask _modified;
+
+  //! Variables data, the length is stored in `X86Context`.
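+  //! (The struct is over-allocated so `_cells` behaves as a variable-length
+  //! trailing array; `reset()` above sizes the memset by `numCells`
+  //! accordingly.)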
+  X86StateCell _cells[1];
+};
+
+// ============================================================================
+// [asmjit::X86Context]
+// ============================================================================
+
+#if defined(ASMJIT_DEBUG)
+# define ASMJIT_X86_CHECK_STATE _checkState();
+#else
+# define ASMJIT_X86_CHECK_STATE
+#endif // ASMJIT_DEBUG
+
+//! \internal
+//!
+//! Compiler context, used by `X86Compiler`.
+//!
+//! The compiler context takes care of generating the function prolog and
+//! epilog, and also performs register allocation. It's used during the
+//! compilation phase, is considered an implementation detail, and is not
+//! accessible to asmjit consumers. The context is used once per function
+//! and is reset after the function is processed.
+struct X86Context : public Context {
+  ASMJIT_NO_COPY(X86Context)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new `X86Context` instance.
+  X86Context(X86Compiler* compiler);
+  //! Destroy the `X86Context` instance.
+  virtual ~X86Context();
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  virtual void reset(bool releaseMemory = false) override;
+
+  // --------------------------------------------------------------------------
+  // [Arch]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE bool isX64() const { return _zsp.getSize() == 8; }
+  ASMJIT_INLINE uint32_t getRegSize() const { return _zsp.getSize(); }
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! Get compiler as `X86Compiler`.
+  ASMJIT_INLINE X86Compiler* getCompiler() const { return static_cast<X86Compiler*>(_compiler); }
+  //! Get function as `X86FuncNode`.
+  ASMJIT_INLINE X86FuncNode* getFunc() const { return reinterpret_cast<X86FuncNode*>(_func); }
+  //! Get clobbered registers (global).
+  ASMJIT_INLINE uint32_t getClobberedRegs(uint32_t rc) { return _clobberedRegs.get(rc); }
+
+  // --------------------------------------------------------------------------
+  // [Helpers]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86VarMap* newVarMap(uint32_t vaCount) {
+    return static_cast<X86VarMap*>(
+      _zoneAllocator.alloc(sizeof(X86VarMap) + vaCount * sizeof(VarAttr)));
+  }
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+  void emitLoad(VarData* vd, uint32_t regIndex, const char* reason);
+  void emitSave(VarData* vd, uint32_t regIndex, const char* reason);
+  void emitMove(VarData* vd, uint32_t toRegIndex, uint32_t fromRegIndex, const char* reason);
+  void emitSwapGp(VarData* aVd, VarData* bVd, uint32_t aIndex, uint32_t bIndex, const char* reason);
+
+  void emitPushSequence(uint32_t regs);
+  void emitPopSequence(uint32_t regs);
+
+  void emitConvertVarToVar(uint32_t dstType, uint32_t dstIndex, uint32_t srcType, uint32_t srcIndex);
+  void emitMoveVarOnStack(uint32_t dstType, const X86Mem* dst, uint32_t srcType, uint32_t srcIndex);
+  void emitMoveImmOnStack(uint32_t dstType, const X86Mem* dst, const Imm* src);
+
+  void emitMoveImmToReg(uint32_t dstType, uint32_t dstIndex, const Imm* src);
+
+  // --------------------------------------------------------------------------
+  // [Register Management]
+  // --------------------------------------------------------------------------
+
+  void _checkState();
+
+  // --------------------------------------------------------------------------
+  // [Attach / Detach]
+  // --------------------------------------------------------------------------
+
+  //! Attach.
+  //!
+  //! Attach a register to the 'VarData', changing 'VarData' members to show
+  //! that the variable is currently alive, and linking the variable with the
+  //! current 'X86VarState'.
+  template<int C>
+  ASMJIT_INLINE void attach(VarData* vd, uint32_t regIndex, bool modified) {
+    ASMJIT_ASSERT(vd->getClass() == C);
+    ASMJIT_ASSERT(regIndex != kInvalidReg);
+
+    // Prevent Esp allocation if C==Gp.
+    ASMJIT_ASSERT(C != kX86RegClassGp || regIndex != kX86RegIndexSp);
+
+    uint32_t regMask = Utils::mask(regIndex);
+
+    vd->setState(kVarStateReg);
+    vd->setModified(modified);
+    vd->setRegIndex(regIndex);
+    vd->addHomeIndex(regIndex);
+
+    _x86State.getListByClass(C)[regIndex] = vd;
+    _x86State._occupied.or_(C, regMask);
+    _x86State._modified.or_(C, static_cast<uint32_t>(modified) << regIndex);
+
+    ASMJIT_X86_CHECK_STATE
+  }
+
+  //! Detach.
+  //!
+  //! The opposite of 'Attach'. Detach resets the members in 'VarData'
+  //! (regIndex, state and changed flags) and unlinks the variable from the
+  //! current 'X86VarState'.
+  template<int C>
+  ASMJIT_INLINE void detach(VarData* vd, uint32_t regIndex, uint32_t vState) {
+    ASMJIT_ASSERT(vd->getClass() == C);
+    ASMJIT_ASSERT(vd->getRegIndex() == regIndex);
+    ASMJIT_ASSERT(vState != kVarStateReg);
+
+    uint32_t regMask = Utils::mask(regIndex);
+
+    vd->setState(vState);
+    vd->resetRegIndex();
+    vd->setModified(false);
+
+    _x86State.getListByClass(C)[regIndex] = nullptr;
+    _x86State._occupied.andNot(C, regMask);
+    _x86State._modified.andNot(C, regMask);
+
+    ASMJIT_X86_CHECK_STATE
+  }
+
+  // --------------------------------------------------------------------------
+  // [Rebase]
+  // --------------------------------------------------------------------------
+
+  //! Rebase.
+  //!
Change the register of the 'VarData' changing also the current 'X86VarState'. + //! Rebase is nearly identical to 'Detach' and 'Attach' sequence, but doesn't + //! change the `VarData`s modified flag. + template + ASMJIT_INLINE void rebase(VarData* vd, uint32_t newRegIndex, uint32_t oldRegIndex) { + ASMJIT_ASSERT(vd->getClass() == C); + + uint32_t newRegMask = Utils::mask(newRegIndex); + uint32_t oldRegMask = Utils::mask(oldRegIndex); + uint32_t bothRegMask = newRegMask ^ oldRegMask; + + vd->setRegIndex(newRegIndex); + + _x86State.getListByClass(C)[oldRegIndex] = nullptr; + _x86State.getListByClass(C)[newRegIndex] = vd; + + _x86State._occupied.xor_(C, bothRegMask); + _x86State._modified.xor_(C, bothRegMask & -static_cast(vd->isModified())); + + ASMJIT_X86_CHECK_STATE + } + + // -------------------------------------------------------------------------- + // [Load / Save] + // -------------------------------------------------------------------------- + + //! Load. + //! + //! Load variable from its memory slot to a register, emitting 'Load' + //! instruction and changing the variable state to allocated. + template + ASMJIT_INLINE void load(VarData* vd, uint32_t regIndex) { + // Can be only called if variable is not allocated. + ASMJIT_ASSERT(vd->getClass() == C); + ASMJIT_ASSERT(vd->getState() != kVarStateReg); + ASMJIT_ASSERT(vd->getRegIndex() == kInvalidReg); + + emitLoad(vd, regIndex, "Load"); + attach(vd, regIndex, false); + + ASMJIT_X86_CHECK_STATE + } + + //! Save. + //! + //! Save the variable into its home location, but keep it as allocated. + template + ASMJIT_INLINE void save(VarData* vd) { + ASMJIT_ASSERT(vd->getClass() == C); + ASMJIT_ASSERT(vd->getState() == kVarStateReg); + ASMJIT_ASSERT(vd->getRegIndex() != kInvalidReg); + + uint32_t regIndex = vd->getRegIndex(); + uint32_t regMask = Utils::mask(regIndex); + + emitSave(vd, regIndex, "Save"); + + vd->setModified(false); + _x86State._modified.andNot(C, regMask); + + ASMJIT_X86_CHECK_STATE + } + + // -------------------------------------------------------------------------- + // [Move / Swap] + // -------------------------------------------------------------------------- + + //! Move a register. + //! + //! Move register from one index to another, emitting 'Move' if needed. This + //! function does nothing if register is already at the given index. + template + ASMJIT_INLINE void move(VarData* vd, uint32_t regIndex) { + ASMJIT_ASSERT(vd->getClass() == C); + ASMJIT_ASSERT(vd->getState() == kVarStateReg); + ASMJIT_ASSERT(vd->getRegIndex() != kInvalidReg); + + uint32_t oldIndex = vd->getRegIndex(); + if (regIndex != oldIndex) { + emitMove(vd, regIndex, oldIndex, "Move"); + rebase(vd, regIndex, oldIndex); + } + + ASMJIT_X86_CHECK_STATE + } + + //! Swap two registers + //! + //! It's only possible to swap Gp registers. 
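+  //! (Gp is the only class with a register-to-register `xchg` instruction;
+  //! swapping, say, two Xmm registers would need a temporary instead.)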
+  ASMJIT_INLINE void swapGp(VarData* aVd, VarData* bVd) {
+    ASMJIT_ASSERT(aVd != bVd);
+
+    ASMJIT_ASSERT(aVd->getClass() == kX86RegClassGp);
+    ASMJIT_ASSERT(aVd->getState() == kVarStateReg);
+    ASMJIT_ASSERT(aVd->getRegIndex() != kInvalidReg);
+
+    ASMJIT_ASSERT(bVd->getClass() == kX86RegClassGp);
+    ASMJIT_ASSERT(bVd->getState() == kVarStateReg);
+    ASMJIT_ASSERT(bVd->getRegIndex() != kInvalidReg);
+
+    uint32_t aIndex = aVd->getRegIndex();
+    uint32_t bIndex = bVd->getRegIndex();
+
+    emitSwapGp(aVd, bVd, aIndex, bIndex, "Swap");
+
+    aVd->setRegIndex(bIndex);
+    bVd->setRegIndex(aIndex);
+
+    _x86State.getListByClass(kX86RegClassGp)[aIndex] = bVd;
+    _x86State.getListByClass(kX86RegClassGp)[bIndex] = aVd;
+
+    uint32_t m = aVd->isModified() ^ bVd->isModified();
+    _x86State._modified.xor_(kX86RegClassGp, (m << aIndex) | (m << bIndex));
+
+    ASMJIT_X86_CHECK_STATE
+  }
+
+  // --------------------------------------------------------------------------
+  // [Alloc / Spill]
+  // --------------------------------------------------------------------------
+
+  //! Alloc.
+  template<int C>
+  ASMJIT_INLINE void alloc(VarData* vd, uint32_t regIndex) {
+    ASMJIT_ASSERT(vd->getClass() == C);
+    ASMJIT_ASSERT(regIndex != kInvalidReg);
+
+    uint32_t oldRegIndex = vd->getRegIndex();
+    uint32_t oldState = vd->getState();
+    uint32_t regMask = Utils::mask(regIndex);
+
+    ASMJIT_ASSERT(_x86State.getListByClass(C)[regIndex] == nullptr || regIndex == oldRegIndex);
+
+    if (oldState != kVarStateReg) {
+      if (oldState == kVarStateMem)
+        emitLoad(vd, regIndex, "Alloc");
+      vd->setModified(false);
+    }
+    else if (oldRegIndex != regIndex) {
+      emitMove(vd, regIndex, oldRegIndex, "Alloc");
+
+      _x86State.getListByClass(C)[oldRegIndex] = nullptr;
+      regMask ^= Utils::mask(oldRegIndex);
+    }
+    else {
+      ASMJIT_X86_CHECK_STATE
+      return;
+    }
+
+    vd->setState(kVarStateReg);
+    vd->setRegIndex(regIndex);
+    vd->addHomeIndex(regIndex);
+
+    _x86State.getListByClass(C)[regIndex] = vd;
+    _x86State._occupied.xor_(C, regMask);
+    _x86State._modified.xor_(C, regMask & -static_cast<uint32_t>(vd->isModified()));
+
+    ASMJIT_X86_CHECK_STATE
+  }
+
+  //! Spill.
+  //!
+  //! Spill variable/register, saves the content to the memory-home if modified.
+  template<int C>
+  ASMJIT_INLINE void spill(VarData* vd) {
+    ASMJIT_ASSERT(vd->getClass() == C);
+
+    if (vd->getState() != kVarStateReg) {
+      ASMJIT_X86_CHECK_STATE
+      return;
+    }
+
+    uint32_t regIndex = vd->getRegIndex();
+
+    ASMJIT_ASSERT(regIndex != kInvalidReg);
+    ASMJIT_ASSERT(_x86State.getListByClass(C)[regIndex] == vd);
+
+    if (vd->isModified())
+      emitSave(vd, regIndex, "Spill");
+    detach<C>(vd, regIndex, kVarStateMem);
+
+    ASMJIT_X86_CHECK_STATE
+  }
+
+  // --------------------------------------------------------------------------
+  // [Modify]
+  // --------------------------------------------------------------------------
+
+  template<int C>
+  ASMJIT_INLINE void modify(VarData* vd) {
+    ASMJIT_ASSERT(vd->getClass() == C);
+
+    uint32_t regIndex = vd->getRegIndex();
+    uint32_t regMask = Utils::mask(regIndex);
+
+    vd->setModified(true);
+    _x86State._modified.or_(C, regMask);
+
+    ASMJIT_X86_CHECK_STATE
+  }
+
+  // --------------------------------------------------------------------------
+  // [Unuse]
+  // --------------------------------------------------------------------------
+
+  //! Unuse.
+  //!
+  //! Unuse a variable: detach it if it's allocated, then change its state to
+  //! the given `vState` (`kVarStateNone` by default).
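+  //!
+  //! E.g. releasing a dead temporary (sketch):
+  //!
+  //!   unuse<kX86RegClassGp>(vd); // Register freed (if any), value discarded.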
+  template<int C>
+  ASMJIT_INLINE void unuse(VarData* vd, uint32_t vState = kVarStateNone) {
+    ASMJIT_ASSERT(vd->getClass() == C);
+    ASMJIT_ASSERT(vState != kVarStateReg);
+
+    uint32_t regIndex = vd->getRegIndex();
+    if (regIndex != kInvalidReg)
+      detach<C>(vd, regIndex, vState);
+    else
+      vd->setState(vState);
+
+    ASMJIT_X86_CHECK_STATE
+  }
+
+  // --------------------------------------------------------------------------
+  // [State]
+  // --------------------------------------------------------------------------
+
+  //! Get state as `X86VarState`.
+  ASMJIT_INLINE X86VarState* getState() const {
+    return const_cast<X86VarState*>(&_x86State);
+  }
+
+  virtual void loadState(VarState* src);
+  virtual VarState* saveState();
+
+  virtual void switchState(VarState* src);
+  virtual void intersectStates(VarState* a, VarState* b);
+
+  // --------------------------------------------------------------------------
+  // [Memory]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86Mem getVarMem(VarData* vd) {
+    (void)getVarCell(vd);
+
+    X86Mem mem(_memSlot);
+    mem.setBase(vd->getId());
+    return mem;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Fetch]
+  // --------------------------------------------------------------------------
+
+  virtual Error fetch();
+
+  // --------------------------------------------------------------------------
+  // [Annotate]
+  // --------------------------------------------------------------------------
+
+  virtual Error annotate();
+
+  // --------------------------------------------------------------------------
+  // [Translate]
+  // --------------------------------------------------------------------------
+
+  virtual Error translate();
+
+  // --------------------------------------------------------------------------
+  // [Serialize]
+  // --------------------------------------------------------------------------
+
+  virtual Error serialize(Assembler* assembler, HLNode* start, HLNode* stop);
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Count of X86/X64 registers.
+  X86RegCount _regCount;
+  //! X86/X64 stack-pointer (esp or rsp).
+  X86GpReg _zsp;
+  //! X86/X64 frame-pointer (ebp or rbp).
+  X86GpReg _zbp;
+  //! Temporary memory operand.
+  X86Mem _memSlot;
+
+  //! X86/X64 specific compiler state, linked to `_state`.
+  X86VarState _x86State;
+  //! Clobbered registers (for the whole function).
+  X86RegMask _clobberedRegs;
+
+  //! Memory cell that stores the address used to restore a manually aligned
+  //! stack.
+  VarCell* _stackFrameCell;
+
+  //! Global allocable registers mask.
+  uint32_t _gaRegs[kX86RegClassCount];
+
+  //! Function arguments base pointer (register).
+  uint8_t _argBaseReg;
+  //! Function variables base pointer (register).
+  uint8_t _varBaseReg;
+  //! Whether to emit comments.
+  uint8_t _emitComments;
+
+  //! Function arguments base offset.
+  int32_t _argBaseOffset;
+  //! Function variables base offset.
+  int32_t _varBaseOffset;
+
+  //! Function arguments displacement.
+  int32_t _argActualDisp;
+  //! Function variables displacement.
+  int32_t _varActualDisp;
+
+  //! Temporary string builder used for logging.
+  StringBuilderTmp<256> _stringBuilder;
+};
+
+//! 
\} + +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" + +// [Guard] +#endif // !ASMJIT_DISABLE_COMPILER +#endif // _ASMJIT_X86_X86COMPILERCONTEXT_P_H diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86compilerfunc.cpp b/DynamicHooks/thirdparty/AsmJit/x86/x86compilerfunc.cpp new file mode 100644 index 0000000..446c28e --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/x86/x86compilerfunc.cpp @@ -0,0 +1,551 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Export] +#define ASMJIT_EXPORTS + +// [Guard] +#include "../build.h" +#if !defined(ASMJIT_DISABLE_COMPILER) && (defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)) + +// [Dependencies] +#include "../x86/x86compiler.h" +#include "../x86/x86compilerfunc.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [asmjit::X86FuncDecl - Helpers] +// ============================================================================ + +static ASMJIT_INLINE bool x86ArgIsInt(uint32_t aType) { + ASMJIT_ASSERT(aType < kX86VarTypeCount); + return Utils::inInterval(aType, _kVarTypeIntStart, _kVarTypeIntEnd); +} + +static ASMJIT_INLINE bool x86ArgIsFp(uint32_t aType) { + ASMJIT_ASSERT(aType < kX86VarTypeCount); + return Utils::inInterval(aType, _kVarTypeFpStart, _kVarTypeFpEnd); +} + +static ASMJIT_INLINE uint32_t x86ArgTypeToXmmType(uint32_t aType) { + if (aType == kVarTypeFp32) return kX86VarTypeXmmSs; + if (aType == kVarTypeFp64) return kX86VarTypeXmmSd; + return aType; +} + +//! Get an architecture depending on the calling convention `callConv`. +//! +//! Returns `kArchNone`, `kArchX86`, or `kArchX64`. +static ASMJIT_INLINE uint32_t x86GetArchFromCConv(uint32_t callConv) { + if (Utils::inInterval(callConv, _kCallConvX86Start, _kCallConvX86End)) return kArchX86; + if (Utils::inInterval(callConv, _kCallConvX64Start, _kCallConvX64End)) return kArchX64; + + return kArchNone; +} + +// ============================================================================ +// [asmjit::X86FuncDecl - SetPrototype] +// ============================================================================ + +#define R(_Index_) kX86RegIndex##_Index_ +static uint32_t X86FuncDecl_initConv(X86FuncDecl* self, uint32_t arch, uint32_t callConv) { + // Setup defaults. 
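+  // Baseline shared by all conventions below: no stack arguments sized yet,
+  // no red zone or spill zone, the caller cleans the stack, arguments are
+  // pushed right-to-left, and nothing is passed or preserved in registers.
+  // Each case in the switch then overrides only the fields that differ.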
+  self->_argStackSize = 0;
+  self->_redZoneSize = 0;
+  self->_spillZoneSize = 0;
+
+  self->_callConv = static_cast<uint8_t>(callConv);
+  self->_calleePopsStack = false;
+  self->_argsDirection = kFuncDirRTL;
+
+  self->_passed.reset();
+  self->_preserved.reset();
+
+  ::memset(self->_passedOrderGp, kInvalidReg, ASMJIT_ARRAY_SIZE(self->_passedOrderGp));
+  ::memset(self->_passedOrderXyz, kInvalidReg, ASMJIT_ARRAY_SIZE(self->_passedOrderXyz));
+
+  switch (arch) {
+    // ------------------------------------------------------------------------
+    // [X86 Support]
+    // ------------------------------------------------------------------------
+
+#if defined(ASMJIT_BUILD_X86)
+    case kArchX86: {
+      self->_preserved.set(kX86RegClassGp, Utils::mask(R(Bx), R(Sp), R(Bp), R(Si), R(Di)));
+
+      switch (callConv) {
+        case kCallConvX86CDecl:
+          break;
+
+        case kCallConvX86StdCall:
+          self->_calleePopsStack = true;
+          break;
+
+        case kCallConvX86MsThisCall:
+          self->_calleePopsStack = true;
+          self->_passed.set(kX86RegClassGp, Utils::mask(R(Cx)));
+          self->_passedOrderGp[0] = R(Cx);
+          break;
+
+        case kCallConvX86MsFastCall:
+          self->_calleePopsStack = true;
+          self->_passed.set(kX86RegClassGp, Utils::mask(R(Cx), R(Dx)));
+          self->_passedOrderGp[0] = R(Cx);
+          self->_passedOrderGp[1] = R(Dx);
+          break;
+
+        case kCallConvX86BorlandFastCall:
+          self->_calleePopsStack = true;
+          self->_argsDirection = kFuncDirLTR;
+          self->_passed.set(kX86RegClassGp, Utils::mask(R(Ax), R(Dx), R(Cx)));
+          self->_passedOrderGp[0] = R(Ax);
+          self->_passedOrderGp[1] = R(Dx);
+          self->_passedOrderGp[2] = R(Cx);
+          break;
+
+        case kCallConvX86GccFastCall:
+          self->_calleePopsStack = true;
+          self->_passed.set(kX86RegClassGp, Utils::mask(R(Cx), R(Dx)));
+          self->_passedOrderGp[0] = R(Cx);
+          self->_passedOrderGp[1] = R(Dx);
+          break;
+
+        case kCallConvX86GccRegParm1:
+          self->_passed.set(kX86RegClassGp, Utils::mask(R(Ax)));
+          self->_passedOrderGp[0] = R(Ax);
+          break;
+
+        case kCallConvX86GccRegParm2:
+          self->_passed.set(kX86RegClassGp, Utils::mask(R(Ax), R(Dx)));
+          self->_passedOrderGp[0] = R(Ax);
+          self->_passedOrderGp[1] = R(Dx);
+          break;
+
+        case kCallConvX86GccRegParm3:
+          self->_passed.set(kX86RegClassGp, Utils::mask(R(Ax), R(Dx), R(Cx)));
+          self->_passedOrderGp[0] = R(Ax);
+          self->_passedOrderGp[1] = R(Dx);
+          self->_passedOrderGp[2] = R(Cx);
+          break;
+
+        default:
+          return kErrorInvalidArgument;
+      }
+
+      return kErrorOk;
+    }
+#endif // ASMJIT_BUILD_X86
+
+    // ------------------------------------------------------------------------
+    // [X64 Support]
+    // ------------------------------------------------------------------------
+
+#if defined(ASMJIT_BUILD_X64)
+    case kArchX64: {
+      switch (callConv) {
+        case kCallConvX64Win:
+          self->_spillZoneSize = 32;
+
+          self->_passed.set(kX86RegClassGp, Utils::mask(R(Cx), R(Dx), 8, 9));
+          self->_passedOrderGp[0] = R(Cx);
+          self->_passedOrderGp[1] = R(Dx);
+          self->_passedOrderGp[2] = 8;
+          self->_passedOrderGp[3] = 9;
+
+          self->_passed.set(kX86RegClassXyz, Utils::mask(0, 1, 2, 3));
+          self->_passedOrderXyz[0] = 0;
+          self->_passedOrderXyz[1] = 1;
+          self->_passedOrderXyz[2] = 2;
+          self->_passedOrderXyz[3] = 3;
+
+          self->_preserved.set(kX86RegClassGp , Utils::mask(R(Bx), R(Sp), R(Bp), R(Si), R(Di), 12, 13, 14, 15));
+          self->_preserved.set(kX86RegClassXyz, Utils::mask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
+          break;
+
+        case kCallConvX64Unix:
+          self->_redZoneSize = 128;
+
+          self->_passed.set(kX86RegClassGp, Utils::mask(R(Di), R(Si), R(Dx), R(Cx), 8, 9));
+          self->_passedOrderGp[0] = R(Di);
+          self->_passedOrderGp[1] = R(Si);
+          self->_passedOrderGp[2] = R(Dx);
+          self->_passedOrderGp[3] = R(Cx);
+          self->_passedOrderGp[4] = 8;
+          self->_passedOrderGp[5] = 9;
+
+          self->_passed.set(kX86RegClassXyz, Utils::mask(0, 1, 2, 3, 4, 5, 6, 7));
+          self->_passedOrderXyz[0] = 0;
+          self->_passedOrderXyz[1] = 1;
+          self->_passedOrderXyz[2] = 2;
+          self->_passedOrderXyz[3] = 3;
+          self->_passedOrderXyz[4] = 4;
+          self->_passedOrderXyz[5] = 5;
+          self->_passedOrderXyz[6] = 6;
+          self->_passedOrderXyz[7] = 7;
+
+          self->_preserved.set(kX86RegClassGp, Utils::mask(R(Bx), R(Sp), R(Bp), 12, 13, 14, 15));
+          break;
+
+        default:
+          return kErrorInvalidArgument;
+      }
+
+      return kErrorOk;
+    }
+#endif // ASMJIT_BUILD_X64
+
+    default:
+      return kErrorInvalidArgument;
+  }
+}
+#undef R
+
+static Error X86FuncDecl_initFunc(X86FuncDecl* self, uint32_t arch,
+  uint32_t ret, const uint32_t* args, uint32_t numArgs) {
+
+  ASMJIT_ASSERT(numArgs <= kFuncArgCount);
+
+  uint32_t callConv = self->_callConv;
+  uint32_t regSize = (arch == kArchX86) ? 4 : 8;
+
+  int32_t i = 0;
+  int32_t gpPos = 0;
+  int32_t xmmPos = 0;
+  int32_t stackOffset = 0;
+  const uint8_t* varMapping = nullptr;
+
+#if defined(ASMJIT_BUILD_X86)
+  if (arch == kArchX86)
+    varMapping = _x86VarMapping;
+#endif // ASMJIT_BUILD_X86
+
+#if defined(ASMJIT_BUILD_X64)
+  if (arch == kArchX64)
+    varMapping = _x64VarMapping;
+#endif // ASMJIT_BUILD_X64
+
+  ASMJIT_ASSERT(varMapping != nullptr);
+  self->_numArgs = static_cast<uint8_t>(numArgs);
+  self->_retCount = 0;
+
+  for (i = 0; i < static_cast<int32_t>(numArgs); i++) {
+    FuncInOut& arg = self->getArg(i);
+    arg._varType = static_cast<uint8_t>(varMapping[args[i]]);
+    arg._regIndex = kInvalidReg;
+    arg._stackOffset = kFuncStackInvalid;
+  }
+
+  for (; i < kFuncArgCount; i++) {
+    self->_args[i].reset();
+  }
+
+  self->_rets[0].reset();
+  self->_rets[1].reset();
+  self->_argStackSize = 0;
+  self->_used.reset();
+
+  if (ret != kInvalidVar) {
+    ret = varMapping[ret];
+    switch (ret) {
+      case kVarTypeInt64:
+      case kVarTypeUInt64:
+        // 64-bit value is returned in EDX:EAX on x86.
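+        // E.g. for `int64_t f(void)` on x86 the caller reads the low 32 bits
+        // from EAX and the high 32 bits from EDX, hence the two return slots
+        // set up below; on x64 the value fits a single RAX slot, which is
+        // what the fall-through into the 32-bit integer cases produces.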
+#if defined(ASMJIT_BUILD_X86)
+        if (arch == kArchX86) {
+          self->_retCount = 2;
+          self->_rets[0]._varType = kVarTypeUInt32;
+          self->_rets[0]._regIndex = kX86RegIndexAx;
+          self->_rets[1]._varType = static_cast<uint8_t>(ret - 2);
+          self->_rets[1]._regIndex = kX86RegIndexDx;
+          break;
+        }
+        ASMJIT_FALLTHROUGH;
+#endif // ASMJIT_BUILD_X86
+
+      case kVarTypeInt8:
+      case kVarTypeUInt8:
+      case kVarTypeInt16:
+      case kVarTypeUInt16:
+      case kVarTypeInt32:
+      case kVarTypeUInt32:
+        self->_retCount = 1;
+        self->_rets[0]._varType = static_cast<uint8_t>(ret);
+        self->_rets[0]._regIndex = kX86RegIndexAx;
+        break;
+
+      case kX86VarTypeMm:
+        self->_retCount = 1;
+        self->_rets[0]._varType = static_cast<uint8_t>(ret);
+        self->_rets[0]._regIndex = 0;
+        break;
+
+      case kVarTypeFp32:
+        self->_retCount = 1;
+        if (arch == kArchX86) {
+          self->_rets[0]._varType = kVarTypeFp32;
+          self->_rets[0]._regIndex = 0;
+        }
+        else {
+          self->_rets[0]._varType = kX86VarTypeXmmSs;
+          self->_rets[0]._regIndex = 0;
+        }
+        break;
+
+      case kVarTypeFp64:
+        self->_retCount = 1;
+        if (arch == kArchX86) {
+          self->_rets[0]._varType = kVarTypeFp64;
+          self->_rets[0]._regIndex = 0;
+        }
+        else {
+          self->_rets[0]._varType = kX86VarTypeXmmSd;
+          self->_rets[0]._regIndex = 0;
+        }
+        break;
+
+      case kX86VarTypeXmm:
+      case kX86VarTypeXmmSs:
+      case kX86VarTypeXmmSd:
+      case kX86VarTypeXmmPs:
+      case kX86VarTypeXmmPd:
+        self->_retCount = 1;
+        self->_rets[0]._varType = static_cast<uint8_t>(ret);
+        self->_rets[0]._regIndex = 0;
+        break;
+    }
+  }
+
+  if (self->_numArgs == 0)
+    return kErrorOk;
+
+#if defined(ASMJIT_BUILD_X86)
+  if (arch == kArchX86) {
+    // Register arguments (Integer), always left-to-right.
+    for (i = 0; i != static_cast<int32_t>(numArgs); i++) {
+      FuncInOut& arg = self->getArg(i);
+      uint32_t varType = varMapping[arg.getVarType()];
+
+      if (!x86ArgIsInt(varType) || gpPos >= ASMJIT_ARRAY_SIZE(self->_passedOrderGp))
+        continue;
+
+      if (self->_passedOrderGp[gpPos] == kInvalidReg)
+        continue;
+
+      arg._regIndex = self->_passedOrderGp[gpPos++];
+      self->_used.or_(kX86RegClassGp, Utils::mask(arg.getRegIndex()));
+    }
+
+    // Stack arguments.
+    int32_t iStart = static_cast<int32_t>(numArgs - 1);
+    int32_t iEnd = -1;
+    int32_t iStep = -1;
+
+    if (self->_argsDirection == kFuncDirLTR) {
+      iStart = 0;
+      iEnd = static_cast<int32_t>(numArgs);
+      iStep = 1;
+    }
+
+    for (i = iStart; i != iEnd; i += iStep) {
+      FuncInOut& arg = self->getArg(i);
+      uint32_t varType = varMapping[arg.getVarType()];
+
+      if (arg.hasRegIndex())
+        continue;
+
+      if (x86ArgIsInt(varType)) {
+        stackOffset -= 4;
+        arg._stackOffset = static_cast<int16_t>(stackOffset);
+      }
+      else if (x86ArgIsFp(varType)) {
+        int32_t size = static_cast<int32_t>(_x86VarInfo[varType].getSize());
+        stackOffset -= size;
+        arg._stackOffset = static_cast<int16_t>(stackOffset);
+      }
+    }
+  }
+#endif // ASMJIT_BUILD_X86
+
+#if defined(ASMJIT_BUILD_X64)
+  if (arch == kArchX64) {
+    if (callConv == kCallConvX64Win) {
+      int32_t argMax = Utils::iMin<int32_t>(numArgs, 4);
+
+      // Register arguments (GP/XMM), always left-to-right.
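+      // Win64 fixes one slot per argument position: argument `i` goes to
+      // _passedOrderGp[i] if it's an integer, or to XMM<i> if it's floating
+      // point, so GP and XMM positions advance together here (the SysV code
+      // below tracks gpPos and xmmPos independently instead).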
+      for (i = 0; i != argMax; i++) {
+        FuncInOut& arg = self->getArg(i);
+        uint32_t varType = varMapping[arg.getVarType()];
+
+        if (x86ArgIsInt(varType) && i < ASMJIT_ARRAY_SIZE(self->_passedOrderGp)) {
+          arg._regIndex = self->_passedOrderGp[i];
+          self->_used.or_(kX86RegClassGp, Utils::mask(arg.getRegIndex()));
+          continue;
+        }
+
+        if (x86ArgIsFp(varType) && i < ASMJIT_ARRAY_SIZE(self->_passedOrderXyz)) {
+          arg._varType = static_cast<uint8_t>(x86ArgTypeToXmmType(varType));
+          arg._regIndex = self->_passedOrderXyz[i];
+          self->_used.or_(kX86RegClassXyz, Utils::mask(arg.getRegIndex()));
+        }
+      }
+
+      // Stack arguments (always right-to-left).
+      for (i = numArgs - 1; i != -1; i--) {
+        FuncInOut& arg = self->getArg(i);
+        uint32_t varType = varMapping[arg.getVarType()];
+
+        if (arg.hasRegIndex())
+          continue;
+
+        if (x86ArgIsInt(varType)) {
+          stackOffset -= 8; // Always 8 bytes.
+          arg._stackOffset = static_cast<int16_t>(stackOffset);
+        }
+        else if (x86ArgIsFp(varType)) {
+          stackOffset -= 8; // Always 8 bytes (float/double).
+          arg._stackOffset = static_cast<int16_t>(stackOffset);
+        }
+      }
+
+      // 32 bytes shadow space (X64W calling convention specific).
+      stackOffset -= 4 * 8;
+    }
+    else {
+      // Register arguments (Gp), always left-to-right.
+      for (i = 0; i != static_cast<int32_t>(numArgs); i++) {
+        FuncInOut& arg = self->getArg(i);
+        uint32_t varType = varMapping[arg.getVarType()];
+
+        if (!x86ArgIsInt(varType) || gpPos >= ASMJIT_ARRAY_SIZE(self->_passedOrderGp))
+          continue;
+
+        if (self->_passedOrderGp[gpPos] == kInvalidReg)
+          continue;
+
+        arg._regIndex = self->_passedOrderGp[gpPos++];
+        self->_used.or_(kX86RegClassGp, Utils::mask(arg.getRegIndex()));
+      }
+
+      // Register arguments (XMM), always left-to-right.
+      for (i = 0; i != static_cast<int32_t>(numArgs); i++) {
+        FuncInOut& arg = self->getArg(i);
+        uint32_t varType = varMapping[arg.getVarType()];
+
+        if (x86ArgIsFp(varType)) {
+          arg._varType = static_cast<uint8_t>(x86ArgTypeToXmmType(varType));
+          arg._regIndex = self->_passedOrderXyz[xmmPos++];
+          self->_used.or_(kX86RegClassXyz, Utils::mask(arg.getRegIndex()));
+        }
+      }
+
+      // Stack arguments.
+      for (i = numArgs - 1; i != -1; i--) {
+        FuncInOut& arg = self->getArg(i);
+        uint32_t varType = varMapping[arg.getVarType()];
+
+        if (arg.hasRegIndex())
+          continue;
+
+        if (x86ArgIsInt(varType)) {
+          stackOffset -= 8;
+          arg._stackOffset = static_cast<int16_t>(stackOffset);
+        }
+        else if (x86ArgIsFp(varType)) {
+          int32_t size = static_cast<int32_t>(_x86VarInfo[varType].getSize());
+
+          stackOffset -= size;
+          arg._stackOffset = static_cast<int16_t>(stackOffset);
+        }
+      }
+    }
+  }
+#endif // ASMJIT_BUILD_X64
+
+  // Adjust the stack offsets so that, in the end, every stack parameter has a
+  // positive non-zero offset.
+  for (i = 0; i < static_cast<int32_t>(numArgs); i++) {
+    FuncInOut& arg = self->getArg(i);
+    if (!arg.hasRegIndex()) {
+      arg._stackOffset += static_cast<int16_t>(static_cast<int32_t>(regSize) - stackOffset);
+    }
+  }
+
+  self->_argStackSize = static_cast<uint32_t>(-stackOffset);
+  return kErrorOk;
+}
+
+Error X86FuncDecl::setPrototype(const FuncPrototype& p) {
+  uint32_t callConv = p.getCallConv();
+  uint32_t arch = x86GetArchFromCConv(callConv);
+
+  if (arch == kArchNone)
+    return kErrorInvalidArgument;
+
+  if (p.getNumArgs() > kFuncArgCount)
+    return kErrorInvalidArgument;
+
+  // Validate that the required convention is supported by the current asmjit
+  // configuration, if only one target is compiled.
+#if defined(ASMJIT_BUILD_X86) && !defined(ASMJIT_BUILD_X64) + if (arch == kArchX64) + return kErrorInvalidState; +#endif // ASMJIT_BUILD_X86 && !ASMJIT_BUILD_X64 + +#if !defined(ASMJIT_BUILD_X86) && defined(ASMJIT_BUILD_X64) + if (arch == kArchX86) + return kErrorInvalidState; +#endif // !ASMJIT_BUILD_X86 && ASMJIT_BUILD_X64 + + ASMJIT_PROPAGATE_ERROR(X86FuncDecl_initConv(this, arch, callConv)); + ASMJIT_PROPAGATE_ERROR(X86FuncDecl_initFunc(this, arch, p.getRet(), p.getArgs(), p.getNumArgs())); + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::X86FuncDecl - Reset] +// ============================================================================ + +void X86FuncDecl::reset() { + uint32_t i; + + _callConv = kCallConvNone; + _calleePopsStack = false; + _argsDirection = kFuncDirRTL; + _reserved0 = 0; + + _numArgs = 0; + _retCount = 0; + + _argStackSize = 0; + _redZoneSize = 0; + _spillZoneSize = 0; + + for (i = 0; i < ASMJIT_ARRAY_SIZE(_args); i++) + _args[i].reset(); + + _rets[0].reset(); + _rets[1].reset(); + + _used.reset(); + _passed.reset(); + _preserved.reset(); + + ::memset(_passedOrderGp, kInvalidReg, ASMJIT_ARRAY_SIZE(_passedOrderGp)); + ::memset(_passedOrderXyz, kInvalidReg, ASMJIT_ARRAY_SIZE(_passedOrderXyz)); +} + +} // asmjit namespace + +// [Api-End] +#include "../apiend.h" + +// [Guard] +#endif // !ASMJIT_DISABLE_COMPILER && (ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64) diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86compilerfunc.h b/DynamicHooks/thirdparty/AsmJit/x86/x86compilerfunc.h new file mode 100644 index 0000000..71e4986 --- /dev/null +++ b/DynamicHooks/thirdparty/AsmJit/x86/x86compilerfunc.h @@ -0,0 +1,133 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_X86_X86COMPILERFUNC_P_H +#define _ASMJIT_X86_X86COMPILERFUNC_P_H + +#include "../build.h" +#if !defined(ASMJIT_DISABLE_COMPILER) + +// [Dependencies] +#include "../base/compilerfunc.h" +#include "../x86/x86operand.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +//! \addtogroup asmjit_x86 +//! \{ + +// ============================================================================ +// [asmjit::TypeId] +// ============================================================================ + +#if !defined(ASMJIT_DOCGEN) +ASMJIT_TYPE_ID(X86MmReg, kX86VarTypeMm); +ASMJIT_TYPE_ID(X86MmVar, kX86VarTypeMm); +ASMJIT_TYPE_ID(X86XmmReg, kX86VarTypeXmm); +ASMJIT_TYPE_ID(X86XmmVar, kX86VarTypeXmm); +ASMJIT_TYPE_ID(X86YmmReg, kX86VarTypeYmm); +ASMJIT_TYPE_ID(X86YmmVar, kX86VarTypeYmm); +ASMJIT_TYPE_ID(X86ZmmReg, kX86VarTypeZmm); +ASMJIT_TYPE_ID(X86ZmmVar, kX86VarTypeZmm); +#endif // !ASMJIT_DOCGEN + +// ============================================================================ +// [asmjit::X86FuncDecl] +// ============================================================================ + +//! X86 function, including calling convention, arguments and their +//! register indices or stack positions. +struct X86FuncDecl : public FuncDecl { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! Create a new `X86FuncDecl` instance. 
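+  //!
+  //! Minimal usage sketch (assuming the `FuncBuilderN` templates and
+  //! `kCallConvHost` from this package):
+  //!
+  //!   X86FuncDecl decl;
+  //!   decl.setPrototype(FuncBuilder2<int, int, int>(kCallConvHost));
+  //!   // decl.getArg(0) now describes where the first `int` is passed,
+  //!   // either a register index or a stack offset.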
+  ASMJIT_INLINE X86FuncDecl() { reset(); }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - X86]
+  // --------------------------------------------------------------------------
+
+  //! Get used registers mask for the given register class `rc`.
+  //!
+  //! NOTE: The result depends on the function calling convention AND the
+  //! function prototype. Returned mask contains only registers actually used
+  //! to pass function arguments.
+  ASMJIT_INLINE uint32_t getUsed(uint32_t rc) const { return _used.get(rc); }
+
+  //! Get passed registers mask for the given register class `rc`.
+  //!
+  //! NOTE: The result depends on the function calling convention used; the
+  //! prototype of the function doesn't affect the mask returned.
+  ASMJIT_INLINE uint32_t getPassed(uint32_t rc) const { return _passed.get(rc); }
+
+  //! Get preserved registers mask for the given register class `rc`.
+  //!
+  //! NOTE: The result depends on the function calling convention used; the
+  //! prototype of the function doesn't affect the mask returned.
+  ASMJIT_INLINE uint32_t getPreserved(uint32_t rc) const { return _preserved.get(rc); }
+
+  //! Get the order of passed registers (GP).
+  //!
+  //! NOTE: The result depends on the function calling convention used; the
+  //! prototype of the function doesn't affect the order returned.
+  ASMJIT_INLINE const uint8_t* getPassedOrderGp() const { return _passedOrderGp; }
+
+  //! Get the order of passed registers (XMM/YMM/ZMM).
+  //!
+  //! NOTE: The result depends on the function calling convention used; the
+  //! prototype of the function doesn't affect the order returned.
+  ASMJIT_INLINE const uint8_t* getPassedOrderXyz() const { return _passedOrderXyz; }
+
+  // --------------------------------------------------------------------------
+  // [SetPrototype]
+  // --------------------------------------------------------------------------
+
+  //! Set function prototype.
+  //!
+  //! This will set the function calling convention and set up argument
+  //! variables.
+  //!
+  //! NOTE: This function will allocate variables; it can be called only once.
+  ASMJIT_API Error setPrototype(const FuncPrototype& p);
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API void reset();
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Used registers.
+  X86RegMask _used;
+  //! Passed registers (defined by the calling convention).
+  X86RegMask _passed;
+  //! Preserved registers (defined by the calling convention).
+  X86RegMask _preserved;
+
+  //! Order of registers used to pass GP function arguments.
+  uint8_t _passedOrderGp[8];
+  //! Order of registers used to pass XMM/YMM/ZMM function arguments.
+  uint8_t _passedOrderXyz[8];
+};
+
+//! \}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // !ASMJIT_DISABLE_COMPILER
+#endif // _ASMJIT_X86_X86COMPILERFUNC_P_H
diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86inst.cpp b/DynamicHooks/thirdparty/AsmJit/x86/x86inst.cpp
new file mode 100644
index 0000000..078e603
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/x86/x86inst.cpp
@@ -0,0 +1,3094 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+ +// [Export] +#define ASMJIT_EXPORTS + +// [Guard] +#include "../build.h" +#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64) + +// [Dependencies] +#include "../x86/x86inst.h" + +// [Api-Begin] +#include "../apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [Enums (Internal)] +// ============================================================================ + +//! \internal +enum { + // REX/VEX. + kX86InstTable_L__ = (0) << kX86InstOpCode_L_Shift, // L is operand-based or unspecified. + kX86InstTable_L_I = (0) << kX86InstOpCode_L_Shift, // L is ignored (LIG). + kX86InstTable_L_0 = (0) << kX86InstOpCode_L_Shift, // L has to be zero. + kX86InstTable_L_L = (1) << kX86InstOpCode_L_Shift, // L has to be set. + + kX86InstTable_W__ = (0) << kX86InstOpCode_W_Shift, // W is operand-based or unspecified. + kX86InstTable_W_I = (0) << kX86InstOpCode_W_Shift, // W is ignored (WIG). + kX86InstTable_W_0 = (0) << kX86InstOpCode_W_Shift, // W has to be zero. + kX86InstTable_W_W = (1) << kX86InstOpCode_W_Shift, // W has to be set. + + // EVEX. + kX86InstTable_E__ = (0) << kX86InstOpCode_EW_Shift, // EVEX.W is operand-based or unspecified. + kX86InstTable_E_I = (0) << kX86InstOpCode_EW_Shift, // EVEX.W is ignored (WIG). + kX86InstTable_E_0 = (0) << kX86InstOpCode_EW_Shift, // EVEX.W has to be zero. + kX86InstTable_E_1 = (1) << kX86InstOpCode_EW_Shift // EVEX.W has to be set. +}; + +//! \internal +//! +//! Combined flags. +enum X86InstOpInternal { + kX86InstOpI = kX86InstOpImm, + + kX86InstOpL = kX86InstOpLabel, + kX86InstOpLImm = kX86InstOpLabel | kX86InstOpImm, + + kX86InstOpGwb = kX86InstOpGw | kX86InstOpGb, + kX86InstOpGqd = kX86InstOpGq | kX86InstOpGd, + kX86InstOpGqdw = kX86InstOpGq | kX86InstOpGd | kX86InstOpGw, + kX86InstOpGqdwb = kX86InstOpGq | kX86InstOpGd | kX86InstOpGw | kX86InstOpGb, + + kX86InstOpGbMem = kX86InstOpGb | kX86InstOpMem, + kX86InstOpGwMem = kX86InstOpGw | kX86InstOpMem, + kX86InstOpGdMem = kX86InstOpGd | kX86InstOpMem, + kX86InstOpGqMem = kX86InstOpGq | kX86InstOpMem, + kX86InstOpGwbMem = kX86InstOpGwb | kX86InstOpMem, + kX86InstOpGqdMem = kX86InstOpGqd | kX86InstOpMem, + kX86InstOpGqdwMem = kX86InstOpGqdw | kX86InstOpMem, + kX86InstOpGqdwbMem = kX86InstOpGqdwb | kX86InstOpMem, + + kX86InstOpFpMem = kX86InstOpFp | kX86InstOpMem, + kX86InstOpMmMem = kX86InstOpMm | kX86InstOpMem, + kX86InstOpKMem = kX86InstOpK | kX86InstOpMem, + kX86InstOpXmmMem = kX86InstOpXmm | kX86InstOpMem, + kX86InstOpYmmMem = kX86InstOpYmm | kX86InstOpMem, + kX86InstOpZmmMem = kX86InstOpZmm | kX86InstOpMem, + + kX86InstOpMmXmm = kX86InstOpMm | kX86InstOpXmm, + kX86InstOpMmXmmMem = kX86InstOpMmXmm | kX86InstOpMem, + + kX86InstOpXy = kX86InstOpXmm | kX86InstOpYmm, + kX86InstOpXyMem = kX86InstOpXy | kX86InstOpMem, + + kX86InstOpXyz = kX86InstOpXy | kX86InstOpZmm, + kX86InstOpXyzMem = kX86InstOpXyz | kX86InstOpMem +}; + +//! \internal +//! +//! X86/X64 Instruction AVX-512 flags (combined). +ASMJIT_ENUM(X86InstFlagsInternal) { + // FPU. + kX86InstFlagMem2_4 = kX86InstFlagMem2 | kX86InstFlagMem4, + kX86InstFlagMem2_4_8 = kX86InstFlagMem2_4 | kX86InstFlagMem8, + kX86InstFlagMem4_8 = kX86InstFlagMem4 | kX86InstFlagMem8, + kX86InstFlagMem4_8_10 = kX86InstFlagMem4_8 | kX86InstFlagMem10 +}; + +// ============================================================================ +// [Macros] +// ============================================================================ + +// Undefined. Used to distinguish (visually) between zero and unused data. 
+#define U 0 + +// Instruction opcodes. +#define O_000000(op, o, L, W, EvexW) (kX86InstOpCode_PP_00 | kX86InstOpCode_MM_00 | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_000F00(op, o, L, W, EvexW) (kX86InstOpCode_PP_00 | kX86InstOpCode_MM_0F | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_000F01(op, o, L, W, EvexW) (kX86InstOpCode_PP_00 | kX86InstOpCode_MM_0F01 | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_000F0F(op, o, L, W, EvexW) (kX86InstOpCode_PP_00 | kX86InstOpCode_MM_0F | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_000F38(op, o, L, W, EvexW) (kX86InstOpCode_PP_00 | kX86InstOpCode_MM_0F38 | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_000F3A(op, o, L, W, EvexW) (kX86InstOpCode_PP_00 | kX86InstOpCode_MM_0F3A | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_660000(op, o, L, W, EvexW) (kX86InstOpCode_PP_66 | kX86InstOpCode_MM_00 | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_660F00(op, o, L, W, EvexW) (kX86InstOpCode_PP_66 | kX86InstOpCode_MM_0F | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_660F38(op, o, L, W, EvexW) (kX86InstOpCode_PP_66 | kX86InstOpCode_MM_0F38 | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_660F3A(op, o, L, W, EvexW) (kX86InstOpCode_PP_66 | kX86InstOpCode_MM_0F3A | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_9B0000(op, o, L, W, EvexW) (kX86InstOpCode_PP_9B | kX86InstOpCode_MM_00 | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_F20000(op, o, L, W, EvexW) (kX86InstOpCode_PP_F2 | kX86InstOpCode_MM_00 | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_F20F00(op, o, L, W, EvexW) (kX86InstOpCode_PP_F2 | kX86InstOpCode_MM_0F | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_F20F38(op, o, L, W, EvexW) (kX86InstOpCode_PP_F2 | kX86InstOpCode_MM_0F38 | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_F20F3A(op, o, L, W, EvexW) (kX86InstOpCode_PP_F2 | kX86InstOpCode_MM_0F3A | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_F30000(op, o, L, W, EvexW) (kX86InstOpCode_PP_F3 | kX86InstOpCode_MM_00 | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_F30F00(op, o, L, W, EvexW) (kX86InstOpCode_PP_F3 | kX86InstOpCode_MM_0F | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_F30F38(op, o, L, W, EvexW) (kX86InstOpCode_PP_F3 | kX86InstOpCode_MM_0F38 | (0x##op) | ((o) << 
kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_F30F3A(op, o, L, W, EvexW) (kX86InstOpCode_PP_F3 | kX86InstOpCode_MM_0F3A | (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) + +#define O_00_M08(op, o, L, W, EvexW) (kX86InstOpCode_PP_00 | kX86InstOpCode_MM_01000| (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_00_M09(op, o, L, W, EvexW) (kX86InstOpCode_PP_00 | kX86InstOpCode_MM_01001| (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) +#define O_66_M03(op, o, L, W, EvexW) (kX86InstOpCode_PP_66 | kX86InstOpCode_MM_00011| (0x##op) | ((o) << kX86InstOpCode_O_Shift) | kX86InstTable_L_##L | kX86InstTable_W_##W | kX86InstTable_E_##EvexW) + +#define O_00_X(op, o) (kX86InstOpCode_PP_00 | (0x##op) | ((o) << kX86InstOpCode_O_Shift)) +#define O_9B_X(op, o) (kX86InstOpCode_PP_9B | (0x##op) | ((o) << kX86InstOpCode_O_Shift)) + +#define F(flag) kX86InstFlag##flag // Instruction Base Flag(s) `F(...)`. +#define A(flag) kX86InstFlagAvx512##flag // Instruction AVX-512 flag(s) `A(...)`. +#define O(flag) kX86InstOp##flag // Instruction operand flag(s) `O(...)`. +#define EF(eflags) 0 // Instruction EFLAGS `EF(OSZAPCDX)`. +#define Enc(iEnc) kX86InstEncoding##iEnc // Instruction Encoding `Enc(...)`. + +// Defines an X86/X64 instruction. +#define INST(id, name, encoding, opcode0, opcode1, instFlags, eflags, writeIndex, writeSize, op0, op1, op2, op3, op4) \ + { 0, kX86InstId##id##_ExtendedIndex, opcode0 } + +// ============================================================================ +// [asmjit::X86Inst] +// ============================================================================ + +// ${X86InstData:Begin} +// ------------------- Automatically generated, do not edit ------------------- +#if !defined(ASMJIT_DISABLE_NAMES) +static const char _x86InstNameData[] = + "\0" "adc\0" "adcx\0" "adox\0" "bextr\0" "blcfill\0" "blci\0" "blcic\0" + "blcmsk\0" "blcs\0" "blsfill\0" "blsi\0" "blsic\0" "blsmsk\0" "blsr\0" + "bsf\0" "bsr\0" "bswap\0" "bt\0" "btc\0" "btr\0" "bts\0" "bzhi\0" "call\0" + "cbw\0" "cdq\0" "cdqe\0" "clc\0" "cld\0" "clflush\0" "clflushopt\0" "cmc\0" + "cmova\0" "cmovae\0" "cmovc\0" "cmovg\0" "cmovge\0" "cmovl\0" "cmovle\0" + "cmovna\0" "cmovnae\0" "cmovnc\0" "cmovng\0" "cmovnge\0" "cmovnl\0" + "cmovnle\0" "cmovno\0" "cmovnp\0" "cmovns\0" "cmovnz\0" "cmovo\0" "cmovp\0" + "cmovpe\0" "cmovpo\0" "cmovs\0" "cmovz\0" "cmp\0" "cmpxchg\0" "cmpxchg16b\0" + "cmpxchg8b\0" "cpuid\0" "cqo\0" "crc32\0" "cvtpd2pi\0" "cvtpi2pd\0" + "cvtpi2ps\0" "cvtps2pi\0" "cvttpd2pi\0" "cvttps2pi\0" "cwd\0" "cwde\0" + "daa\0" "das\0" "enter\0" "f2xm1\0" "fabs\0" "faddp\0" "fbld\0" "fbstp\0" + "fchs\0" "fclex\0" "fcmovb\0" "fcmovbe\0" "fcmove\0" "fcmovnb\0" "fcmovnbe\0" + "fcmovne\0" "fcmovnu\0" "fcmovu\0" "fcom\0" "fcomi\0" "fcomip\0" "fcomp\0" + "fcompp\0" "fcos\0" "fdecstp\0" "fdiv\0" "fdivp\0" "fdivr\0" "fdivrp\0" + "femms\0" "ffree\0" "fiadd\0" "ficom\0" "ficomp\0" "fidiv\0" "fidivr\0" + "fild\0" "fimul\0" "fincstp\0" "finit\0" "fist\0" "fistp\0" "fisttp\0" + "fisub\0" "fisubr\0" "fld\0" "fld1\0" "fldcw\0" "fldenv\0" "fldl2e\0" + "fldl2t\0" "fldlg2\0" "fldln2\0" "fldpi\0" "fldz\0" "fmulp\0" "fnclex\0" + "fninit\0" "fnop\0" "fnsave\0" "fnstcw\0" "fnstenv\0" "fnstsw\0" "fpatan\0" + "fprem\0" "fprem1\0" "fptan\0" "frndint\0" "frstor\0" "fsave\0" "fscale\0" + "fsin\0" 
"fsincos\0" "fsqrt\0" "fst\0" "fstcw\0" "fstenv\0" "fstp\0" + "fstsw\0" "fsubp\0" "fsubrp\0" "ftst\0" "fucom\0" "fucomi\0" "fucomip\0" + "fucomp\0" "fucompp\0" "fwait\0" "fxam\0" "fxch\0" "fxrstor\0" "fxrstor64\0" + "fxsave\0" "fxsave64\0" "fxtract\0" "fyl2x\0" "fyl2xp1\0" "inc\0" "insertq\0" + "ja\0" "jae\0" "jb\0" "jbe\0" "jc\0" "je\0" "jecxz\0" "jg\0" "jge\0" "jl\0" + "jle\0" "jmp\0" "jna\0" "jnae\0" "jnb\0" "jnbe\0" "jnc\0" "jne\0" "jng\0" + "jnge\0" "jnl\0" "jnle\0" "jno\0" "jnp\0" "jns\0" "jnz\0" "jo\0" "jp\0" + "jpe\0" "jpo\0" "js\0" "jz\0" "lahf\0" "lea\0" "leave\0" "lfence\0" "lzcnt\0" + "mfence\0" "monitor\0" "mov_ptr\0" "movdq2q\0" "movnti\0" "movntq\0" + "movntsd\0" "movntss\0" "movq2dq\0" "movsx\0" "movsxd\0" "movzx\0" "mulx\0" + "mwait\0" "neg\0" "not\0" "pause\0" "pavgusb\0" "pdep\0" "pext\0" "pf2id\0" + "pf2iw\0" "pfacc\0" "pfadd\0" "pfcmpeq\0" "pfcmpge\0" "pfcmpgt\0" "pfmax\0" + "pfmin\0" "pfmul\0" "pfnacc\0" "pfpnacc\0" "pfrcp\0" "pfrcpit1\0" + "pfrcpit2\0" "pfrsqit1\0" "pfrsqrt\0" "pfsub\0" "pfsubr\0" "pi2fd\0" + "pi2fw\0" "pmulhrw\0" "pop\0" "popa\0" "popcnt\0" "popf\0" "prefetch\0" + "prefetch3dnow\0" "prefetchw\0" "prefetchwt1\0" "pshufw\0" "pswapd\0" + "push\0" "pusha\0" "pushf\0" "rcl\0" "rcr\0" "rdfsbase\0" "rdgsbase\0" + "rdrand\0" "rdseed\0" "rdtsc\0" "rdtscp\0" "rep lods_b\0" "rep lods_d\0" + "rep lods_q\0" "rep lods_w\0" "rep movs_b\0" "rep movs_d\0" "rep movs_q\0" + "rep movs_w\0" "rep stos_b\0" "rep stos_d\0" "rep stos_q\0" "rep stos_w\0" + "repe cmps_b\0" "repe cmps_d\0" "repe cmps_q\0" "repe cmps_w\0" + "repe scas_b\0" "repe scas_d\0" "repe scas_q\0" "repe scas_w\0" + "repne cmps_b\0" "repne cmps_d\0" "repne cmps_q\0" "repne cmps_w\0" + "repne scas_b\0" "repne scas_d\0" "repne scas_q\0" "repne scas_w\0" "ret\0" + "rol\0" "ror\0" "rorx\0" "sahf\0" "sal\0" "sar\0" "sarx\0" "sbb\0" "seta\0" + "setae\0" "setb\0" "setbe\0" "setc\0" "sete\0" "setg\0" "setge\0" "setl\0" + "setle\0" "setna\0" "setnae\0" "setnb\0" "setnbe\0" "setnc\0" "setne\0" + "setng\0" "setnge\0" "setnl\0" "setnle\0" "setno\0" "setnp\0" "setns\0" + "setnz\0" "seto\0" "setp\0" "setpe\0" "setpo\0" "sets\0" "setz\0" "sfence\0" + "sha1msg1\0" "sha1msg2\0" "sha1nexte\0" "sha1rnds4\0" "sha256msg1\0" + "sha256msg2\0" "sha256rnds2\0" "shl\0" "shlx\0" "shr\0" "shrd\0" "shrx\0" + "stc\0" "t1mskc\0" "tzcnt\0" "tzmsk\0" "ud2\0" "vaddpd\0" "vaddps\0" + "vaddsd\0" "vaddss\0" "vaddsubpd\0" "vaddsubps\0" "vaesdec\0" "vaesdeclast\0" + "vaesenc\0" "vaesenclast\0" "vaesimc\0" "vaeskeygenassist\0" "vandnpd\0" + "vandnps\0" "vandpd\0" "vandps\0" "vblendpd\0" "vblendps\0" "vblendvpd\0" + "vblendvps\0" "vbroadcastf128\0" "vbroadcasti128\0" "vbroadcastsd\0" + "vbroadcastss\0" "vcmppd\0" "vcmpps\0" "vcmpsd\0" "vcmpss\0" "vcomisd\0" + "vcomiss\0" "vcvtdq2pd\0" "vcvtdq2ps\0" "vcvtpd2dq\0" "vcvtpd2ps\0" + "vcvtph2ps\0" "vcvtps2dq\0" "vcvtps2pd\0" "vcvtps2ph\0" "vcvtsd2si\0" + "vcvtsd2ss\0" "vcvtsi2sd\0" "vcvtsi2ss\0" "vcvtss2sd\0" "vcvtss2si\0" + "vcvttpd2dq\0" "vcvttps2dq\0" "vcvttsd2si\0" "vcvttss2si\0" "vdivpd\0" + "vdivps\0" "vdivsd\0" "vdivss\0" "vdppd\0" "vdpps\0" "vextractf128\0" + "vextracti128\0" "vextractps\0" "vfmadd132pd\0" "vfmadd132ps\0" + "vfmadd132sd\0" "vfmadd132ss\0" "vfmadd213pd\0" "vfmadd213ps\0" + "vfmadd213sd\0" "vfmadd213ss\0" "vfmadd231pd\0" "vfmadd231ps\0" + "vfmadd231sd\0" "vfmadd231ss\0" "vfmaddpd\0" "vfmaddps\0" "vfmaddsd\0" + "vfmaddss\0" "vfmaddsub132pd\0" "vfmaddsub132ps\0" "vfmaddsub213pd\0" + "vfmaddsub213ps\0" "vfmaddsub231pd\0" "vfmaddsub231ps\0" "vfmaddsubpd\0" + "vfmaddsubps\0" 
"vfmsub132pd\0" "vfmsub132ps\0" "vfmsub132sd\0" + "vfmsub132ss\0" "vfmsub213pd\0" "vfmsub213ps\0" "vfmsub213sd\0" + "vfmsub213ss\0" "vfmsub231pd\0" "vfmsub231ps\0" "vfmsub231sd\0" + "vfmsub231ss\0" "vfmsubadd132pd\0" "vfmsubadd132ps\0" "vfmsubadd213pd\0" + "vfmsubadd213ps\0" "vfmsubadd231pd\0" "vfmsubadd231ps\0" "vfmsubaddpd\0" + "vfmsubaddps\0" "vfmsubpd\0" "vfmsubps\0" "vfmsubsd\0" "vfmsubss\0" + "vfnmadd132pd\0" "vfnmadd132ps\0" "vfnmadd132sd\0" "vfnmadd132ss\0" + "vfnmadd213pd\0" "vfnmadd213ps\0" "vfnmadd213sd\0" "vfnmadd213ss\0" + "vfnmadd231pd\0" "vfnmadd231ps\0" "vfnmadd231sd\0" "vfnmadd231ss\0" + "vfnmaddpd\0" "vfnmaddps\0" "vfnmaddsd\0" "vfnmaddss\0" "vfnmsub132pd\0" + "vfnmsub132ps\0" "vfnmsub132sd\0" "vfnmsub132ss\0" "vfnmsub213pd\0" + "vfnmsub213ps\0" "vfnmsub213sd\0" "vfnmsub213ss\0" "vfnmsub231pd\0" + "vfnmsub231ps\0" "vfnmsub231sd\0" "vfnmsub231ss\0" "vfnmsubpd\0" + "vfnmsubps\0" "vfnmsubsd\0" "vfnmsubss\0" "vfrczpd\0" "vfrczps\0" "vfrczsd\0" + "vfrczss\0" "vgatherdpd\0" "vgatherdps\0" "vgatherqpd\0" "vgatherqps\0" + "vhaddpd\0" "vhaddps\0" "vhsubpd\0" "vhsubps\0" "vinsertf128\0" + "vinserti128\0" "vinsertps\0" "vlddqu\0" "vldmxcsr\0" "vmaskmovdqu\0" + "vmaskmovpd\0" "vmaskmovps\0" "vmaxpd\0" "vmaxps\0" "vmaxsd\0" "vmaxss\0" + "vminpd\0" "vminps\0" "vminsd\0" "vminss\0" "vmovapd\0" "vmovaps\0" "vmovd\0" + "vmovddup\0" "vmovdqa\0" "vmovdqu\0" "vmovhlps\0" "vmovhpd\0" "vmovhps\0" + "vmovlhps\0" "vmovlpd\0" "vmovlps\0" "vmovmskpd\0" "vmovmskps\0" "vmovntdq\0" + "vmovntdqa\0" "vmovntpd\0" "vmovntps\0" "vmovq\0" "vmovsd\0" "vmovshdup\0" + "vmovsldup\0" "vmovss\0" "vmovupd\0" "vmovups\0" "vmpsadbw\0" "vmulpd\0" + "vmulps\0" "vmulsd\0" "vmulss\0" "vorpd\0" "vorps\0" "vpabsb\0" "vpabsd\0" + "vpabsw\0" "vpackssdw\0" "vpacksswb\0" "vpackusdw\0" "vpackuswb\0" "vpaddb\0" + "vpaddd\0" "vpaddq\0" "vpaddsb\0" "vpaddsw\0" "vpaddusb\0" "vpaddusw\0" + "vpaddw\0" "vpalignr\0" "vpand\0" "vpandn\0" "vpavgb\0" "vpavgw\0" + "vpblendd\0" "vpblendvb\0" "vpblendw\0" "vpbroadcastb\0" "vpbroadcastd\0" + "vpbroadcastq\0" "vpbroadcastw\0" "vpclmulqdq\0" "vpcmov\0" "vpcmpeqb\0" + "vpcmpeqd\0" "vpcmpeqq\0" "vpcmpeqw\0" "vpcmpestri\0" "vpcmpestrm\0" + "vpcmpgtb\0" "vpcmpgtd\0" "vpcmpgtq\0" "vpcmpgtw\0" "vpcmpistri\0" + "vpcmpistrm\0" "vpcomb\0" "vpcomd\0" "vpcomq\0" "vpcomub\0" "vpcomud\0" + "vpcomuq\0" "vpcomuw\0" "vpcomw\0" "vperm2f128\0" "vperm2i128\0" "vpermd\0" + "vpermil2pd\0" "vpermil2ps\0" "vpermilpd\0" "vpermilps\0" "vpermpd\0" + "vpermps\0" "vpermq\0" "vpextrb\0" "vpextrd\0" "vpextrq\0" "vpextrw\0" + "vpgatherdd\0" "vpgatherdq\0" "vpgatherqd\0" "vpgatherqq\0" "vphaddbd\0" + "vphaddbq\0" "vphaddbw\0" "vphaddd\0" "vphadddq\0" "vphaddsw\0" "vphaddubd\0" + "vphaddubq\0" "vphaddubw\0" "vphaddudq\0" "vphadduwd\0" "vphadduwq\0" + "vphaddw\0" "vphaddwd\0" "vphaddwq\0" "vphminposuw\0" "vphsubbw\0" + "vphsubd\0" "vphsubdq\0" "vphsubsw\0" "vphsubw\0" "vphsubwd\0" "vpinsrb\0" + "vpinsrd\0" "vpinsrq\0" "vpinsrw\0" "vpmacsdd\0" "vpmacsdqh\0" "vpmacsdql\0" + "vpmacssdd\0" "vpmacssdqh\0" "vpmacssdql\0" "vpmacsswd\0" "vpmacssww\0" + "vpmacswd\0" "vpmacsww\0" "vpmadcsswd\0" "vpmadcswd\0" "vpmaddubsw\0" + "vpmaddwd\0" "vpmaskmovd\0" "vpmaskmovq\0" "vpmaxsb\0" "vpmaxsd\0" + "vpmaxsw\0" "vpmaxub\0" "vpmaxud\0" "vpmaxuw\0" "vpminsb\0" "vpminsd\0" + "vpminsw\0" "vpminub\0" "vpminud\0" "vpminuw\0" "vpmovmskb\0" "vpmovsxbd\0" + "vpmovsxbq\0" "vpmovsxbw\0" "vpmovsxdq\0" "vpmovsxwd\0" "vpmovsxwq\0" + "vpmovzxbd\0" "vpmovzxbq\0" "vpmovzxbw\0" "vpmovzxdq\0" "vpmovzxwd\0" + "vpmovzxwq\0" "vpmuldq\0" "vpmulhrsw\0" "vpmulhuw\0" 
"vpmulhw\0" "vpmulld\0" + "vpmullw\0" "vpmuludq\0" "vpor\0" "vpperm\0" "vprotb\0" "vprotd\0" "vprotq\0" + "vprotw\0" "vpsadbw\0" "vpshab\0" "vpshad\0" "vpshaq\0" "vpshaw\0" "vpshlb\0" + "vpshld\0" "vpshlq\0" "vpshlw\0" "vpshufb\0" "vpshufd\0" "vpshufhw\0" + "vpshuflw\0" "vpsignb\0" "vpsignd\0" "vpsignw\0" "vpslld\0" "vpslldq\0" + "vpsllq\0" "vpsllvd\0" "vpsllvq\0" "vpsllw\0" "vpsrad\0" "vpsravd\0" + "vpsraw\0" "vpsrld\0" "vpsrldq\0" "vpsrlq\0" "vpsrlvd\0" "vpsrlvq\0" + "vpsrlw\0" "vpsubb\0" "vpsubd\0" "vpsubq\0" "vpsubsb\0" "vpsubsw\0" + "vpsubusb\0" "vpsubusw\0" "vpsubw\0" "vptest\0" "vpunpckhbw\0" "vpunpckhdq\0" + "vpunpckhqdq\0" "vpunpckhwd\0" "vpunpcklbw\0" "vpunpckldq\0" "vpunpcklqdq\0" + "vpunpcklwd\0" "vpxor\0" "vrcpps\0" "vrcpss\0" "vroundpd\0" "vroundps\0" + "vroundsd\0" "vroundss\0" "vrsqrtps\0" "vrsqrtss\0" "vshufpd\0" "vshufps\0" + "vsqrtpd\0" "vsqrtps\0" "vsqrtsd\0" "vsqrtss\0" "vstmxcsr\0" "vsubpd\0" + "vsubps\0" "vsubsd\0" "vsubss\0" "vtestpd\0" "vtestps\0" "vucomisd\0" + "vucomiss\0" "vunpckhpd\0" "vunpckhps\0" "vunpcklpd\0" "vunpcklps\0" + "vxorpd\0" "vxorps\0" "vzeroall\0" "vzeroupper\0" "wrfsbase\0" "wrgsbase\0" + "xadd\0" "xgetbv\0" "xsaveopt\0" "xsaveopt64\0" "xsetbv"; +// ---------------------------------------------------------------------------- + +static const uint16_t _x86InstNameIndex[] = { + 0, 1, 5, 640, 3418, 3430, 3652, 3662, 3157, 3169, 10, 2391, 2399, 2411, 2419, + 2431, 2439, 1644, 4511, 2456, 2464, 2472, 2479, 15, 21, 29, 34, 40, 47, 2486, + 2495, 2504, 2514, 52, 60, 65, 71, 78, 83, 87, 91, 97, 100, 104, 108, 112, + 117, 122, 126, 130, 135, 139, 143, 151, 162, 166, 172, 497, 504, 179, 512, + 185, 191, 198, 204, 211, 218, 519, 527, 226, 536, 233, 240, 248, 255, 263, + 270, 277, 284, 291, 297, 303, 310, 317, 323, 329, 2580, 2587, 1902, 1915, + 1928, 1941, 2594, 2601, 333, 341, 352, 6267, 6276, 362, 368, 372, 2624, 2634, + 2644, 378, 2654, 387, 396, 2674, 2684, 405, 2704, 2714, 2724, 2734, 2744, + 2754, 2764, 414, 2775, 424, 2786, 2797, 434, 438, 443, 447, 2394, 659, 2808, + 2815, 2822, 2829, 2836, 2842, 627, 451, 2874, 4916, 457, 463, 1382, 468, 474, + 479, 485, 490, 496, 503, 511, 518, 526, 535, 543, 551, 558, 563, 569, 576, + 582, 589, 594, 602, 607, 613, 619, 626, 632, 638, 644, 650, 657, 663, 670, + 675, 681, 689, 695, 700, 706, 713, 719, 726, 730, 735, 741, 748, 755, 762, + 769, 776, 782, 1424, 787, 793, 800, 807, 812, 819, 826, 834, 841, 848, 854, + 861, 867, 875, 882, 888, 895, 900, 908, 914, 918, 924, 931, 936, 1486, 942, + 1492, 948, 955, 960, 966, 973, 981, 988, 996, 1002, 1007, 1012, 1020, 1030, + 1037, 1046, 1054, 1060, 3941, 3949, 3957, 3965, 658, 676, 1068, 3997, 1072, + 871, 1080, 1083, 1087, 1090, 1094, 1097, 1106, 1109, 1113, 1116, 1124, 1128, + 1133, 1137, 1142, 1146, 1150, 1154, 1159, 1163, 1168, 1172, 1176, 1180, 1184, + 1187, 1190, 1194, 1198, 1201, 1100, 1120, 1204, 4007, 4014, 1209, 1213, 1219, + 1672, 1683, 1694, 1705, 1226, 4023, 5362, 4057, 4064, 5381, 4078, 1232, 4085, + 4092, 5429, 4106, 1239, 4624, 1247, 4113, 4121, 505, 5355, 4135, 1255, 4144, + 4027, 4160, 4169, 4177, 4185, 4194, 4202, 4210, 4220, 4230, 4239, 1263, 4249, + 4258, 1270, 1277, 1285, 5366, 1293, 1716, 1727, 1738, 1749, 4273, 4280, 4290, + 4300, 1301, 1307, 4307, 4315, 1314, 4323, 677, 4332, 4339, 4346, 4353, 1320, + 1325, 1331, 808, 1335, 1017, 6325, 6332, 4372, 4379, 4386, 4393, 4403, 4413, + 4423, 4433, 4440, 4447, 4454, 4462, 4470, 4479, 4488, 4495, 4504, 4510, 1339, + 4517, 1345, 4524, 4540, 4550, 4611, 4629, 4638, 4647, 4656, 4665, 4676, 4687, + 4696, 4705, 
4714, 4723, 4734, 1353, 1358, 4899, 4907, 4915, 4923, 1363, 1369, + 1375, 1381, 1387, 1395, 1403, 1411, 1417, 1423, 1429, 1436, 1444, 1450, 1459, + 1468, 1477, 1485, 1491, 5002, 5019, 5088, 5114, 5135, 5152, 5161, 1498, 1504, + 5178, 5186, 5194, 5202, 5330, 5341, 5372, 5380, 5388, 5396, 5404, 5412, 5420, + 5428, 5436, 5444, 5452, 5460, 5468, 5478, 5488, 5498, 5508, 5518, 5528, 5538, + 5548, 5558, 5568, 5578, 5588, 5598, 5606, 1510, 5616, 5625, 5633, 5641, 5649, + 1518, 1522, 1527, 1534, 5658, 1539, 1548, 1562, 1572, 4324, 5762, 5770, 5778, + 5787, 1584, 5796, 5804, 5812, 5820, 5827, 5835, 5858, 5865, 5880, 5887, 5894, + 5902, 5925, 5932, 5939, 5946, 5953, 5961, 5969, 5978, 5987, 1591, 5994, 6001, + 6012, 6023, 6035, 6046, 6057, 6068, 6080, 1598, 1603, 1609, 6091, 1615, 6097, + 6104, 1619, 1623, 1632, 1641, 1648, 1655, 1661, 1668, 1679, 1690, 1701, 1712, + 1723, 1734, 1745, 1756, 1767, 1778, 1789, 1800, 1812, 1824, 1836, 1848, 1860, + 1872, 1884, 1896, 1909, 1922, 1935, 1948, 1961, 1974, 1987, 2000, 2004, 2008, + 2012, 6111, 6120, 6129, 6138, 6147, 6156, 2017, 2022, 2026, 2030, 2035, 1954, + 1967, 1980, 1993, 2039, 2044, 2050, 2055, 2061, 2066, 2071, 2076, 2082, 2087, + 2093, 2099, 2106, 2112, 2119, 2125, 2131, 2137, 2144, 2150, 2157, 2163, 2169, + 2175, 2181, 2186, 2191, 2197, 2203, 2208, 2213, 2220, 2229, 2238, 2248, 2258, + 2269, 2280, 2292, 5742, 2296, 2301, 2305, 2310, 6165, 6173, 6181, 6148, 6197, + 6157, 2315, 4580, 6213, 1760, 1771, 1782, 1793, 715, 3160, 3172, 3848, 3858, + 2319, 5995, 2326, 2332, 6266, 6275, 2338, 6284, 6294, 6304, 6314, 2342, 2349, + 2356, 2363, 2370, 2380, 2390, 2398, 2410, 2418, 2430, 2438, 2455, 2463, 2471, + 2478, 2485, 2494, 2503, 2513, 2523, 2538, 2553, 2566, 2579, 2586, 2593, 2600, + 2607, 2615, 2623, 2633, 2643, 2653, 2663, 2673, 2683, 2693, 2703, 2713, 2723, + 2733, 2743, 2753, 2763, 2774, 2785, 2796, 2807, 2814, 2821, 2828, 2835, 2841, + 2847, 2860, 2873, 2884, 2896, 2908, 2920, 2932, 2944, 2956, 2968, 2980, 2992, + 3004, 3016, 3028, 3037, 3046, 3055, 3064, 3079, 3094, 3109, 3124, 3139, 3154, + 3166, 3178, 3190, 3202, 3214, 3226, 3238, 3250, 3262, 3274, 3286, 3298, 3310, + 3322, 3337, 3352, 3367, 3382, 3397, 3412, 3424, 3436, 3445, 3454, 3463, 3472, + 3485, 3498, 3511, 3524, 3537, 3550, 3563, 3576, 3589, 3602, 3615, 3628, 3638, + 3648, 3658, 3668, 3681, 3694, 3707, 3720, 3733, 3746, 3759, 3772, 3785, 3798, + 3811, 3824, 3834, 3844, 3854, 3864, 3872, 3880, 3888, 3896, 3907, 3918, 3929, + 3940, 3948, 3956, 3964, 3972, 3984, 3996, 4006, 4013, 4022, 4034, 4045, 4056, + 4063, 4070, 4077, 4084, 4091, 4098, 4105, 4112, 4120, 4128, 4134, 4143, 4151, + 4159, 4168, 4176, 4184, 4193, 4201, 4209, 4219, 4229, 4238, 4248, 4257, 4266, + 4272, 4279, 4289, 4299, 4306, 4314, 4322, 4331, 4338, 4345, 4352, 4359, 4365, + 4371, 4378, 4385, 4392, 4402, 4412, 4422, 4432, 4439, 4446, 4453, 4461, 4469, + 4478, 4487, 4494, 4503, 4509, 4516, 4523, 4530, 4539, 4549, 4558, 4571, 4584, + 4597, 4610, 4621, 4628, 4637, 4646, 4655, 4664, 4675, 4686, 4695, 4704, 4713, + 4722, 4733, 4744, 4751, 4758, 4765, 4773, 4781, 4789, 4797, 4804, 4815, 4826, + 4833, 4844, 4855, 4865, 4875, 4883, 4891, 4898, 4906, 4914, 4922, 4930, 4941, + 4952, 4963, 4974, 4983, 4992, 5001, 5009, 5018, 5027, 5037, 5047, 5057, 5067, + 5077, 5087, 5095, 5104, 5113, 5125, 5134, 5142, 5151, 5160, 5168, 5177, 5185, + 5193, 5201, 5209, 5218, 5228, 5238, 5248, 5259, 5270, 5280, 5290, 5299, 5308, + 5319, 5329, 5340, 5349, 5360, 5371, 5379, 5387, 5395, 5403, 5411, 5419, 5427, + 5435, 5443, 5451, 5459, 5467, 5477, 5487, 5497, 
5507, 5517, 5527, 5537, 5547, + 5557, 5567, 5577, 5587, 5597, 5605, 5615, 5624, 5632, 5640, 5648, 5657, 5662, + 5669, 5676, 5683, 5690, 5697, 5705, 5712, 5719, 5726, 5733, 5740, 5747, 5754, + 5761, 5769, 5777, 5786, 5795, 5803, 5811, 5819, 5826, 5834, 5841, 5849, 5857, + 5864, 5871, 5879, 5886, 5893, 5901, 5908, 5916, 5924, 5931, 5938, 5945, 5952, + 5960, 5968, 5977, 5986, 5993, 6000, 6011, 6022, 6034, 6045, 6056, 6067, 6079, + 6090, 6096, 6103, 6110, 6119, 6128, 6137, 6146, 6155, 6164, 6172, 6180, 6188, + 6196, 6204, 6212, 6221, 6228, 6235, 6242, 6249, 6257, 6265, 6274, 6283, 6293, + 6303, 6313, 6323, 6330, 6337, 6346, 6357, 6366, 6375, 336, 6380, 6092, 6324, + 6331, 1013, 1021, 1031, 1038, 6387, 6396, 6407 +}; + +enum X86InstAlphaIndex { + kX86InstAlphaIndexFirst = 'a', + kX86InstAlphaIndexLast = 'z', + kX86InstAlphaIndexInvalid = 0xFFFF +}; + +static const uint16_t _x86InstAlphaIndex[26] = { + kX86InstIdAdc, + kX86InstIdBextr, + kX86InstIdCall, + kX86InstIdDaa, + kX86InstIdEmms, + kX86InstIdF2xm1, + 0xFFFF, + kX86InstIdHaddpd, + kX86InstIdIdiv, + kX86InstIdJa, + 0xFFFF, + kX86InstIdLahf, + kX86InstIdMaskmovdqu, + kX86InstIdNeg, + kX86InstIdOr, + kX86InstIdPabsb, + 0xFFFF, + kX86InstIdRcl, + kX86InstIdSahf, + kX86InstIdT1mskc, + kX86InstIdUcomisd, + kX86InstIdVaddpd, + kX86InstIdWrfsbase, + kX86InstIdXadd, + 0xFFFF, + 0xFFFF +}; +#endif // !ASMJIT_DISABLE_NAMES +// ---------------------------------------------------------------------------- + +// ------------------- Automatically generated, do not edit ------------------- +const X86InstExtendedInfo _x86InstExtendedInfo[] = { + { Enc(None) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(None) , U }, + { Enc(X86Arith) , 0 , 0 , 0x20, 0x3F, 0, { O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U }, F(RW)|F(Lock) , U }, + { Enc(X86RegRm) , 0 , 0 , 0x20, 0x20, 0, { O(Gqd) , O(GqdMem) , U , U , U }, F(RW) , U }, + { Enc(X86Arith) , 0 , 0 , 0x00, 0x3F, 0, { O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U }, F(RW)|F(Lock) , U }, + { Enc(SimdRm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , U , U , U }, F(RW) , U }, + { Enc(X86RegRm) , 0 , 0 , 0x01, 0x01, 0, { O(Gqd) , O(GqdMem) , U , U , U }, F(RW) , U }, + { Enc(SimdRm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , U , U , U }, F(WO) , U }, + { Enc(SimdRmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(Imm) , U , U }, F(WO) , U }, + { Enc(AvxRvm_OptW) , 0 , 0 , 0x00, 0x3F, 0, { O(Gqd) , O(Gqd) , O(GqdMem) , U , U }, F(RW) , U }, + { Enc(AvxRmv_OptW) , 0 , 0 , 0x00, 0x3F, 0, { O(Gqd) , O(GqdMem) , O(Gqd) , U , U }, F(RW) , U }, + { Enc(XopVm_OptW) , 0 , 0 , 0x00, 0x3F, 0, { O(Gqd) , O(GqdMem) , U , U , U }, F(WO) , U }, + { Enc(SimdRmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(Imm) , U , U }, F(RW) , U }, + { Enc(SimdRm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , U , U , U }, F(RW)|F(Special) , U }, + { Enc(AvxVm_OptW) , 0 , 0 , 0x00, 0x3F, 0, { O(Gqd) , O(GqdMem) , U , U , U }, F(RW) , U }, + { Enc(X86RegRm) , 0 , 0 , 0x00, 0x3F, 0, { O(Gqdw) , O(GqdwMem) , U , U , U }, F(RW) , U }, + { Enc(X86BSwap) , 0 , 0 , 0x00, 0x00, 0, { O(Gqd) , U , U , U , U }, F(RW) , U }, + { Enc(X86BTest) , 0 , 0 , 0x00, 0x3B, 0, { O(GqdwMem) , O(Gqdw)|O(Imm) , U , U , U }, F(RO) , O_000F00(BA,4,_,_,_) }, + { Enc(X86BTest) , 0 , 0 , 0x00, 0x3B, 0, { O(GqdwMem) , O(Gqdw)|O(Imm) , U , U , U }, F(RW)|F(Lock) , O_000F00(BA,7,_,_,_) }, + { Enc(X86BTest) , 0 , 0 , 0x00, 0x3B, 0, { O(GqdwMem) , O(Gqdw)|O(Imm) , U , U , U }, F(RW)|F(Lock) , O_000F00(BA,6,_,_,_) }, + { Enc(X86BTest) , 0 , 0 , 0x00, 0x3B, 0, { 
O(GqdwMem) , O(Gqdw)|O(Imm) , U , U , U }, F(RW)|F(Lock) , O_000F00(BA,5,_,_,_) }, + { Enc(X86Call) , 0 , 0 , 0x00, 0x00, 0, { O(GqdMem)|O(LImm) , U , U , U , U }, F(RW)|F(Flow)|F(Volatile) , O_000000(E8,U,_,_,_) }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(RW)|F(Special) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x20, 0, { U , U , U , U , U }, F(Volatile) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x40, 0, { U , U , U , U , U }, F(Volatile) , U }, + { Enc(X86M) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(RO)|F(Volatile) , U }, + { Enc(X86Op) , 0 , 0 , 0x20, 0x20, 0, { U , U , U , U , U }, F(None) , U }, + { Enc(X86RegRm) , 0 , 0 , 0x24, 0x00, 0, { O(Gqdw) , O(GqdwMem) , U , U , U }, F(RW) , U }, + { Enc(X86RegRm) , 0 , 0 , 0x20, 0x00, 0, { O(Gqdw) , O(GqdwMem) , U , U , U }, F(RW) , U }, + { Enc(X86RegRm) , 0 , 0 , 0x04, 0x00, 0, { O(Gqdw) , O(GqdwMem) , U , U , U }, F(RW) , U }, + { Enc(X86RegRm) , 0 , 0 , 0x07, 0x00, 0, { O(Gqdw) , O(GqdwMem) , U , U , U }, F(RW) , U }, + { Enc(X86RegRm) , 0 , 0 , 0x03, 0x00, 0, { O(Gqdw) , O(GqdwMem) , U , U , U }, F(RW) , U }, + { Enc(X86RegRm) , 0 , 0 , 0x01, 0x00, 0, { O(Gqdw) , O(GqdwMem) , U , U , U }, F(RW) , U }, + { Enc(X86RegRm) , 0 , 0 , 0x10, 0x00, 0, { O(Gqdw) , O(GqdwMem) , U , U , U }, F(RW) , U }, + { Enc(X86RegRm) , 0 , 0 , 0x02, 0x00, 0, { O(Gqdw) , O(GqdwMem) , U , U , U }, F(RW) , U }, + { Enc(X86Arith) , 0 , 0 , 0x00, 0x3F, 0, { O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U }, F(RO) , U }, + { Enc(X86Op) , 0 , 0 , 0x40, 0x3F, 0, { U , U , U , U , U }, F(RW)|F(Volatile)|F(Special) , U }, + { Enc(X86Op_66H) , 0 , 0 , 0x40, 0x3F, 0, { U , U , U , U , U }, F(RW)|F(Volatile)|F(Special) , U }, + { Enc(X86RmReg) , 0 , 0 , 0x00, 0x3F, 0, { U , U , U , U , U }, F(RW)|F(Lock)|F(Special) , U }, + { Enc(X86M) , 0 , 0 , 0x00, 0x04, 0, { O(Mem) , U , U , U , U }, F(RW)|F(Lock)|F(Special) , U }, + { Enc(SimdRm) , 0 , 0 , 0x00, 0x3F, 0, { O(Xmm) , O(XmmMem) , U , U , U }, F(RO) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(RW)|F(Volatile)|F(Special) , U }, + { Enc(X86Crc) , 0 , 0 , 0x00, 0x00, 0, { O(Gqd) , O(GqdwbMem) , U , U , U }, F(RW) , U }, + { Enc(SimdRm) , 0 , 16, 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , U , U , U }, F(WO) , U }, + { Enc(SimdRm) , 0 , 8 , 0x00, 0x00, 0, { O(Mm) , O(XmmMem) , U , U , U }, F(WO) , U }, + { Enc(SimdRm) , 0 , 16, 0x00, 0x00, 0, { O(Xmm) , O(MmMem) , U , U , U }, F(WO) , U }, + { Enc(SimdRm) , 0 , 8 , 0x00, 0x00, 0, { O(Xmm) , O(MmMem) , U , U , U }, F(WO) , U }, + { Enc(SimdRm_Q) , 0 , 8 , 0x00, 0x00, 0, { O(Gqd) , O(XmmMem) , U , U , U }, F(WO) , U }, + { Enc(SimdRm) , 0 , 4 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , U , U , U }, F(WO) , U }, + { Enc(SimdRm_Q) , 0 , 8 , 0x00, 0x00, 0, { O(Xmm) , O(GqdMem) , U , U , U }, F(WO) , U }, + { Enc(SimdRm_Q) , 0 , 4 , 0x00, 0x00, 0, { O(Xmm) , O(GqdMem) , U , U , U }, F(WO) , U }, + { Enc(SimdRm) , 0 , 8 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , U , U , U }, F(WO) , U }, + { Enc(X86Op) , 0 , 0 , 0x28, 0x3F, 0, { U , U , U , U , U }, F(RW)|F(Special) , U }, + { Enc(X86IncDec) , 0 , 0 , 0x00, 0x1F, 0, { O(GqdwbMem) , U , U , U , U }, F(RW)|F(Lock) , O_000000(48,U,_,_,_) }, + { Enc(X86Rm_B) , 0 , 0 , 0x00, 0x3F, 0, { U , U , U , U , U }, F(RW)|F(Special) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(Volatile) , U }, + { Enc(X86Enter) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(Volatile)|F(Special) , U }, + { Enc(SimdExtract) , 0 , 8 , 0x00, 0x00, 0, { O(GqdMem) , O(Xmm) , U , U , U }, F(WO) , U }, + 
{ Enc(SimdExtrq) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm)|O(Imm) , O(None)|O(Imm) , U , U }, F(RW) , O_660F00(78,0,_,_,_) }, + { Enc(FpuOp) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(Fp) , U }, + { Enc(FpuArith) , 0 , 0 , 0x00, 0x00, 0, { O(FpMem) , O(Fp) , U , U , U }, F(Fp)|F(Mem4_8) , U }, + { Enc(FpuRDef) , 0 , 0 , 0x00, 0x00, 0, { O(Fp) , U , U , U , U }, F(Fp) , U }, + { Enc(X86M) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(Fp) , U }, + { Enc(FpuR) , 0 , 0 , 0x20, 0x00, 0, { O(Fp) , U , U , U , U }, F(Fp) , U }, + { Enc(FpuR) , 0 , 0 , 0x24, 0x00, 0, { O(Fp) , U , U , U , U }, F(Fp) , U }, + { Enc(FpuR) , 0 , 0 , 0x04, 0x00, 0, { O(Fp) , U , U , U , U }, F(Fp) , U }, + { Enc(FpuR) , 0 , 0 , 0x10, 0x00, 0, { O(Fp) , U , U , U , U }, F(Fp) , U }, + { Enc(FpuCom) , 0 , 0 , 0x00, 0x00, 0, { O(Fp)|O(Mem) , O(Fp) , U , U , U }, F(Fp) , U }, + { Enc(FpuR) , 0 , 0 , 0x00, 0x3F, 0, { O(Fp) , U , U , U , U }, F(Fp) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(Fp) , U }, + { Enc(FpuR) , 0 , 0 , 0x00, 0x00, 0, { O(Fp) , U , U , U , U }, F(Fp) , U }, + { Enc(FpuM) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(Fp)|F(Mem2_4) , U }, + { Enc(FpuM) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(Fp)|F(Mem2_4_8) , O_000000(DF,5,_,_,_) }, + { Enc(FpuM) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(Fp)|F(Mem2_4_8) , O_000000(DF,7,_,_,_) }, + { Enc(FpuM) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(Fp)|F(Mem2_4_8) , O_000000(DD,1,_,_,_) }, + { Enc(FpuFldFst) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(Fp)|F(Mem4_8_10) , O_000000(DB,5,_,_,_) }, + { Enc(FpuStsw) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(Fp) , O_00_X(DFE0,U) }, + { Enc(FpuFldFst) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(Fp)|F(Mem4_8) , U }, + { Enc(FpuFldFst) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(Fp)|F(Mem4_8_10) , O_000000(DB,7,_,_,_) }, + { Enc(FpuStsw) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(Fp) , O_9B_X(DFE0,U) }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(Fp)|F(Volatile) , U }, + { Enc(X86Rm_B) , 0 , 0 , 0x00, 0x3F, 0, { 0 , 0 , U , U , U }, F(RW)|F(Special) , U }, + { Enc(X86Imul) , 0 , 0 , 0x00, 0x3F, 0, { 0 , 0 , U , U , U }, F(RW)|F(Special) , U }, + { Enc(X86IncDec) , 0 , 0 , 0x00, 0x1F, 0, { O(GqdwbMem) , U , U , U , U }, F(RW)|F(Lock) , O_000000(40,U,_,_,_) }, + { Enc(SimdInsertq) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(None)|O(Imm) , O(None)|O(Imm) , U }, F(RW) , O_F20F00(78,U,_,_,_) }, + { Enc(X86Int) , 0 , 0 , 0x00, 0x80, 0, { U , U , U , U , U }, F(Volatile) , U }, + { Enc(X86Jcc) , 0 , 0 , 0x24, 0x00, 0, { O(Label) , U , U , U , U }, F(Flow)|F(Volatile) , U }, + { Enc(X86Jcc) , 0 , 0 , 0x20, 0x00, 0, { O(Label) , U , U , U , U }, F(Flow)|F(Volatile) , U }, + { Enc(X86Jcc) , 0 , 0 , 0x04, 0x00, 0, { O(Label) , U , U , U , U }, F(Flow)|F(Volatile) , U }, + { Enc(X86Jcc) , 0 , 0 , 0x07, 0x00, 0, { O(Label) , U , U , U , U }, F(Flow)|F(Volatile) , U }, + { Enc(X86Jcc) , 0 , 0 , 0x03, 0x00, 0, { O(Label) , U , U , U , U }, F(Flow)|F(Volatile) , U }, + { Enc(X86Jcc) , 0 , 0 , 0x01, 0x00, 0, { O(Label) , U , U , U , U }, F(Flow)|F(Volatile) , U }, + { Enc(X86Jcc) , 0 , 0 , 0x10, 0x00, 0, { O(Label) , U , U , U , U }, F(Flow)|F(Volatile) , U }, + { Enc(X86Jcc) , 0 , 0 , 0x02, 0x00, 0, { O(Label) , U , U , U , U }, F(Flow)|F(Volatile) , U }, + { Enc(X86Jecxz) , 0 , 0 , 0x00, 0x00, 0, { O(Gqdw) , O(Label) , U , U , U }, F(Flow)|F(Volatile)|F(Special) , U }, 
+ { Enc(X86Jmp) , 0 , 0 , 0x00, 0x00, 0, { O(Label)|O(Imm) , U , U , U , U }, F(Flow)|F(Volatile) , O_000000(E9,U,_,_,_) }, + { Enc(X86Op) , 0 , 0 , 0x3E, 0x00, 0, { U , U , U , U , U }, F(RW)|F(Volatile)|F(Special) , U }, + { Enc(SimdRm) , 0 , 16, 0x00, 0x00, 0, { O(Xmm) , O(Mem) , U , U , U }, F(WO) , U }, + { Enc(X86Lea) , 0 , 0 , 0x00, 0x00, 0, { O(Gqd) , O(Mem) , U , U , U }, F(WO) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(Volatile)|F(Special) , U }, + { Enc(X86Fence) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(Volatile) , U }, + { Enc(X86Op) , 0 , 1 , 0x40, 0x00, 0, { U , U , U , U , U }, F(WO)|F(Special) , U }, + { Enc(X86Op) , 0 , 4 , 0x40, 0x00, 0, { U , U , U , U , U }, F(WO)|F(Special) , U }, + { Enc(X86Op) , 0 , 8 , 0x40, 0x00, 0, { U , U , U , U , U }, F(WO)|F(Special) , U }, + { Enc(X86Op_66H) , 0 , 2 , 0x40, 0x00, 0, { U , U , U , U , U }, F(WO)|F(Special) , U }, + { Enc(SimdRm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , U , U , U }, F(RW)|F(Special) , U }, + { Enc(SimdRm) , 0 , 0 , 0x00, 0x00, 0, { O(Mm) , O(Mm) , U , U , U }, F(RW)|F(Special) , U }, + { Enc(X86Fence) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(RW) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(RO)|F(Volatile)|F(Special) , U }, + { Enc(X86Mov) , 0 , 0 , 0x00, 0x00, 0, { O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U }, F(WO) , U }, + { Enc(X86MovPtr) , 0 , 0 , 0x00, 0x00, 0, { O(Gqdwb) , O(Imm) , U , U , U }, F(WO)|F(Special) , O_000000(A2,U,_,_,_) }, + { Enc(SimdMov) , 0 , 16, 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(WO) , O_660F00(29,U,_,_,_) }, + { Enc(SimdMov) , 0 , 16, 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(WO) , O_000F00(29,U,_,_,_) }, + { Enc(SimdMovBe) , 0 , 0 , 0x00, 0x00, 0, { O(GqdwMem) , O(GqdwMem) , U , U , U }, F(WO) , O_000F38(F1,U,_,_,_) }, + { Enc(SimdMovD) , 0 , 16, 0x00, 0x00, 0, { O(Gd)|O(MmXmmMem) , O(Gd)|O(MmXmmMem) , U , U , U }, F(WO) , O_000F00(7E,U,_,_,_) }, + { Enc(SimdMov) , 0 , 16, 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , U , U , U }, F(WO) , U }, + { Enc(SimdMov) , 0 , 8 , 0x00, 0x00, 0, { O(Mm) , O(Xmm) , U , U , U }, F(WO) , U }, + { Enc(SimdMov) , 0 , 16, 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(WO) , O_660F00(7F,U,_,_,_) }, + { Enc(SimdMov) , 0 , 16, 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(WO) , O_F30F00(7F,U,_,_,_) }, + { Enc(SimdMov) , 0 , 8 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , U , U , U }, F(WO) , U }, + { Enc(SimdMov) , 8 , 8 , 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(RW) , O_660F00(17,U,_,_,_) }, + { Enc(SimdMov) , 8 , 8 , 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(RW) , O_000F00(17,U,_,_,_) }, + { Enc(SimdMov) , 8 , 8 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , U , U , U }, F(RW) , U }, + { Enc(SimdMov) , 0 , 8 , 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(WO) , O_660F00(13,U,_,_,_) }, + { Enc(SimdMov) , 0 , 8 , 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(WO) , O_000F00(13,U,_,_,_) }, + { Enc(SimdMovNoRexW) , 0 , 8 , 0x00, 0x00, 0, { O(Gqd) , O(Xmm) , U , U , U }, F(WO) , U }, + { Enc(SimdMov) , 0 , 16, 0x00, 0x00, 0, { O(Mem) , O(Xmm) , U , U , U }, F(WO) , O_660F00(E7,U,_,_,_) }, + { Enc(SimdMov) , 0 , 16, 0x00, 0x00, 0, { O(Xmm) , O(Mem) , U , U , U }, F(WO) , U }, + { Enc(SimdMov) , 0 , 8 , 0x00, 0x00, 0, { O(Mem) , O(Gqd) , U , U , U }, F(WO) , O_000F00(C3,U,_,_,_) }, + { Enc(SimdMov) , 0 , 16, 0x00, 0x00, 0, { O(Mem) , O(Xmm) , U , U , U }, F(WO) , O_660F00(2B,U,_,_,_) }, + { Enc(SimdMov) , 0 , 
16, 0x00, 0x00, 0, { O(Mem) , O(Xmm) , U , U , U }, F(WO) , O_000F00(2B,U,_,_,_) }, + { Enc(SimdMov) , 0 , 8 , 0x00, 0x00, 0, { O(Mem) , O(Mm) , U , U , U }, F(WO) , O_000F00(E7,U,_,_,_) }, + { Enc(SimdMov) , 0 , 8 , 0x00, 0x00, 0, { O(Mem) , O(Xmm) , U , U , U }, F(WO) , O_F20F00(2B,U,_,_,_) }, + { Enc(SimdMov) , 0 , 4 , 0x00, 0x00, 0, { O(Mem) , O(Xmm) , U , U , U }, F(WO) , O_F30F00(2B,U,_,_,_) }, + { Enc(SimdMovQ) , 0 , 16, 0x00, 0x00, 0, { O(Gq)|O(MmXmmMem) , O(Gq)|O(MmXmmMem) , U , U , U }, F(WO) , O_000F00(7E,U,_,W,_) }, + { Enc(SimdRm) , 0 , 16, 0x00, 0x00, 0, { O(Xmm) , O(Mm) , U , U , U }, F(WO) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(WO)|F(Special) , U }, + { Enc(X86Op_66H) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(WO)|F(Special) , U }, + { Enc(SimdMov) , 0 , 8 , 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(WO)|F(ZeroIfMem) , O_F20F00(11,U,_,_,_) }, + { Enc(SimdMov) , 0 , 4 , 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(WO)|F(ZeroIfMem) , O_F30F00(11,U,_,_,_) }, + { Enc(X86MovsxMovzx) , 0 , 0 , 0x00, 0x00, 0, { O(Gqdw) , O(GwbMem) , U , U , U }, F(WO) , U }, + { Enc(X86Movsxd) , 0 , 0 , 0x00, 0x00, 0, { O(Gq) , O(GdMem) , U , U , U }, F(WO) , U }, + { Enc(SimdMov) , 0 , 16, 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(WO) , O_660F00(11,U,_,_,_) }, + { Enc(SimdMov) , 0 , 16, 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(WO) , O_000F00(11,U,_,_,_) }, + { Enc(AvxRvm_OptW) , 0 , 0 , 0x00, 0x00, 0, { O(Gqd) , O(Gqd) , O(GqdMem) , U , U }, F(RW) , U }, + { Enc(X86Rm_B) , 0 , 0 , 0x00, 0x3F, 0, { O(GqdwbMem) , U , U , U , U }, F(RW)|F(Lock) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(None) , U }, + { Enc(X86Rm_B) , 0 , 0 , 0x00, 0x00, 0, { O(GqdwbMem) , U , U , U , U }, F(RW)|F(Lock) , U }, + { Enc(SimdRm_P) , 0 , 0 , 0x00, 0x00, 0, { O(MmXmm) , O(MmXmmMem) , U , U , U }, F(RW) , U }, + { Enc(SimdRmi_P) , 0 , 0 , 0x00, 0x00, 0, { O(MmXmm) , O(MmXmmMem) , O(Imm) , U , U }, F(RW) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(RW) , U }, + { Enc(Simd3dNow) , 0 , 0 , 0x00, 0x00, 0, { O(Mm) , O(MmMem) , U , U , U }, F(RW) , U }, + { Enc(SimdRmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(Imm) , U , U }, F(WO)|F(Special) , U }, + { Enc(AvxRvm_OptW) , 0 , 0 , 0x00, 0x00, 0, { O(Gqd) , O(Gqd) , O(GqdMem) , U , U }, F(WO) , U }, + { Enc(SimdExtract) , 0 , 8 , 0x00, 0x00, 0, { O(Gd)|O(Gb)|O(Mem), O(Xmm) , U , U , U }, F(WO) , U }, + { Enc(SimdExtract) , 0 , 8 , 0x00, 0x00, 0, { O(GdMem) , O(Xmm) , U , U , U }, F(WO) , U }, + { Enc(SimdPextrw) , 0 , 8 , 0x00, 0x00, 0, { O(GdMem) , O(MmXmm) , U , U , U }, F(WO) , O_000F3A(15,U,_,_,_) }, + { Enc(Simd3dNow) , 0 , 8 , 0x00, 0x00, 0, { O(Mm) , O(MmMem) , U , U , U }, F(WO) , U }, + { Enc(SimdRmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(GdMem) , O(Imm) , U , U }, F(RW) , U }, + { Enc(SimdRmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(GqMem) , O(Imm) , U , U }, F(RW) , U }, + { Enc(SimdRmi_P) , 0 , 0 , 0x00, 0x00, 0, { O(MmXmm) , O(GdMem) , O(Imm) , U , U }, F(RW) , U }, + { Enc(SimdRm_PQ) , 0 , 8 , 0x00, 0x00, 0, { O(Gqd) , O(MmXmm) , U , U , U }, F(WO) , U }, + { Enc(X86Pop) , 0 , 0 , 0x00, 0x00, 0, { 0 , U , U , U , U }, F(WO)|F(Volatile)|F(Special) , O_000000(58,U,_,_,_) }, + { Enc(X86RegRm) , 0 , 0 , 0x00, 0x3F, 0, { O(Gqdw) , O(GqdwMem) , U , U , U }, F(WO) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0xFF, 0, { U , U , U , U , U }, F(Volatile)|F(Special) , U }, + { Enc(X86Prefetch) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , 
O(Imm) , U , U , U }, F(RO)|F(Volatile) , U }, + { Enc(X86M) , 0 , 0 , 0x00, 0x3F, 0, { O(Mem) , O(Imm) , U , U , U }, F(RO)|F(Volatile) , U }, + { Enc(SimdRmi) , 0 , 16, 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(Imm) , U , U }, F(WO) , U }, + { Enc(SimdRmi_P) , 0 , 8 , 0x00, 0x00, 0, { O(Mm) , O(MmMem) , O(Imm) , U , U }, F(WO) , U }, + { Enc(SimdRmRi_P) , 0 , 0 , 0x00, 0x00, 0, { O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U }, F(RW) , O_000F00(72,6,_,_,_) }, + { Enc(SimdRmRi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Imm) , U , U , U }, F(RW) , O_660F00(73,7,_,_,_) }, + { Enc(SimdRmRi_P) , 0 , 0 , 0x00, 0x00, 0, { O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U }, F(RW) , O_000F00(73,6,_,_,_) }, + { Enc(SimdRmRi_P) , 0 , 0 , 0x00, 0x00, 0, { O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U }, F(RW) , O_000F00(71,6,_,_,_) }, + { Enc(SimdRmRi_P) , 0 , 0 , 0x00, 0x00, 0, { O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U }, F(RW) , O_000F00(72,4,_,_,_) }, + { Enc(SimdRmRi_P) , 0 , 0 , 0x00, 0x00, 0, { O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U }, F(RW) , O_000F00(71,4,_,_,_) }, + { Enc(SimdRmRi_P) , 0 , 0 , 0x00, 0x00, 0, { O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U }, F(RW) , O_000F00(72,2,_,_,_) }, + { Enc(SimdRmRi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Imm) , U , U , U }, F(RW) , O_660F00(73,3,_,_,_) }, + { Enc(SimdRmRi_P) , 0 , 0 , 0x00, 0x00, 0, { O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U }, F(RW) , O_000F00(73,2,_,_,_) }, + { Enc(SimdRmRi_P) , 0 , 0 , 0x00, 0x00, 0, { O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U }, F(RW) , O_000F00(71,2,_,_,_) }, + { Enc(X86Push) , 0 , 0 , 0x00, 0x00, 0, { 0 , U , U , U , U }, F(RO)|F(Volatile)|F(Special) , O_000000(50,U,_,_,_) }, + { Enc(X86Op) , 0 , 0 , 0xFF, 0x00, 0, { U , U , U , U , U }, F(Volatile)|F(Special) , U }, + { Enc(X86Rot) , 0 , 0 , 0x20, 0x21, 0, { O(GqdwbMem) , O(Gb)|O(Imm) , U , U , U }, F(RW)|F(Special) , U }, + { Enc(X86Rm) , 0 , 8 , 0x00, 0x00, 0, { O(Gqd) , U , U , U , U }, F(WO) , U }, + { Enc(X86Rm) , 0 , 8 , 0x00, 0x3F, 0, { O(Gqdw) , U , U , U , U }, F(WO) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(WO)|F(Volatile)|F(Special) , U }, + { Enc(X86Rep) , 0 , 0 , 0x40, 0x00, 0, { O(Mem) , U , U , U , U }, F(RW)|F(Volatile)|F(Special) , U }, + { Enc(X86Rep) , 0 , 0 , 0x40, 0x00, 0, { O(Mem) , O(Mem) , U , U , U }, F(RW)|F(Volatile)|F(Special) , U }, + { Enc(X86Rep) , 0 , 0 , 0x40, 0x3F, 0, { O(Mem) , O(Mem) , U , U , U }, F(RW)|F(Volatile)|F(Special) , U }, + { Enc(X86Ret) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(RW)|F(Volatile)|F(Special) , U }, + { Enc(X86Rot) , 0 , 0 , 0x00, 0x21, 0, { O(GqdwbMem) , O(Gb)|O(Imm) , U , U , U }, F(RW)|F(Special) , U }, + { Enc(AvxRmi_OptW) , 0 , 0 , 0x00, 0x00, 0, { O(Gqd) , O(GqdMem) , O(Imm) , U , U }, F(WO) , U }, + { Enc(SimdRmi) , 0 , 8 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(Imm) , U , U }, F(WO) , U }, + { Enc(SimdRmi) , 0 , 4 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(Imm) , U , U }, F(WO) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x3E, 0, { U , U , U , U , U }, F(RO)|F(Volatile)|F(Special) , U }, + { Enc(X86Rot) , 0 , 0 , 0x00, 0x3F, 0, { O(GqdwbMem) , O(Gb)|O(Imm) , U , U , U }, F(RW)|F(Special) , U }, + { Enc(AvxRmv_OptW) , 0 , 0 , 0x00, 0x00, 0, { O(Gqd) , O(GqdMem) , O(Gqd) , U , U }, F(WO) , U }, + { Enc(X86Set) , 0 , 1 , 0x24, 0x00, 0, { O(GbMem) , U , U , U , U }, F(WO) , U }, + { Enc(X86Set) , 0 , 1 , 0x20, 0x00, 0, { O(GbMem) , U , U , U , U }, F(WO) , U }, + { Enc(X86Set) , 0 , 1 , 0x04, 0x00, 0, { O(GbMem) , U , U , U , U }, F(WO) , U }, + { Enc(X86Set) , 0 , 1 , 0x07, 0x00, 0, { 
O(GbMem) , U , U , U , U }, F(WO) , U }, + { Enc(X86Set) , 0 , 1 , 0x03, 0x00, 0, { O(GbMem) , U , U , U , U }, F(WO) , U }, + { Enc(X86Set) , 0 , 1 , 0x01, 0x00, 0, { O(GbMem) , U , U , U , U }, F(WO) , U }, + { Enc(X86Set) , 0 , 1 , 0x10, 0x00, 0, { O(GbMem) , U , U , U , U }, F(WO) , U }, + { Enc(X86Set) , 0 , 1 , 0x02, 0x00, 0, { O(GbMem) , U , U , U , U }, F(WO) , U }, + { Enc(X86ShldShrd) , 0 , 0 , 0x00, 0x3F, 0, { O(GqdwbMem) , O(Gb) , U , U , U }, F(RW)|F(Special) , U }, + { Enc(X86ShldShrd) , 0 , 0 , 0x00, 0x3F, 0, { O(GqdwbMem) , O(Gqdwb) , U , U , U }, F(RW)|F(Special) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x20, 0, { U , U , U , U , U }, F(None) , U }, + { Enc(X86Op) , 0 , 0 , 0x00, 0x40, 0, { U , U , U , U , U }, F(None) , U }, + { Enc(X86M) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(Volatile) , U }, + { Enc(X86Op) , 0 , 0 , 0x40, 0x00, 0, { U , U , U , U , U }, F(RW)|F(Volatile)|F(Special) , U }, + { Enc(X86Op_66H) , 0 , 0 , 0x40, 0x00, 0, { U , U , U , U , U }, F(RW)|F(Volatile)|F(Special) , U }, + { Enc(X86Test) , 0 , 0 , 0x00, 0x3F, 0, { O(GqdwbMem) , O(Gqdwb)|O(Imm) , U , U , U }, F(RO) , O_000000(F6,U,_,_,_) }, + { Enc(AvxRvm_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(Xy) , O(XyMem) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(XmmMem) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(Imm) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(Xy) , O(XyMem) , O(Imm) , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmr_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(Xy) , O(XyMem) , O(Xy) , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRm) , 0 , 0 , 0x00, 0x00, 0, { O(Ymm) , O(Mem) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRm) , 0 , 0 , 0x00, 0x00, 0, { O(Ymm) , O(XmmMem) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRm_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XmmMem) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRm) , 0 , 0 , 0x00, 0x3F, 0, { O(Xmm) , O(XmmMem) , U , U , U }, F(RO)|F(Avx) , U }, + { Enc(AvxRm_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XyMem) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxMri_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(XmmMem) , O(Xy) , O(Imm) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRm) , 0 , 0 , 0x00, 0x00, 0, { O(Gqd) , O(XmmMem) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(GqdMem) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRm_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XyMem) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxMri) , 0 , 0 , 0x00, 0x00, 0, { O(XmmMem) , O(Ymm) , O(Imm) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxMri) , 0 , 0 , 0x00, 0x00, 0, { O(GqdMem) , O(Xmm) , O(Imm) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvm_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(Xy) , O(XyMem) , U , U }, F(RW)|F(Avx) , U }, + { Enc(AvxRvm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(XmmMem) , U , U }, F(RW)|F(Avx) , U }, + { Enc(Fma4_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U }, F(WO)|F(Avx) , U }, + { Enc(Fma4) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(XmmMem) , O(XmmMem) , U }, F(WO)|F(Avx) , U }, + { Enc(XopRm_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , 
O(XyMem) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(XopRm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxGather) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(Mem) , O(Xy) , U , U }, F(RW)|F(Avx) , U }, + { Enc(AvxGatherEx) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Mem) , O(Xmm) , U , U }, F(RW)|F(Avx) , U }, + { Enc(AvxRvmi) , 0 , 0 , 0x00, 0x00, 0, { O(Ymm) , O(Ymm) , O(XmmMem) , O(Imm) , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRm_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(Mem) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxM) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(RO)|F(Avx)|F(Volatile) , U }, + { Enc(AvxRm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , U , U , U }, F(RO)|F(Avx)|F(Special) , U }, + { Enc(AvxRvmMvr_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(Xy) , O(XyMem) , U , U }, F(RW)|F(Avx) , O_660F38(2F,U,_,_,_) }, + { Enc(AvxRvmMvr_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(Xy) , O(XyMem) , U , U }, F(RW)|F(Avx) , O_660F38(2E,U,_,_,_) }, + { Enc(AvxRmMr_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(WO)|F(Avx) , O_660F00(29,U,_,_,_) }, + { Enc(AvxRmMr_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(WO)|F(Avx) , O_000F00(29,U,_,_,_) }, + { Enc(AvxMovDQ) , 0 , 0 , 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(WO)|F(Avx) , O_660F00(7E,U,_,_,_) }, + { Enc(AvxRmMr_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(WO)|F(Avx) , O_660F00(7F,U,_,_,_) }, + { Enc(AvxRmMr_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(WO)|F(Avx) , O_F30F00(7F,U,_,_,_) }, + { Enc(AvxRvm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(Xmm) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmMr) , 0 , 0 , 0x00, 0x00, 0, { O(XmmMem) , O(Xmm) , O(Mem) , U , U }, F(WO)|F(Avx) , O_660F00(17,U,_,_,_) }, + { Enc(AvxRvmMr) , 0 , 0 , 0x00, 0x00, 0, { O(XmmMem) , O(Xmm) , O(Mem) , U , U }, F(WO)|F(Avx) , O_000F00(17,U,_,_,_) }, + { Enc(AvxRvmMr) , 0 , 0 , 0x00, 0x00, 0, { O(XmmMem) , O(Xmm) , O(Mem) , U , U }, F(WO)|F(Avx) , O_660F00(13,U,_,_,_) }, + { Enc(AvxRvmMr) , 0 , 0 , 0x00, 0x00, 0, { O(XmmMem) , O(Xmm) , O(Mem) , U , U }, F(WO)|F(Avx) , O_000F00(13,U,_,_,_) }, + { Enc(AvxRm_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Gqd) , O(Xy) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxMr_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , O(Xy) , U , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxMovSsSd) , 0 , 0 , 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , O(Xmm) , U , U }, F(WO)|F(Avx) , O_F20F00(11,U,_,_,_) }, + { Enc(AvxMovSsSd) , 0 , 0 , 0x00, 0x00, 0, { O(XmmMem) , O(Xmm) , O(Xmm) , U , U }, F(WO)|F(Avx) , O_F30F00(11,U,_,_,_) }, + { Enc(AvxRmMr_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(WO)|F(Avx) , O_660F00(11,U,_,_,_) }, + { Enc(AvxRmMr_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(WO)|F(Avx) , O_000F00(11,U,_,_,_) }, + { Enc(AvxRvmr) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(Xy) , O(XyMem) , O(Xy) , U }, F(WO)|F(Avx) , U }, + { Enc(XopRvrmRvmr_OptL), 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(Imm) , U , U }, F(WO)|F(Avx)|F(Special) , U }, + { Enc(XopRvmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmi) , 0 , 0 , 0x00, 0x00, 0, { O(Ymm) , O(Ymm) , O(YmmMem) , O(Imm) , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvm) , 0 , 0 , 0x00, 0x00, 0, { O(Ymm) , O(Ymm) , O(YmmMem) , U , U }, F(WO)|F(Avx) , U 
}, + { Enc(AvxRvrmRvmr_OptL), 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmRmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_660F3A(05,U,_,_,_) }, + { Enc(AvxRvmRmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_660F3A(04,U,_,_,_) }, + { Enc(AvxRmi) , 0 , 0 , 0x00, 0x00, 0, { O(Ymm) , O(YmmMem) , O(Imm) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxMri) , 0 , 0 , 0x00, 0x00, 0, { O(GqdwbMem) , O(Xmm) , O(Imm) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxMri) , 0 , 0 , 0x00, 0x00, 0, { O(GqMem) , O(Xmm) , O(Imm) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxMri) , 0 , 0 , 0x00, 0x00, 0, { O(GqdwMem) , O(Xmm) , O(Imm) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(GqdwbMem) , O(Imm) , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(GqdMem) , O(Imm) , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(GqMem) , O(Imm) , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(GqdwMem) , O(Imm) , U }, F(WO)|F(Avx) , U }, + { Enc(XopRvmr) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmMvr_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(Xy) , O(XyMem) , U , U }, F(WO)|F(Avx) , O_660F38(8E,U,_,_,_) }, + { Enc(XopRvrmRvmr) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(XmmMem) , O(XmmMem) , U }, F(WO)|F(Avx) , U }, + { Enc(XopRvmRmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(XmmMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_00_M08(C0,U,_,_,_) }, + { Enc(XopRvmRmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(XmmMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_00_M08(C2,U,_,_,_) }, + { Enc(XopRvmRmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(XmmMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_00_M08(C3,U,_,_,_) }, + { Enc(XopRvmRmi) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(XmmMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_00_M08(C1,U,_,_,_) }, + { Enc(XopRvmRmv) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , O(XmmMem) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , O(Imm) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmVmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_660F00(72,6,_,_,_) }, + { Enc(AvxVmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , O(Imm) , U , U }, F(WO)|F(Avx) , U }, + { Enc(AvxRvmVmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_660F00(73,6,_,_,_) }, + { Enc(AvxRvmVmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_660F00(71,6,_,_,_) }, + { Enc(AvxRvmVmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_660F00(72,4,_,_,_) }, + { Enc(AvxRvmVmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_660F00(71,4,_,_,_) }, + { Enc(AvxRvmVmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_660F00(72,2,_,_,_) }, + { Enc(AvxRvmVmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_660F00(73,2,_,_,_) }, + { Enc(AvxRvmVmi_OptL) , 0 , 0 , 0x00, 0x00, 0, { O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U }, F(WO)|F(Avx) , O_660F00(71,2,_,_,_) }, + { Enc(AvxRm_OptL) , 0 , 
0 , 0x00, 0x3F, 0, { O(Xy) , O(XyMem) , U , U , U }, F(RO)|F(Avx) , U },
+  { Enc(AvxM) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(Volatile)|F(Avx) , U },
+  { Enc(AvxOp) , 0 , 0 , 0x00, 0x00, 0, { U , U , U , U , U }, F(Volatile)|F(Avx) , U },
+  { Enc(X86Rm) , 0 , 0 , 0x00, 0x00, 0, { O(Gqd) , U , U , U , U }, F(RO)|F(Volatile) , U },
+  { Enc(X86Xadd) , 0 , 0 , 0x00, 0x3F, 0, { O(GqdwbMem) , O(Gqdwb) , U , U , U }, F(RW)|F(Xchg)|F(Lock) , U },
+  { Enc(X86Xchg) , 0 , 0 , 0x00, 0x00, 0, { O(GqdwbMem) , O(Gqdwb) , U , U , U }, F(RW)|F(Xchg)|F(Lock) , U },
+  { Enc(SimdRm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(XmmMem) , U , U , U }, F(RW)|F(None) , U },
+  { Enc(X86M) , 0 , 0 , 0x00, 0x00, 0, { O(Mem) , U , U , U , U }, F(RO)|F(Volatile)|F(Special) , U }
+};
+// ----------------------------------------------------------------------------
+
+// ------------------- Automatically generated, do not edit -------------------
+enum X86InstData_ExtendedIndex {
+  kX86InstIdNone_ExtendedIndex = 0,
+  kX86InstIdAdc_ExtendedIndex = 1,
+  kX86InstIdAdcx_ExtendedIndex = 2,
+  kX86InstIdAdd_ExtendedIndex = 3,
+  kX86InstIdAddpd_ExtendedIndex = 4,
+  kX86InstIdAddps_ExtendedIndex = 4,
+  kX86InstIdAddsd_ExtendedIndex = 4,
+  kX86InstIdAddss_ExtendedIndex = 4,
+  kX86InstIdAddsubpd_ExtendedIndex = 4,
+  kX86InstIdAddsubps_ExtendedIndex = 4,
+  kX86InstIdAdox_ExtendedIndex = 5,
+  kX86InstIdAesdec_ExtendedIndex = 4,
+  kX86InstIdAesdeclast_ExtendedIndex = 4,
+  kX86InstIdAesenc_ExtendedIndex = 4,
+  kX86InstIdAesenclast_ExtendedIndex = 4,
+  kX86InstIdAesimc_ExtendedIndex = 6,
+  kX86InstIdAeskeygenassist_ExtendedIndex = 7,
+  kX86InstIdAnd_ExtendedIndex = 3,
+  kX86InstIdAndn_ExtendedIndex = 8,
+  kX86InstIdAndnpd_ExtendedIndex = 4,
+  kX86InstIdAndnps_ExtendedIndex = 4,
+  kX86InstIdAndpd_ExtendedIndex = 4,
+  kX86InstIdAndps_ExtendedIndex = 4,
+  kX86InstIdBextr_ExtendedIndex = 9,
+  kX86InstIdBlcfill_ExtendedIndex = 10,
+  kX86InstIdBlci_ExtendedIndex = 10,
+  kX86InstIdBlcic_ExtendedIndex = 10,
+  kX86InstIdBlcmsk_ExtendedIndex = 10,
+  kX86InstIdBlcs_ExtendedIndex = 10,
+  kX86InstIdBlendpd_ExtendedIndex = 11,
+  kX86InstIdBlendps_ExtendedIndex = 11,
+  kX86InstIdBlendvpd_ExtendedIndex = 12,
+  kX86InstIdBlendvps_ExtendedIndex = 12,
+  kX86InstIdBlsfill_ExtendedIndex = 10,
+  kX86InstIdBlsi_ExtendedIndex = 13,
+  kX86InstIdBlsic_ExtendedIndex = 10,
+  kX86InstIdBlsmsk_ExtendedIndex = 13,
+  kX86InstIdBlsr_ExtendedIndex = 13,
+  kX86InstIdBsf_ExtendedIndex = 14,
+  kX86InstIdBsr_ExtendedIndex = 14,
+  kX86InstIdBswap_ExtendedIndex = 15,
+  kX86InstIdBt_ExtendedIndex = 16,
+  kX86InstIdBtc_ExtendedIndex = 17,
+  kX86InstIdBtr_ExtendedIndex = 18,
+  kX86InstIdBts_ExtendedIndex = 19,
+  kX86InstIdBzhi_ExtendedIndex = 9,
+  kX86InstIdCall_ExtendedIndex = 20,
+  kX86InstIdCbw_ExtendedIndex = 21,
+  kX86InstIdCdq_ExtendedIndex = 21,
+  kX86InstIdCdqe_ExtendedIndex = 21,
+  kX86InstIdClc_ExtendedIndex = 22,
+  kX86InstIdCld_ExtendedIndex = 23,
+  kX86InstIdClflush_ExtendedIndex = 24,
+  kX86InstIdClflushopt_ExtendedIndex = 24,
+  kX86InstIdCmc_ExtendedIndex = 25,
+  kX86InstIdCmova_ExtendedIndex = 26,
+  kX86InstIdCmovae_ExtendedIndex = 27,
+  kX86InstIdCmovb_ExtendedIndex = 27,
+  kX86InstIdCmovbe_ExtendedIndex = 26,
+  kX86InstIdCmovc_ExtendedIndex = 27,
+  kX86InstIdCmove_ExtendedIndex = 28,
+  kX86InstIdCmovg_ExtendedIndex = 29,
+  kX86InstIdCmovge_ExtendedIndex = 30,
+  kX86InstIdCmovl_ExtendedIndex = 30,
+  kX86InstIdCmovle_ExtendedIndex = 29,
+  kX86InstIdCmovna_ExtendedIndex = 26,
+  kX86InstIdCmovnae_ExtendedIndex = 27,
+  kX86InstIdCmovnb_ExtendedIndex = 27,
kX86InstIdCmovnbe_ExtendedIndex = 26, + kX86InstIdCmovnc_ExtendedIndex = 27, + kX86InstIdCmovne_ExtendedIndex = 28, + kX86InstIdCmovng_ExtendedIndex = 29, + kX86InstIdCmovnge_ExtendedIndex = 30, + kX86InstIdCmovnl_ExtendedIndex = 30, + kX86InstIdCmovnle_ExtendedIndex = 29, + kX86InstIdCmovno_ExtendedIndex = 31, + kX86InstIdCmovnp_ExtendedIndex = 32, + kX86InstIdCmovns_ExtendedIndex = 33, + kX86InstIdCmovnz_ExtendedIndex = 28, + kX86InstIdCmovo_ExtendedIndex = 31, + kX86InstIdCmovp_ExtendedIndex = 32, + kX86InstIdCmovpe_ExtendedIndex = 32, + kX86InstIdCmovpo_ExtendedIndex = 32, + kX86InstIdCmovs_ExtendedIndex = 33, + kX86InstIdCmovz_ExtendedIndex = 28, + kX86InstIdCmp_ExtendedIndex = 34, + kX86InstIdCmppd_ExtendedIndex = 11, + kX86InstIdCmpps_ExtendedIndex = 11, + kX86InstIdCmpsB_ExtendedIndex = 35, + kX86InstIdCmpsD_ExtendedIndex = 35, + kX86InstIdCmpsQ_ExtendedIndex = 35, + kX86InstIdCmpsW_ExtendedIndex = 36, + kX86InstIdCmpsd_ExtendedIndex = 11, + kX86InstIdCmpss_ExtendedIndex = 11, + kX86InstIdCmpxchg_ExtendedIndex = 37, + kX86InstIdCmpxchg16b_ExtendedIndex = 38, + kX86InstIdCmpxchg8b_ExtendedIndex = 38, + kX86InstIdComisd_ExtendedIndex = 39, + kX86InstIdComiss_ExtendedIndex = 39, + kX86InstIdCpuid_ExtendedIndex = 40, + kX86InstIdCqo_ExtendedIndex = 21, + kX86InstIdCrc32_ExtendedIndex = 41, + kX86InstIdCvtdq2pd_ExtendedIndex = 42, + kX86InstIdCvtdq2ps_ExtendedIndex = 42, + kX86InstIdCvtpd2dq_ExtendedIndex = 42, + kX86InstIdCvtpd2pi_ExtendedIndex = 43, + kX86InstIdCvtpd2ps_ExtendedIndex = 42, + kX86InstIdCvtpi2pd_ExtendedIndex = 44, + kX86InstIdCvtpi2ps_ExtendedIndex = 45, + kX86InstIdCvtps2dq_ExtendedIndex = 42, + kX86InstIdCvtps2pd_ExtendedIndex = 42, + kX86InstIdCvtps2pi_ExtendedIndex = 43, + kX86InstIdCvtsd2si_ExtendedIndex = 46, + kX86InstIdCvtsd2ss_ExtendedIndex = 47, + kX86InstIdCvtsi2sd_ExtendedIndex = 48, + kX86InstIdCvtsi2ss_ExtendedIndex = 49, + kX86InstIdCvtss2sd_ExtendedIndex = 50, + kX86InstIdCvtss2si_ExtendedIndex = 46, + kX86InstIdCvttpd2dq_ExtendedIndex = 42, + kX86InstIdCvttpd2pi_ExtendedIndex = 43, + kX86InstIdCvttps2dq_ExtendedIndex = 42, + kX86InstIdCvttps2pi_ExtendedIndex = 43, + kX86InstIdCvttsd2si_ExtendedIndex = 46, + kX86InstIdCvttss2si_ExtendedIndex = 46, + kX86InstIdCwd_ExtendedIndex = 21, + kX86InstIdCwde_ExtendedIndex = 21, + kX86InstIdDaa_ExtendedIndex = 51, + kX86InstIdDas_ExtendedIndex = 51, + kX86InstIdDec_ExtendedIndex = 52, + kX86InstIdDiv_ExtendedIndex = 53, + kX86InstIdDivpd_ExtendedIndex = 4, + kX86InstIdDivps_ExtendedIndex = 4, + kX86InstIdDivsd_ExtendedIndex = 4, + kX86InstIdDivss_ExtendedIndex = 4, + kX86InstIdDppd_ExtendedIndex = 11, + kX86InstIdDpps_ExtendedIndex = 11, + kX86InstIdEmms_ExtendedIndex = 54, + kX86InstIdEnter_ExtendedIndex = 55, + kX86InstIdExtractps_ExtendedIndex = 56, + kX86InstIdExtrq_ExtendedIndex = 57, + kX86InstIdF2xm1_ExtendedIndex = 58, + kX86InstIdFabs_ExtendedIndex = 58, + kX86InstIdFadd_ExtendedIndex = 59, + kX86InstIdFaddp_ExtendedIndex = 60, + kX86InstIdFbld_ExtendedIndex = 61, + kX86InstIdFbstp_ExtendedIndex = 61, + kX86InstIdFchs_ExtendedIndex = 58, + kX86InstIdFclex_ExtendedIndex = 58, + kX86InstIdFcmovb_ExtendedIndex = 62, + kX86InstIdFcmovbe_ExtendedIndex = 63, + kX86InstIdFcmove_ExtendedIndex = 64, + kX86InstIdFcmovnb_ExtendedIndex = 62, + kX86InstIdFcmovnbe_ExtendedIndex = 63, + kX86InstIdFcmovne_ExtendedIndex = 64, + kX86InstIdFcmovnu_ExtendedIndex = 65, + kX86InstIdFcmovu_ExtendedIndex = 65, + kX86InstIdFcom_ExtendedIndex = 66, + kX86InstIdFcomi_ExtendedIndex = 67, + kX86InstIdFcomip_ExtendedIndex = 67, + 
kX86InstIdFcomp_ExtendedIndex = 66, + kX86InstIdFcompp_ExtendedIndex = 58, + kX86InstIdFcos_ExtendedIndex = 58, + kX86InstIdFdecstp_ExtendedIndex = 58, + kX86InstIdFdiv_ExtendedIndex = 59, + kX86InstIdFdivp_ExtendedIndex = 60, + kX86InstIdFdivr_ExtendedIndex = 59, + kX86InstIdFdivrp_ExtendedIndex = 60, + kX86InstIdFemms_ExtendedIndex = 68, + kX86InstIdFfree_ExtendedIndex = 69, + kX86InstIdFiadd_ExtendedIndex = 70, + kX86InstIdFicom_ExtendedIndex = 70, + kX86InstIdFicomp_ExtendedIndex = 70, + kX86InstIdFidiv_ExtendedIndex = 70, + kX86InstIdFidivr_ExtendedIndex = 70, + kX86InstIdFild_ExtendedIndex = 71, + kX86InstIdFimul_ExtendedIndex = 70, + kX86InstIdFincstp_ExtendedIndex = 58, + kX86InstIdFinit_ExtendedIndex = 58, + kX86InstIdFist_ExtendedIndex = 70, + kX86InstIdFistp_ExtendedIndex = 72, + kX86InstIdFisttp_ExtendedIndex = 73, + kX86InstIdFisub_ExtendedIndex = 70, + kX86InstIdFisubr_ExtendedIndex = 70, + kX86InstIdFld_ExtendedIndex = 74, + kX86InstIdFld1_ExtendedIndex = 58, + kX86InstIdFldcw_ExtendedIndex = 61, + kX86InstIdFldenv_ExtendedIndex = 61, + kX86InstIdFldl2e_ExtendedIndex = 58, + kX86InstIdFldl2t_ExtendedIndex = 58, + kX86InstIdFldlg2_ExtendedIndex = 58, + kX86InstIdFldln2_ExtendedIndex = 58, + kX86InstIdFldpi_ExtendedIndex = 58, + kX86InstIdFldz_ExtendedIndex = 58, + kX86InstIdFmul_ExtendedIndex = 59, + kX86InstIdFmulp_ExtendedIndex = 60, + kX86InstIdFnclex_ExtendedIndex = 58, + kX86InstIdFninit_ExtendedIndex = 58, + kX86InstIdFnop_ExtendedIndex = 58, + kX86InstIdFnsave_ExtendedIndex = 61, + kX86InstIdFnstcw_ExtendedIndex = 61, + kX86InstIdFnstenv_ExtendedIndex = 61, + kX86InstIdFnstsw_ExtendedIndex = 75, + kX86InstIdFpatan_ExtendedIndex = 58, + kX86InstIdFprem_ExtendedIndex = 58, + kX86InstIdFprem1_ExtendedIndex = 58, + kX86InstIdFptan_ExtendedIndex = 58, + kX86InstIdFrndint_ExtendedIndex = 58, + kX86InstIdFrstor_ExtendedIndex = 61, + kX86InstIdFsave_ExtendedIndex = 61, + kX86InstIdFscale_ExtendedIndex = 58, + kX86InstIdFsin_ExtendedIndex = 58, + kX86InstIdFsincos_ExtendedIndex = 58, + kX86InstIdFsqrt_ExtendedIndex = 58, + kX86InstIdFst_ExtendedIndex = 76, + kX86InstIdFstcw_ExtendedIndex = 61, + kX86InstIdFstenv_ExtendedIndex = 61, + kX86InstIdFstp_ExtendedIndex = 77, + kX86InstIdFstsw_ExtendedIndex = 78, + kX86InstIdFsub_ExtendedIndex = 59, + kX86InstIdFsubp_ExtendedIndex = 60, + kX86InstIdFsubr_ExtendedIndex = 59, + kX86InstIdFsubrp_ExtendedIndex = 60, + kX86InstIdFtst_ExtendedIndex = 58, + kX86InstIdFucom_ExtendedIndex = 60, + kX86InstIdFucomi_ExtendedIndex = 67, + kX86InstIdFucomip_ExtendedIndex = 67, + kX86InstIdFucomp_ExtendedIndex = 60, + kX86InstIdFucompp_ExtendedIndex = 58, + kX86InstIdFwait_ExtendedIndex = 79, + kX86InstIdFxam_ExtendedIndex = 58, + kX86InstIdFxch_ExtendedIndex = 69, + kX86InstIdFxrstor_ExtendedIndex = 61, + kX86InstIdFxrstor64_ExtendedIndex = 61, + kX86InstIdFxsave_ExtendedIndex = 61, + kX86InstIdFxsave64_ExtendedIndex = 61, + kX86InstIdFxtract_ExtendedIndex = 58, + kX86InstIdFyl2x_ExtendedIndex = 58, + kX86InstIdFyl2xp1_ExtendedIndex = 58, + kX86InstIdHaddpd_ExtendedIndex = 4, + kX86InstIdHaddps_ExtendedIndex = 4, + kX86InstIdHsubpd_ExtendedIndex = 4, + kX86InstIdHsubps_ExtendedIndex = 4, + kX86InstIdIdiv_ExtendedIndex = 80, + kX86InstIdImul_ExtendedIndex = 81, + kX86InstIdInc_ExtendedIndex = 82, + kX86InstIdInsertps_ExtendedIndex = 11, + kX86InstIdInsertq_ExtendedIndex = 83, + kX86InstIdInt_ExtendedIndex = 84, + kX86InstIdJa_ExtendedIndex = 85, + kX86InstIdJae_ExtendedIndex = 86, + kX86InstIdJb_ExtendedIndex = 86, + kX86InstIdJbe_ExtendedIndex = 85, 
+ kX86InstIdJc_ExtendedIndex = 86, + kX86InstIdJe_ExtendedIndex = 87, + kX86InstIdJg_ExtendedIndex = 88, + kX86InstIdJge_ExtendedIndex = 89, + kX86InstIdJl_ExtendedIndex = 89, + kX86InstIdJle_ExtendedIndex = 88, + kX86InstIdJna_ExtendedIndex = 85, + kX86InstIdJnae_ExtendedIndex = 86, + kX86InstIdJnb_ExtendedIndex = 86, + kX86InstIdJnbe_ExtendedIndex = 85, + kX86InstIdJnc_ExtendedIndex = 86, + kX86InstIdJne_ExtendedIndex = 87, + kX86InstIdJng_ExtendedIndex = 88, + kX86InstIdJnge_ExtendedIndex = 89, + kX86InstIdJnl_ExtendedIndex = 89, + kX86InstIdJnle_ExtendedIndex = 88, + kX86InstIdJno_ExtendedIndex = 90, + kX86InstIdJnp_ExtendedIndex = 91, + kX86InstIdJns_ExtendedIndex = 92, + kX86InstIdJnz_ExtendedIndex = 87, + kX86InstIdJo_ExtendedIndex = 90, + kX86InstIdJp_ExtendedIndex = 91, + kX86InstIdJpe_ExtendedIndex = 91, + kX86InstIdJpo_ExtendedIndex = 91, + kX86InstIdJs_ExtendedIndex = 92, + kX86InstIdJz_ExtendedIndex = 87, + kX86InstIdJecxz_ExtendedIndex = 93, + kX86InstIdJmp_ExtendedIndex = 94, + kX86InstIdLahf_ExtendedIndex = 95, + kX86InstIdLddqu_ExtendedIndex = 96, + kX86InstIdLdmxcsr_ExtendedIndex = 24, + kX86InstIdLea_ExtendedIndex = 97, + kX86InstIdLeave_ExtendedIndex = 98, + kX86InstIdLfence_ExtendedIndex = 99, + kX86InstIdLodsB_ExtendedIndex = 100, + kX86InstIdLodsD_ExtendedIndex = 101, + kX86InstIdLodsQ_ExtendedIndex = 102, + kX86InstIdLodsW_ExtendedIndex = 103, + kX86InstIdLzcnt_ExtendedIndex = 14, + kX86InstIdMaskmovdqu_ExtendedIndex = 104, + kX86InstIdMaskmovq_ExtendedIndex = 105, + kX86InstIdMaxpd_ExtendedIndex = 4, + kX86InstIdMaxps_ExtendedIndex = 4, + kX86InstIdMaxsd_ExtendedIndex = 4, + kX86InstIdMaxss_ExtendedIndex = 4, + kX86InstIdMfence_ExtendedIndex = 106, + kX86InstIdMinpd_ExtendedIndex = 4, + kX86InstIdMinps_ExtendedIndex = 4, + kX86InstIdMinsd_ExtendedIndex = 4, + kX86InstIdMinss_ExtendedIndex = 4, + kX86InstIdMonitor_ExtendedIndex = 107, + kX86InstIdMov_ExtendedIndex = 108, + kX86InstIdMovPtr_ExtendedIndex = 109, + kX86InstIdMovapd_ExtendedIndex = 110, + kX86InstIdMovaps_ExtendedIndex = 111, + kX86InstIdMovbe_ExtendedIndex = 112, + kX86InstIdMovd_ExtendedIndex = 113, + kX86InstIdMovddup_ExtendedIndex = 114, + kX86InstIdMovdq2q_ExtendedIndex = 115, + kX86InstIdMovdqa_ExtendedIndex = 116, + kX86InstIdMovdqu_ExtendedIndex = 117, + kX86InstIdMovhlps_ExtendedIndex = 118, + kX86InstIdMovhpd_ExtendedIndex = 119, + kX86InstIdMovhps_ExtendedIndex = 120, + kX86InstIdMovlhps_ExtendedIndex = 121, + kX86InstIdMovlpd_ExtendedIndex = 122, + kX86InstIdMovlps_ExtendedIndex = 123, + kX86InstIdMovmskpd_ExtendedIndex = 124, + kX86InstIdMovmskps_ExtendedIndex = 124, + kX86InstIdMovntdq_ExtendedIndex = 125, + kX86InstIdMovntdqa_ExtendedIndex = 126, + kX86InstIdMovnti_ExtendedIndex = 127, + kX86InstIdMovntpd_ExtendedIndex = 128, + kX86InstIdMovntps_ExtendedIndex = 129, + kX86InstIdMovntq_ExtendedIndex = 130, + kX86InstIdMovntsd_ExtendedIndex = 131, + kX86InstIdMovntss_ExtendedIndex = 132, + kX86InstIdMovq_ExtendedIndex = 133, + kX86InstIdMovq2dq_ExtendedIndex = 134, + kX86InstIdMovsB_ExtendedIndex = 135, + kX86InstIdMovsD_ExtendedIndex = 135, + kX86InstIdMovsQ_ExtendedIndex = 135, + kX86InstIdMovsW_ExtendedIndex = 136, + kX86InstIdMovsd_ExtendedIndex = 137, + kX86InstIdMovshdup_ExtendedIndex = 42, + kX86InstIdMovsldup_ExtendedIndex = 42, + kX86InstIdMovss_ExtendedIndex = 138, + kX86InstIdMovsx_ExtendedIndex = 139, + kX86InstIdMovsxd_ExtendedIndex = 140, + kX86InstIdMovupd_ExtendedIndex = 141, + kX86InstIdMovups_ExtendedIndex = 142, + kX86InstIdMovzx_ExtendedIndex = 139, + 
kX86InstIdMpsadbw_ExtendedIndex = 11, + kX86InstIdMul_ExtendedIndex = 80, + kX86InstIdMulpd_ExtendedIndex = 4, + kX86InstIdMulps_ExtendedIndex = 4, + kX86InstIdMulsd_ExtendedIndex = 4, + kX86InstIdMulss_ExtendedIndex = 4, + kX86InstIdMulx_ExtendedIndex = 143, + kX86InstIdMwait_ExtendedIndex = 107, + kX86InstIdNeg_ExtendedIndex = 144, + kX86InstIdNop_ExtendedIndex = 145, + kX86InstIdNot_ExtendedIndex = 146, + kX86InstIdOr_ExtendedIndex = 3, + kX86InstIdOrpd_ExtendedIndex = 4, + kX86InstIdOrps_ExtendedIndex = 4, + kX86InstIdPabsb_ExtendedIndex = 147, + kX86InstIdPabsd_ExtendedIndex = 147, + kX86InstIdPabsw_ExtendedIndex = 147, + kX86InstIdPackssdw_ExtendedIndex = 147, + kX86InstIdPacksswb_ExtendedIndex = 147, + kX86InstIdPackusdw_ExtendedIndex = 4, + kX86InstIdPackuswb_ExtendedIndex = 147, + kX86InstIdPaddb_ExtendedIndex = 147, + kX86InstIdPaddd_ExtendedIndex = 147, + kX86InstIdPaddq_ExtendedIndex = 147, + kX86InstIdPaddsb_ExtendedIndex = 147, + kX86InstIdPaddsw_ExtendedIndex = 147, + kX86InstIdPaddusb_ExtendedIndex = 147, + kX86InstIdPaddusw_ExtendedIndex = 147, + kX86InstIdPaddw_ExtendedIndex = 147, + kX86InstIdPalignr_ExtendedIndex = 148, + kX86InstIdPand_ExtendedIndex = 147, + kX86InstIdPandn_ExtendedIndex = 147, + kX86InstIdPause_ExtendedIndex = 149, + kX86InstIdPavgb_ExtendedIndex = 147, + kX86InstIdPavgusb_ExtendedIndex = 150, + kX86InstIdPavgw_ExtendedIndex = 147, + kX86InstIdPblendvb_ExtendedIndex = 12, + kX86InstIdPblendw_ExtendedIndex = 11, + kX86InstIdPclmulqdq_ExtendedIndex = 11, + kX86InstIdPcmpeqb_ExtendedIndex = 147, + kX86InstIdPcmpeqd_ExtendedIndex = 147, + kX86InstIdPcmpeqq_ExtendedIndex = 4, + kX86InstIdPcmpeqw_ExtendedIndex = 147, + kX86InstIdPcmpestri_ExtendedIndex = 151, + kX86InstIdPcmpestrm_ExtendedIndex = 151, + kX86InstIdPcmpgtb_ExtendedIndex = 147, + kX86InstIdPcmpgtd_ExtendedIndex = 147, + kX86InstIdPcmpgtq_ExtendedIndex = 4, + kX86InstIdPcmpgtw_ExtendedIndex = 147, + kX86InstIdPcmpistri_ExtendedIndex = 151, + kX86InstIdPcmpistrm_ExtendedIndex = 151, + kX86InstIdPdep_ExtendedIndex = 152, + kX86InstIdPext_ExtendedIndex = 152, + kX86InstIdPextrb_ExtendedIndex = 153, + kX86InstIdPextrd_ExtendedIndex = 154, + kX86InstIdPextrq_ExtendedIndex = 56, + kX86InstIdPextrw_ExtendedIndex = 155, + kX86InstIdPf2id_ExtendedIndex = 156, + kX86InstIdPf2iw_ExtendedIndex = 156, + kX86InstIdPfacc_ExtendedIndex = 150, + kX86InstIdPfadd_ExtendedIndex = 150, + kX86InstIdPfcmpeq_ExtendedIndex = 150, + kX86InstIdPfcmpge_ExtendedIndex = 150, + kX86InstIdPfcmpgt_ExtendedIndex = 150, + kX86InstIdPfmax_ExtendedIndex = 150, + kX86InstIdPfmin_ExtendedIndex = 150, + kX86InstIdPfmul_ExtendedIndex = 150, + kX86InstIdPfnacc_ExtendedIndex = 150, + kX86InstIdPfpnacc_ExtendedIndex = 150, + kX86InstIdPfrcp_ExtendedIndex = 156, + kX86InstIdPfrcpit1_ExtendedIndex = 150, + kX86InstIdPfrcpit2_ExtendedIndex = 150, + kX86InstIdPfrsqit1_ExtendedIndex = 150, + kX86InstIdPfrsqrt_ExtendedIndex = 150, + kX86InstIdPfsub_ExtendedIndex = 150, + kX86InstIdPfsubr_ExtendedIndex = 150, + kX86InstIdPhaddd_ExtendedIndex = 147, + kX86InstIdPhaddsw_ExtendedIndex = 147, + kX86InstIdPhaddw_ExtendedIndex = 147, + kX86InstIdPhminposuw_ExtendedIndex = 4, + kX86InstIdPhsubd_ExtendedIndex = 147, + kX86InstIdPhsubsw_ExtendedIndex = 147, + kX86InstIdPhsubw_ExtendedIndex = 147, + kX86InstIdPi2fd_ExtendedIndex = 156, + kX86InstIdPi2fw_ExtendedIndex = 156, + kX86InstIdPinsrb_ExtendedIndex = 157, + kX86InstIdPinsrd_ExtendedIndex = 157, + kX86InstIdPinsrq_ExtendedIndex = 158, + kX86InstIdPinsrw_ExtendedIndex = 159, + 
kX86InstIdPmaddubsw_ExtendedIndex = 147, + kX86InstIdPmaddwd_ExtendedIndex = 147, + kX86InstIdPmaxsb_ExtendedIndex = 4, + kX86InstIdPmaxsd_ExtendedIndex = 4, + kX86InstIdPmaxsw_ExtendedIndex = 147, + kX86InstIdPmaxub_ExtendedIndex = 147, + kX86InstIdPmaxud_ExtendedIndex = 4, + kX86InstIdPmaxuw_ExtendedIndex = 4, + kX86InstIdPminsb_ExtendedIndex = 4, + kX86InstIdPminsd_ExtendedIndex = 4, + kX86InstIdPminsw_ExtendedIndex = 147, + kX86InstIdPminub_ExtendedIndex = 147, + kX86InstIdPminud_ExtendedIndex = 4, + kX86InstIdPminuw_ExtendedIndex = 4, + kX86InstIdPmovmskb_ExtendedIndex = 160, + kX86InstIdPmovsxbd_ExtendedIndex = 42, + kX86InstIdPmovsxbq_ExtendedIndex = 42, + kX86InstIdPmovsxbw_ExtendedIndex = 42, + kX86InstIdPmovsxdq_ExtendedIndex = 42, + kX86InstIdPmovsxwd_ExtendedIndex = 42, + kX86InstIdPmovsxwq_ExtendedIndex = 42, + kX86InstIdPmovzxbd_ExtendedIndex = 42, + kX86InstIdPmovzxbq_ExtendedIndex = 42, + kX86InstIdPmovzxbw_ExtendedIndex = 42, + kX86InstIdPmovzxdq_ExtendedIndex = 42, + kX86InstIdPmovzxwd_ExtendedIndex = 42, + kX86InstIdPmovzxwq_ExtendedIndex = 42, + kX86InstIdPmuldq_ExtendedIndex = 4, + kX86InstIdPmulhrsw_ExtendedIndex = 147, + kX86InstIdPmulhrw_ExtendedIndex = 150, + kX86InstIdPmulhuw_ExtendedIndex = 147, + kX86InstIdPmulhw_ExtendedIndex = 147, + kX86InstIdPmulld_ExtendedIndex = 4, + kX86InstIdPmullw_ExtendedIndex = 147, + kX86InstIdPmuludq_ExtendedIndex = 147, + kX86InstIdPop_ExtendedIndex = 161, + kX86InstIdPopa_ExtendedIndex = 98, + kX86InstIdPopcnt_ExtendedIndex = 162, + kX86InstIdPopf_ExtendedIndex = 163, + kX86InstIdPor_ExtendedIndex = 147, + kX86InstIdPrefetch_ExtendedIndex = 164, + kX86InstIdPrefetch3dNow_ExtendedIndex = 24, + kX86InstIdPrefetchw_ExtendedIndex = 165, + kX86InstIdPrefetchwt1_ExtendedIndex = 165, + kX86InstIdPsadbw_ExtendedIndex = 147, + kX86InstIdPshufb_ExtendedIndex = 147, + kX86InstIdPshufd_ExtendedIndex = 166, + kX86InstIdPshufhw_ExtendedIndex = 166, + kX86InstIdPshuflw_ExtendedIndex = 166, + kX86InstIdPshufw_ExtendedIndex = 167, + kX86InstIdPsignb_ExtendedIndex = 147, + kX86InstIdPsignd_ExtendedIndex = 147, + kX86InstIdPsignw_ExtendedIndex = 147, + kX86InstIdPslld_ExtendedIndex = 168, + kX86InstIdPslldq_ExtendedIndex = 169, + kX86InstIdPsllq_ExtendedIndex = 170, + kX86InstIdPsllw_ExtendedIndex = 171, + kX86InstIdPsrad_ExtendedIndex = 172, + kX86InstIdPsraw_ExtendedIndex = 173, + kX86InstIdPsrld_ExtendedIndex = 174, + kX86InstIdPsrldq_ExtendedIndex = 175, + kX86InstIdPsrlq_ExtendedIndex = 176, + kX86InstIdPsrlw_ExtendedIndex = 177, + kX86InstIdPsubb_ExtendedIndex = 147, + kX86InstIdPsubd_ExtendedIndex = 147, + kX86InstIdPsubq_ExtendedIndex = 147, + kX86InstIdPsubsb_ExtendedIndex = 147, + kX86InstIdPsubsw_ExtendedIndex = 147, + kX86InstIdPsubusb_ExtendedIndex = 147, + kX86InstIdPsubusw_ExtendedIndex = 147, + kX86InstIdPsubw_ExtendedIndex = 147, + kX86InstIdPswapd_ExtendedIndex = 156, + kX86InstIdPtest_ExtendedIndex = 39, + kX86InstIdPunpckhbw_ExtendedIndex = 147, + kX86InstIdPunpckhdq_ExtendedIndex = 147, + kX86InstIdPunpckhqdq_ExtendedIndex = 4, + kX86InstIdPunpckhwd_ExtendedIndex = 147, + kX86InstIdPunpcklbw_ExtendedIndex = 147, + kX86InstIdPunpckldq_ExtendedIndex = 147, + kX86InstIdPunpcklqdq_ExtendedIndex = 4, + kX86InstIdPunpcklwd_ExtendedIndex = 147, + kX86InstIdPush_ExtendedIndex = 178, + kX86InstIdPusha_ExtendedIndex = 98, + kX86InstIdPushf_ExtendedIndex = 179, + kX86InstIdPxor_ExtendedIndex = 147, + kX86InstIdRcl_ExtendedIndex = 180, + kX86InstIdRcpps_ExtendedIndex = 42, + kX86InstIdRcpss_ExtendedIndex = 47, + kX86InstIdRcr_ExtendedIndex 
= 180, + kX86InstIdRdfsbase_ExtendedIndex = 181, + kX86InstIdRdgsbase_ExtendedIndex = 181, + kX86InstIdRdrand_ExtendedIndex = 182, + kX86InstIdRdseed_ExtendedIndex = 182, + kX86InstIdRdtsc_ExtendedIndex = 183, + kX86InstIdRdtscp_ExtendedIndex = 183, + kX86InstIdRepLodsB_ExtendedIndex = 184, + kX86InstIdRepLodsD_ExtendedIndex = 184, + kX86InstIdRepLodsQ_ExtendedIndex = 184, + kX86InstIdRepLodsW_ExtendedIndex = 184, + kX86InstIdRepMovsB_ExtendedIndex = 185, + kX86InstIdRepMovsD_ExtendedIndex = 185, + kX86InstIdRepMovsQ_ExtendedIndex = 185, + kX86InstIdRepMovsW_ExtendedIndex = 185, + kX86InstIdRepStosB_ExtendedIndex = 184, + kX86InstIdRepStosD_ExtendedIndex = 184, + kX86InstIdRepStosQ_ExtendedIndex = 184, + kX86InstIdRepStosW_ExtendedIndex = 184, + kX86InstIdRepeCmpsB_ExtendedIndex = 186, + kX86InstIdRepeCmpsD_ExtendedIndex = 186, + kX86InstIdRepeCmpsQ_ExtendedIndex = 186, + kX86InstIdRepeCmpsW_ExtendedIndex = 186, + kX86InstIdRepeScasB_ExtendedIndex = 186, + kX86InstIdRepeScasD_ExtendedIndex = 186, + kX86InstIdRepeScasQ_ExtendedIndex = 186, + kX86InstIdRepeScasW_ExtendedIndex = 186, + kX86InstIdRepneCmpsB_ExtendedIndex = 186, + kX86InstIdRepneCmpsD_ExtendedIndex = 186, + kX86InstIdRepneCmpsQ_ExtendedIndex = 186, + kX86InstIdRepneCmpsW_ExtendedIndex = 186, + kX86InstIdRepneScasB_ExtendedIndex = 186, + kX86InstIdRepneScasD_ExtendedIndex = 186, + kX86InstIdRepneScasQ_ExtendedIndex = 186, + kX86InstIdRepneScasW_ExtendedIndex = 186, + kX86InstIdRet_ExtendedIndex = 187, + kX86InstIdRol_ExtendedIndex = 188, + kX86InstIdRor_ExtendedIndex = 188, + kX86InstIdRorx_ExtendedIndex = 189, + kX86InstIdRoundpd_ExtendedIndex = 166, + kX86InstIdRoundps_ExtendedIndex = 166, + kX86InstIdRoundsd_ExtendedIndex = 190, + kX86InstIdRoundss_ExtendedIndex = 191, + kX86InstIdRsqrtps_ExtendedIndex = 42, + kX86InstIdRsqrtss_ExtendedIndex = 47, + kX86InstIdSahf_ExtendedIndex = 192, + kX86InstIdSal_ExtendedIndex = 193, + kX86InstIdSar_ExtendedIndex = 193, + kX86InstIdSarx_ExtendedIndex = 194, + kX86InstIdSbb_ExtendedIndex = 1, + kX86InstIdScasB_ExtendedIndex = 35, + kX86InstIdScasD_ExtendedIndex = 35, + kX86InstIdScasQ_ExtendedIndex = 35, + kX86InstIdScasW_ExtendedIndex = 36, + kX86InstIdSeta_ExtendedIndex = 195, + kX86InstIdSetae_ExtendedIndex = 196, + kX86InstIdSetb_ExtendedIndex = 196, + kX86InstIdSetbe_ExtendedIndex = 195, + kX86InstIdSetc_ExtendedIndex = 196, + kX86InstIdSete_ExtendedIndex = 197, + kX86InstIdSetg_ExtendedIndex = 198, + kX86InstIdSetge_ExtendedIndex = 199, + kX86InstIdSetl_ExtendedIndex = 199, + kX86InstIdSetle_ExtendedIndex = 198, + kX86InstIdSetna_ExtendedIndex = 195, + kX86InstIdSetnae_ExtendedIndex = 196, + kX86InstIdSetnb_ExtendedIndex = 196, + kX86InstIdSetnbe_ExtendedIndex = 195, + kX86InstIdSetnc_ExtendedIndex = 196, + kX86InstIdSetne_ExtendedIndex = 197, + kX86InstIdSetng_ExtendedIndex = 198, + kX86InstIdSetnge_ExtendedIndex = 199, + kX86InstIdSetnl_ExtendedIndex = 199, + kX86InstIdSetnle_ExtendedIndex = 198, + kX86InstIdSetno_ExtendedIndex = 200, + kX86InstIdSetnp_ExtendedIndex = 201, + kX86InstIdSetns_ExtendedIndex = 202, + kX86InstIdSetnz_ExtendedIndex = 197, + kX86InstIdSeto_ExtendedIndex = 200, + kX86InstIdSetp_ExtendedIndex = 201, + kX86InstIdSetpe_ExtendedIndex = 201, + kX86InstIdSetpo_ExtendedIndex = 201, + kX86InstIdSets_ExtendedIndex = 202, + kX86InstIdSetz_ExtendedIndex = 197, + kX86InstIdSfence_ExtendedIndex = 99, + kX86InstIdSha1msg1_ExtendedIndex = 4, + kX86InstIdSha1msg2_ExtendedIndex = 4, + kX86InstIdSha1nexte_ExtendedIndex = 4, + kX86InstIdSha1rnds4_ExtendedIndex = 11, + 
kX86InstIdSha256msg1_ExtendedIndex = 4,
+  kX86InstIdSha256msg2_ExtendedIndex = 4,
+  kX86InstIdSha256rnds2_ExtendedIndex = 4,
+  kX86InstIdShl_ExtendedIndex = 193,
+  kX86InstIdShld_ExtendedIndex = 203,
+  kX86InstIdShlx_ExtendedIndex = 194,
+  kX86InstIdShr_ExtendedIndex = 193,
+  kX86InstIdShrd_ExtendedIndex = 204,
+  kX86InstIdShrx_ExtendedIndex = 194,
+  kX86InstIdShufpd_ExtendedIndex = 11,
+  kX86InstIdShufps_ExtendedIndex = 11,
+  kX86InstIdSqrtpd_ExtendedIndex = 42,
+  kX86InstIdSqrtps_ExtendedIndex = 42,
+  kX86InstIdSqrtsd_ExtendedIndex = 50,
+  kX86InstIdSqrtss_ExtendedIndex = 47,
+  kX86InstIdStc_ExtendedIndex = 205,
+  kX86InstIdStd_ExtendedIndex = 206,
+  kX86InstIdStmxcsr_ExtendedIndex = 207,
+  kX86InstIdStosB_ExtendedIndex = 208,
+  kX86InstIdStosD_ExtendedIndex = 208,
+  kX86InstIdStosQ_ExtendedIndex = 208,
+  kX86InstIdStosW_ExtendedIndex = 209,
+  kX86InstIdSub_ExtendedIndex = 3,
+  kX86InstIdSubpd_ExtendedIndex = 4,
+  kX86InstIdSubps_ExtendedIndex = 4,
+  kX86InstIdSubsd_ExtendedIndex = 4,
+  kX86InstIdSubss_ExtendedIndex = 4,
+  kX86InstIdT1mskc_ExtendedIndex = 10,
+  kX86InstIdTest_ExtendedIndex = 210,
+  kX86InstIdTzcnt_ExtendedIndex = 162,
+  kX86InstIdTzmsk_ExtendedIndex = 10,
+  kX86InstIdUcomisd_ExtendedIndex = 39,
+  kX86InstIdUcomiss_ExtendedIndex = 39,
+  kX86InstIdUd2_ExtendedIndex = 145,
+  kX86InstIdUnpckhpd_ExtendedIndex = 4,
+  kX86InstIdUnpckhps_ExtendedIndex = 4,
+  kX86InstIdUnpcklpd_ExtendedIndex = 4,
+  kX86InstIdUnpcklps_ExtendedIndex = 4,
+  kX86InstIdVaddpd_ExtendedIndex = 211,
+  kX86InstIdVaddps_ExtendedIndex = 211,
+  kX86InstIdVaddsd_ExtendedIndex = 212,
+  kX86InstIdVaddss_ExtendedIndex = 212,
+  kX86InstIdVaddsubpd_ExtendedIndex = 211,
+  kX86InstIdVaddsubps_ExtendedIndex = 211,
+  kX86InstIdVaesdec_ExtendedIndex = 212,
+  kX86InstIdVaesdeclast_ExtendedIndex = 212,
+  kX86InstIdVaesenc_ExtendedIndex = 212,
+  kX86InstIdVaesenclast_ExtendedIndex = 212,
+  kX86InstIdVaesimc_ExtendedIndex = 213,
+  kX86InstIdVaeskeygenassist_ExtendedIndex = 214,
+  kX86InstIdVandnpd_ExtendedIndex = 211,
+  kX86InstIdVandnps_ExtendedIndex = 211,
+  kX86InstIdVandpd_ExtendedIndex = 211,
+  kX86InstIdVandps_ExtendedIndex = 211,
+  kX86InstIdVblendpd_ExtendedIndex = 215,
+  kX86InstIdVblendps_ExtendedIndex = 215,
+  kX86InstIdVblendvpd_ExtendedIndex = 216,
+  kX86InstIdVblendvps_ExtendedIndex = 216,
+  kX86InstIdVbroadcastf128_ExtendedIndex = 217,
+  kX86InstIdVbroadcasti128_ExtendedIndex = 217,
+  kX86InstIdVbroadcastsd_ExtendedIndex = 218,
+  kX86InstIdVbroadcastss_ExtendedIndex = 219,
+  kX86InstIdVcmppd_ExtendedIndex = 215,
+  kX86InstIdVcmpps_ExtendedIndex = 215,
+  kX86InstIdVcmpsd_ExtendedIndex = 220,
+  kX86InstIdVcmpss_ExtendedIndex = 220,
+  kX86InstIdVcomisd_ExtendedIndex = 221,
+  kX86InstIdVcomiss_ExtendedIndex = 221,
+  kX86InstIdVcvtdq2pd_ExtendedIndex = 219,
+  kX86InstIdVcvtdq2ps_ExtendedIndex = 222,
+  kX86InstIdVcvtpd2dq_ExtendedIndex = 223,
+  kX86InstIdVcvtpd2ps_ExtendedIndex = 223,
+  kX86InstIdVcvtph2ps_ExtendedIndex = 219,
+  kX86InstIdVcvtps2dq_ExtendedIndex = 222,
+  kX86InstIdVcvtps2pd_ExtendedIndex = 219,
+  kX86InstIdVcvtps2ph_ExtendedIndex = 224,
+  kX86InstIdVcvtsd2si_ExtendedIndex = 225,
+  kX86InstIdVcvtsd2ss_ExtendedIndex = 212,
+  kX86InstIdVcvtsi2sd_ExtendedIndex = 226,
+  kX86InstIdVcvtsi2ss_ExtendedIndex = 226,
+  kX86InstIdVcvtss2sd_ExtendedIndex = 212,
+  kX86InstIdVcvtss2si_ExtendedIndex = 225,
+  kX86InstIdVcvttpd2dq_ExtendedIndex = 227,
+  kX86InstIdVcvttps2dq_ExtendedIndex = 222,
+  kX86InstIdVcvttsd2si_ExtendedIndex = 225,
+  kX86InstIdVcvttss2si_ExtendedIndex = 225,
+  kX86InstIdVdivpd_ExtendedIndex = 211,
+  kX86InstIdVdivps_ExtendedIndex = 211,
+  kX86InstIdVdivsd_ExtendedIndex = 212,
+  kX86InstIdVdivss_ExtendedIndex = 212,
+  kX86InstIdVdppd_ExtendedIndex = 220,
+  kX86InstIdVdpps_ExtendedIndex = 215,
+  kX86InstIdVextractf128_ExtendedIndex = 228,
+  kX86InstIdVextracti128_ExtendedIndex = 228,
+  kX86InstIdVextractps_ExtendedIndex = 229,
+  kX86InstIdVfmadd132pd_ExtendedIndex = 230,
+  kX86InstIdVfmadd132ps_ExtendedIndex = 230,
+  kX86InstIdVfmadd132sd_ExtendedIndex = 231,
+  kX86InstIdVfmadd132ss_ExtendedIndex = 231,
+  kX86InstIdVfmadd213pd_ExtendedIndex = 230,
+  kX86InstIdVfmadd213ps_ExtendedIndex = 230,
+  kX86InstIdVfmadd213sd_ExtendedIndex = 231,
+  kX86InstIdVfmadd213ss_ExtendedIndex = 231,
+  kX86InstIdVfmadd231pd_ExtendedIndex = 230,
+  kX86InstIdVfmadd231ps_ExtendedIndex = 230,
+  kX86InstIdVfmadd231sd_ExtendedIndex = 231,
+  kX86InstIdVfmadd231ss_ExtendedIndex = 231,
+  kX86InstIdVfmaddpd_ExtendedIndex = 232,
+  kX86InstIdVfmaddps_ExtendedIndex = 232,
+  kX86InstIdVfmaddsd_ExtendedIndex = 233,
+  kX86InstIdVfmaddss_ExtendedIndex = 233,
+  kX86InstIdVfmaddsub132pd_ExtendedIndex = 230,
+  kX86InstIdVfmaddsub132ps_ExtendedIndex = 230,
+  kX86InstIdVfmaddsub213pd_ExtendedIndex = 230,
+  kX86InstIdVfmaddsub213ps_ExtendedIndex = 230,
+  kX86InstIdVfmaddsub231pd_ExtendedIndex = 230,
+  kX86InstIdVfmaddsub231ps_ExtendedIndex = 230,
+  kX86InstIdVfmaddsubpd_ExtendedIndex = 232,
+  kX86InstIdVfmaddsubps_ExtendedIndex = 232,
+  kX86InstIdVfmsub132pd_ExtendedIndex = 230,
+  kX86InstIdVfmsub132ps_ExtendedIndex = 230,
+  kX86InstIdVfmsub132sd_ExtendedIndex = 231,
+  kX86InstIdVfmsub132ss_ExtendedIndex = 231,
+  kX86InstIdVfmsub213pd_ExtendedIndex = 230,
+  kX86InstIdVfmsub213ps_ExtendedIndex = 230,
+  kX86InstIdVfmsub213sd_ExtendedIndex = 231,
+  kX86InstIdVfmsub213ss_ExtendedIndex = 231,
+  kX86InstIdVfmsub231pd_ExtendedIndex = 230,
+  kX86InstIdVfmsub231ps_ExtendedIndex = 230,
+  kX86InstIdVfmsub231sd_ExtendedIndex = 231,
+  kX86InstIdVfmsub231ss_ExtendedIndex = 231,
+  kX86InstIdVfmsubadd132pd_ExtendedIndex = 230,
+  kX86InstIdVfmsubadd132ps_ExtendedIndex = 230,
+  kX86InstIdVfmsubadd213pd_ExtendedIndex = 230,
+  kX86InstIdVfmsubadd213ps_ExtendedIndex = 230,
+  kX86InstIdVfmsubadd231pd_ExtendedIndex = 230,
+  kX86InstIdVfmsubadd231ps_ExtendedIndex = 230,
+  kX86InstIdVfmsubaddpd_ExtendedIndex = 232,
+  kX86InstIdVfmsubaddps_ExtendedIndex = 232,
+  kX86InstIdVfmsubpd_ExtendedIndex = 232,
+  kX86InstIdVfmsubps_ExtendedIndex = 232,
+  kX86InstIdVfmsubsd_ExtendedIndex = 233,
+  kX86InstIdVfmsubss_ExtendedIndex = 233,
+  kX86InstIdVfnmadd132pd_ExtendedIndex = 230,
+  kX86InstIdVfnmadd132ps_ExtendedIndex = 230,
+  kX86InstIdVfnmadd132sd_ExtendedIndex = 231,
+  kX86InstIdVfnmadd132ss_ExtendedIndex = 231,
+  kX86InstIdVfnmadd213pd_ExtendedIndex = 230,
+  kX86InstIdVfnmadd213ps_ExtendedIndex = 230,
+  kX86InstIdVfnmadd213sd_ExtendedIndex = 231,
+  kX86InstIdVfnmadd213ss_ExtendedIndex = 231,
+  kX86InstIdVfnmadd231pd_ExtendedIndex = 230,
+  kX86InstIdVfnmadd231ps_ExtendedIndex = 230,
+  kX86InstIdVfnmadd231sd_ExtendedIndex = 231,
+  kX86InstIdVfnmadd231ss_ExtendedIndex = 231,
+  kX86InstIdVfnmaddpd_ExtendedIndex = 232,
+  kX86InstIdVfnmaddps_ExtendedIndex = 232,
+  kX86InstIdVfnmaddsd_ExtendedIndex = 233,
+  kX86InstIdVfnmaddss_ExtendedIndex = 233,
+  kX86InstIdVfnmsub132pd_ExtendedIndex = 230,
+  kX86InstIdVfnmsub132ps_ExtendedIndex = 230,
+  kX86InstIdVfnmsub132sd_ExtendedIndex = 231,
+  kX86InstIdVfnmsub132ss_ExtendedIndex = 231,
+  kX86InstIdVfnmsub213pd_ExtendedIndex = 230,
+  kX86InstIdVfnmsub213ps_ExtendedIndex = 230,
+  kX86InstIdVfnmsub213sd_ExtendedIndex = 231,
+  kX86InstIdVfnmsub213ss_ExtendedIndex = 231,
+  kX86InstIdVfnmsub231pd_ExtendedIndex = 230,
+  kX86InstIdVfnmsub231ps_ExtendedIndex = 230,
+  kX86InstIdVfnmsub231sd_ExtendedIndex = 231,
+  kX86InstIdVfnmsub231ss_ExtendedIndex = 231,
+  kX86InstIdVfnmsubpd_ExtendedIndex = 232,
+  kX86InstIdVfnmsubps_ExtendedIndex = 232,
+  kX86InstIdVfnmsubsd_ExtendedIndex = 233,
+  kX86InstIdVfnmsubss_ExtendedIndex = 233,
+  kX86InstIdVfrczpd_ExtendedIndex = 234,
+  kX86InstIdVfrczps_ExtendedIndex = 234,
+  kX86InstIdVfrczsd_ExtendedIndex = 235,
+  kX86InstIdVfrczss_ExtendedIndex = 235,
+  kX86InstIdVgatherdpd_ExtendedIndex = 236,
+  kX86InstIdVgatherdps_ExtendedIndex = 236,
+  kX86InstIdVgatherqpd_ExtendedIndex = 236,
+  kX86InstIdVgatherqps_ExtendedIndex = 237,
+  kX86InstIdVhaddpd_ExtendedIndex = 211,
+  kX86InstIdVhaddps_ExtendedIndex = 211,
+  kX86InstIdVhsubpd_ExtendedIndex = 211,
+  kX86InstIdVhsubps_ExtendedIndex = 211,
+  kX86InstIdVinsertf128_ExtendedIndex = 238,
+  kX86InstIdVinserti128_ExtendedIndex = 238,
+  kX86InstIdVinsertps_ExtendedIndex = 220,
+  kX86InstIdVlddqu_ExtendedIndex = 239,
+  kX86InstIdVldmxcsr_ExtendedIndex = 240,
+  kX86InstIdVmaskmovdqu_ExtendedIndex = 241,
+  kX86InstIdVmaskmovpd_ExtendedIndex = 242,
+  kX86InstIdVmaskmovps_ExtendedIndex = 243,
+  kX86InstIdVmaxpd_ExtendedIndex = 211,
+  kX86InstIdVmaxps_ExtendedIndex = 211,
+  kX86InstIdVmaxsd_ExtendedIndex = 211,
+  kX86InstIdVmaxss_ExtendedIndex = 211,
+  kX86InstIdVminpd_ExtendedIndex = 211,
+  kX86InstIdVminps_ExtendedIndex = 211,
+  kX86InstIdVminsd_ExtendedIndex = 211,
+  kX86InstIdVminss_ExtendedIndex = 211,
+  kX86InstIdVmovapd_ExtendedIndex = 244,
+  kX86InstIdVmovaps_ExtendedIndex = 245,
+  kX86InstIdVmovd_ExtendedIndex = 246,
+  kX86InstIdVmovddup_ExtendedIndex = 222,
+  kX86InstIdVmovdqa_ExtendedIndex = 247,
+  kX86InstIdVmovdqu_ExtendedIndex = 248,
+  kX86InstIdVmovhlps_ExtendedIndex = 249,
+  kX86InstIdVmovhpd_ExtendedIndex = 250,
+  kX86InstIdVmovhps_ExtendedIndex = 251,
+  kX86InstIdVmovlhps_ExtendedIndex = 249,
+  kX86InstIdVmovlpd_ExtendedIndex = 252,
+  kX86InstIdVmovlps_ExtendedIndex = 253,
+  kX86InstIdVmovmskpd_ExtendedIndex = 254,
+  kX86InstIdVmovmskps_ExtendedIndex = 254,
+  kX86InstIdVmovntdq_ExtendedIndex = 255,
+  kX86InstIdVmovntdqa_ExtendedIndex = 239,
+  kX86InstIdVmovntpd_ExtendedIndex = 255,
+  kX86InstIdVmovntps_ExtendedIndex = 255,
+  kX86InstIdVmovq_ExtendedIndex = 246,
+  kX86InstIdVmovsd_ExtendedIndex = 256,
+  kX86InstIdVmovshdup_ExtendedIndex = 222,
+  kX86InstIdVmovsldup_ExtendedIndex = 222,
+  kX86InstIdVmovss_ExtendedIndex = 257,
+  kX86InstIdVmovupd_ExtendedIndex = 258,
+  kX86InstIdVmovups_ExtendedIndex = 259,
+  kX86InstIdVmpsadbw_ExtendedIndex = 215,
+  kX86InstIdVmulpd_ExtendedIndex = 211,
+  kX86InstIdVmulps_ExtendedIndex = 211,
+  kX86InstIdVmulsd_ExtendedIndex = 211,
+  kX86InstIdVmulss_ExtendedIndex = 211,
+  kX86InstIdVorpd_ExtendedIndex = 211,
+  kX86InstIdVorps_ExtendedIndex = 211,
+  kX86InstIdVpabsb_ExtendedIndex = 222,
+  kX86InstIdVpabsd_ExtendedIndex = 222,
+  kX86InstIdVpabsw_ExtendedIndex = 222,
+  kX86InstIdVpackssdw_ExtendedIndex = 211,
+  kX86InstIdVpacksswb_ExtendedIndex = 211,
+  kX86InstIdVpackusdw_ExtendedIndex = 211,
+  kX86InstIdVpackuswb_ExtendedIndex = 211,
+  kX86InstIdVpaddb_ExtendedIndex = 211,
+  kX86InstIdVpaddd_ExtendedIndex = 211,
+  kX86InstIdVpaddq_ExtendedIndex = 211,
+  kX86InstIdVpaddsb_ExtendedIndex = 211,
+  kX86InstIdVpaddsw_ExtendedIndex = 211,
+  kX86InstIdVpaddusb_ExtendedIndex = 211,
+  kX86InstIdVpaddusw_ExtendedIndex = 211,
+  kX86InstIdVpaddw_ExtendedIndex = 211,
+  kX86InstIdVpalignr_ExtendedIndex = 215,
+  kX86InstIdVpand_ExtendedIndex = 211,
+  kX86InstIdVpandn_ExtendedIndex = 211,
+  kX86InstIdVpavgb_ExtendedIndex = 211,
+  kX86InstIdVpavgw_ExtendedIndex = 211,
+  kX86InstIdVpblendd_ExtendedIndex = 215,
+  kX86InstIdVpblendvb_ExtendedIndex = 260,
+  kX86InstIdVpblendw_ExtendedIndex = 215,
+  kX86InstIdVpbroadcastb_ExtendedIndex = 219,
+  kX86InstIdVpbroadcastd_ExtendedIndex = 219,
+  kX86InstIdVpbroadcastq_ExtendedIndex = 219,
+  kX86InstIdVpbroadcastw_ExtendedIndex = 219,
+  kX86InstIdVpclmulqdq_ExtendedIndex = 220,
+  kX86InstIdVpcmov_ExtendedIndex = 261,
+  kX86InstIdVpcmpeqb_ExtendedIndex = 211,
+  kX86InstIdVpcmpeqd_ExtendedIndex = 211,
+  kX86InstIdVpcmpeqq_ExtendedIndex = 211,
+  kX86InstIdVpcmpeqw_ExtendedIndex = 211,
+  kX86InstIdVpcmpestri_ExtendedIndex = 262,
+  kX86InstIdVpcmpestrm_ExtendedIndex = 262,
+  kX86InstIdVpcmpgtb_ExtendedIndex = 211,
+  kX86InstIdVpcmpgtd_ExtendedIndex = 211,
+  kX86InstIdVpcmpgtq_ExtendedIndex = 211,
+  kX86InstIdVpcmpgtw_ExtendedIndex = 211,
+  kX86InstIdVpcmpistri_ExtendedIndex = 262,
+  kX86InstIdVpcmpistrm_ExtendedIndex = 262,
+  kX86InstIdVpcomb_ExtendedIndex = 263,
+  kX86InstIdVpcomd_ExtendedIndex = 263,
+  kX86InstIdVpcomq_ExtendedIndex = 263,
+  kX86InstIdVpcomub_ExtendedIndex = 263,
+  kX86InstIdVpcomud_ExtendedIndex = 263,
+  kX86InstIdVpcomuq_ExtendedIndex = 263,
+  kX86InstIdVpcomuw_ExtendedIndex = 263,
+  kX86InstIdVpcomw_ExtendedIndex = 263,
+  kX86InstIdVperm2f128_ExtendedIndex = 264,
+  kX86InstIdVperm2i128_ExtendedIndex = 264,
+  kX86InstIdVpermd_ExtendedIndex = 265,
+  kX86InstIdVpermil2pd_ExtendedIndex = 266,
+  kX86InstIdVpermil2ps_ExtendedIndex = 266,
+  kX86InstIdVpermilpd_ExtendedIndex = 267,
+  kX86InstIdVpermilps_ExtendedIndex = 268,
+  kX86InstIdVpermpd_ExtendedIndex = 269,
+  kX86InstIdVpermps_ExtendedIndex = 265,
+  kX86InstIdVpermq_ExtendedIndex = 269,
+  kX86InstIdVpextrb_ExtendedIndex = 270,
+  kX86InstIdVpextrd_ExtendedIndex = 229,
+  kX86InstIdVpextrq_ExtendedIndex = 271,
+  kX86InstIdVpextrw_ExtendedIndex = 272,
+  kX86InstIdVpgatherdd_ExtendedIndex = 236,
+  kX86InstIdVpgatherdq_ExtendedIndex = 236,
+  kX86InstIdVpgatherqd_ExtendedIndex = 237,
+  kX86InstIdVpgatherqq_ExtendedIndex = 236,
+  kX86InstIdVphaddbd_ExtendedIndex = 235,
+  kX86InstIdVphaddbq_ExtendedIndex = 235,
+  kX86InstIdVphaddbw_ExtendedIndex = 235,
+  kX86InstIdVphaddd_ExtendedIndex = 211,
+  kX86InstIdVphadddq_ExtendedIndex = 235,
+  kX86InstIdVphaddsw_ExtendedIndex = 211,
+  kX86InstIdVphaddubd_ExtendedIndex = 235,
+  kX86InstIdVphaddubq_ExtendedIndex = 235,
+  kX86InstIdVphaddubw_ExtendedIndex = 235,
+  kX86InstIdVphaddudq_ExtendedIndex = 235,
+  kX86InstIdVphadduwd_ExtendedIndex = 235,
+  kX86InstIdVphadduwq_ExtendedIndex = 235,
+  kX86InstIdVphaddw_ExtendedIndex = 211,
+  kX86InstIdVphaddwd_ExtendedIndex = 235,
+  kX86InstIdVphaddwq_ExtendedIndex = 235,
+  kX86InstIdVphminposuw_ExtendedIndex = 213,
+  kX86InstIdVphsubbw_ExtendedIndex = 235,
+  kX86InstIdVphsubd_ExtendedIndex = 211,
+  kX86InstIdVphsubdq_ExtendedIndex = 235,
+  kX86InstIdVphsubsw_ExtendedIndex = 211,
+  kX86InstIdVphsubw_ExtendedIndex = 211,
+  kX86InstIdVphsubwd_ExtendedIndex = 235,
+  kX86InstIdVpinsrb_ExtendedIndex = 273,
+  kX86InstIdVpinsrd_ExtendedIndex = 274,
+  kX86InstIdVpinsrq_ExtendedIndex = 275,
+  kX86InstIdVpinsrw_ExtendedIndex = 276,
+  kX86InstIdVpmacsdd_ExtendedIndex = 277,
+  kX86InstIdVpmacsdqh_ExtendedIndex = 277,
+  kX86InstIdVpmacsdql_ExtendedIndex = 277,
+  kX86InstIdVpmacssdd_ExtendedIndex = 277,
+  kX86InstIdVpmacssdqh_ExtendedIndex = 277,
+  kX86InstIdVpmacssdql_ExtendedIndex = 277,
+  kX86InstIdVpmacsswd_ExtendedIndex = 277,
+  kX86InstIdVpmacssww_ExtendedIndex = 277,
+  kX86InstIdVpmacswd_ExtendedIndex = 277,
+  kX86InstIdVpmacsww_ExtendedIndex = 277,
+  kX86InstIdVpmadcsswd_ExtendedIndex = 277,
+  kX86InstIdVpmadcswd_ExtendedIndex = 277,
+  kX86InstIdVpmaddubsw_ExtendedIndex = 211,
+  kX86InstIdVpmaddwd_ExtendedIndex = 211,
+  kX86InstIdVpmaskmovd_ExtendedIndex = 278,
+  kX86InstIdVpmaskmovq_ExtendedIndex = 278,
+  kX86InstIdVpmaxsb_ExtendedIndex = 211,
+  kX86InstIdVpmaxsd_ExtendedIndex = 211,
+  kX86InstIdVpmaxsw_ExtendedIndex = 211,
+  kX86InstIdVpmaxub_ExtendedIndex = 211,
+  kX86InstIdVpmaxud_ExtendedIndex = 211,
+  kX86InstIdVpmaxuw_ExtendedIndex = 211,
+  kX86InstIdVpminsb_ExtendedIndex = 211,
+  kX86InstIdVpminsd_ExtendedIndex = 211,
+  kX86InstIdVpminsw_ExtendedIndex = 211,
+  kX86InstIdVpminub_ExtendedIndex = 211,
+  kX86InstIdVpminud_ExtendedIndex = 211,
+  kX86InstIdVpminuw_ExtendedIndex = 211,
+  kX86InstIdVpmovmskb_ExtendedIndex = 254,
+  kX86InstIdVpmovsxbd_ExtendedIndex = 222,
+  kX86InstIdVpmovsxbq_ExtendedIndex = 222,
+  kX86InstIdVpmovsxbw_ExtendedIndex = 222,
+  kX86InstIdVpmovsxdq_ExtendedIndex = 222,
+  kX86InstIdVpmovsxwd_ExtendedIndex = 222,
+  kX86InstIdVpmovsxwq_ExtendedIndex = 222,
+  kX86InstIdVpmovzxbd_ExtendedIndex = 222,
+  kX86InstIdVpmovzxbq_ExtendedIndex = 222,
+  kX86InstIdVpmovzxbw_ExtendedIndex = 222,
+  kX86InstIdVpmovzxdq_ExtendedIndex = 222,
+  kX86InstIdVpmovzxwd_ExtendedIndex = 222,
+  kX86InstIdVpmovzxwq_ExtendedIndex = 222,
+  kX86InstIdVpmuldq_ExtendedIndex = 211,
+  kX86InstIdVpmulhrsw_ExtendedIndex = 211,
+  kX86InstIdVpmulhuw_ExtendedIndex = 211,
+  kX86InstIdVpmulhw_ExtendedIndex = 211,
+  kX86InstIdVpmulld_ExtendedIndex = 211,
+  kX86InstIdVpmullw_ExtendedIndex = 211,
+  kX86InstIdVpmuludq_ExtendedIndex = 211,
+  kX86InstIdVpor_ExtendedIndex = 211,
+  kX86InstIdVpperm_ExtendedIndex = 279,
+  kX86InstIdVprotb_ExtendedIndex = 280,
+  kX86InstIdVprotd_ExtendedIndex = 281,
+  kX86InstIdVprotq_ExtendedIndex = 282,
+  kX86InstIdVprotw_ExtendedIndex = 283,
+  kX86InstIdVpsadbw_ExtendedIndex = 211,
+  kX86InstIdVpshab_ExtendedIndex = 284,
+  kX86InstIdVpshad_ExtendedIndex = 284,
+  kX86InstIdVpshaq_ExtendedIndex = 284,
+  kX86InstIdVpshaw_ExtendedIndex = 284,
+  kX86InstIdVpshlb_ExtendedIndex = 284,
+  kX86InstIdVpshld_ExtendedIndex = 284,
+  kX86InstIdVpshlq_ExtendedIndex = 284,
+  kX86InstIdVpshlw_ExtendedIndex = 284,
+  kX86InstIdVpshufb_ExtendedIndex = 211,
+  kX86InstIdVpshufd_ExtendedIndex = 285,
+  kX86InstIdVpshufhw_ExtendedIndex = 285,
+  kX86InstIdVpshuflw_ExtendedIndex = 285,
+  kX86InstIdVpsignb_ExtendedIndex = 211,
+  kX86InstIdVpsignd_ExtendedIndex = 211,
+  kX86InstIdVpsignw_ExtendedIndex = 211,
+  kX86InstIdVpslld_ExtendedIndex = 286,
+  kX86InstIdVpslldq_ExtendedIndex = 287,
+  kX86InstIdVpsllq_ExtendedIndex = 288,
+  kX86InstIdVpsllvd_ExtendedIndex = 211,
+  kX86InstIdVpsllvq_ExtendedIndex = 211,
+  kX86InstIdVpsllw_ExtendedIndex = 289,
+  kX86InstIdVpsrad_ExtendedIndex = 290,
+  kX86InstIdVpsravd_ExtendedIndex = 211,
+  kX86InstIdVpsraw_ExtendedIndex = 291,
+  kX86InstIdVpsrld_ExtendedIndex = 292,
+  kX86InstIdVpsrldq_ExtendedIndex = 287,
+  kX86InstIdVpsrlq_ExtendedIndex = 293,
+  kX86InstIdVpsrlvd_ExtendedIndex = 211,
+  kX86InstIdVpsrlvq_ExtendedIndex = 211,
+  kX86InstIdVpsrlw_ExtendedIndex = 294,
+  kX86InstIdVpsubb_ExtendedIndex = 211,
+  kX86InstIdVpsubd_ExtendedIndex = 211,
+  kX86InstIdVpsubq_ExtendedIndex = 211,
+  kX86InstIdVpsubsb_ExtendedIndex = 211,
+  kX86InstIdVpsubsw_ExtendedIndex = 211,
+  kX86InstIdVpsubusb_ExtendedIndex = 211,
+  kX86InstIdVpsubusw_ExtendedIndex = 211,
+  kX86InstIdVpsubw_ExtendedIndex = 211,
+  kX86InstIdVptest_ExtendedIndex = 295,
+  kX86InstIdVpunpckhbw_ExtendedIndex = 211,
+  kX86InstIdVpunpckhdq_ExtendedIndex = 211,
+  kX86InstIdVpunpckhqdq_ExtendedIndex = 211,
+  kX86InstIdVpunpckhwd_ExtendedIndex = 211,
+  kX86InstIdVpunpcklbw_ExtendedIndex = 211,
+  kX86InstIdVpunpckldq_ExtendedIndex = 211,
+  kX86InstIdVpunpcklqdq_ExtendedIndex = 211,
+  kX86InstIdVpunpcklwd_ExtendedIndex = 211,
+  kX86InstIdVpxor_ExtendedIndex = 211,
+  kX86InstIdVrcpps_ExtendedIndex = 222,
+  kX86InstIdVrcpss_ExtendedIndex = 212,
+  kX86InstIdVroundpd_ExtendedIndex = 285,
+  kX86InstIdVroundps_ExtendedIndex = 285,
+  kX86InstIdVroundsd_ExtendedIndex = 220,
+  kX86InstIdVroundss_ExtendedIndex = 220,
+  kX86InstIdVrsqrtps_ExtendedIndex = 222,
+  kX86InstIdVrsqrtss_ExtendedIndex = 212,
+  kX86InstIdVshufpd_ExtendedIndex = 215,
+  kX86InstIdVshufps_ExtendedIndex = 215,
+  kX86InstIdVsqrtpd_ExtendedIndex = 222,
+  kX86InstIdVsqrtps_ExtendedIndex = 222,
+  kX86InstIdVsqrtsd_ExtendedIndex = 212,
+  kX86InstIdVsqrtss_ExtendedIndex = 212,
+  kX86InstIdVstmxcsr_ExtendedIndex = 296,
+  kX86InstIdVsubpd_ExtendedIndex = 211,
+  kX86InstIdVsubps_ExtendedIndex = 211,
+  kX86InstIdVsubsd_ExtendedIndex = 212,
+  kX86InstIdVsubss_ExtendedIndex = 212,
+  kX86InstIdVtestpd_ExtendedIndex = 295,
+  kX86InstIdVtestps_ExtendedIndex = 295,
+  kX86InstIdVucomisd_ExtendedIndex = 221,
+  kX86InstIdVucomiss_ExtendedIndex = 221,
+  kX86InstIdVunpckhpd_ExtendedIndex = 211,
+  kX86InstIdVunpckhps_ExtendedIndex = 211,
+  kX86InstIdVunpcklpd_ExtendedIndex = 211,
+  kX86InstIdVunpcklps_ExtendedIndex = 211,
+  kX86InstIdVxorpd_ExtendedIndex = 211,
+  kX86InstIdVxorps_ExtendedIndex = 211,
+  kX86InstIdVzeroall_ExtendedIndex = 297,
+  kX86InstIdVzeroupper_ExtendedIndex = 297,
+  kX86InstIdWrfsbase_ExtendedIndex = 298,
+  kX86InstIdWrgsbase_ExtendedIndex = 298,
+  kX86InstIdXadd_ExtendedIndex = 299,
+  kX86InstIdXchg_ExtendedIndex = 300,
+  kX86InstIdXgetbv_ExtendedIndex = 135,
+  kX86InstIdXor_ExtendedIndex = 3,
+  kX86InstIdXorpd_ExtendedIndex = 301,
+  kX86InstIdXorps_ExtendedIndex = 301,
+  kX86InstIdXrstor_ExtendedIndex = 302,
+  kX86InstIdXrstor64_ExtendedIndex = 302,
+  kX86InstIdXsave_ExtendedIndex = 302,
+  kX86InstIdXsave64_ExtendedIndex = 302,
+  kX86InstIdXsaveopt_ExtendedIndex = 302,
+  kX86InstIdXsaveopt64_ExtendedIndex = 302,
+  kX86InstIdXsetbv_ExtendedIndex = 107
+};
+// ----------------------------------------------------------------------------
+// ${X86InstData:End}
+
+// Please run tools/src-gendefs.js (by using just node.js, without any dependencies) to regenerate the code enclosed with ${X86InstData...}.
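For orientation: the `_ExtendedIndex` block above and the `_x86InstInfo` table that follows form a two-level lookup. Each instruction stores only a small index into a deduplicated array of extended metadata, so instructions with identical flag/operand signatures share one row (most two-operand AVX arithmetic forms map to 211, for example). A minimal C++ sketch of that indirection, using hypothetical names rather than AsmJit's actual declarations:

    #include <cstdint>

    // Hypothetical mirror of the generated tables (names are illustrative only).
    struct ExtInfo {
      uint32_t instFlags;       // shared F(...) instruction flags
      uint32_t operandFlags[5]; // shared O(...) operand masks
    };

    extern const ExtInfo  extInfoTable[];  // deduplicated metadata rows
    extern const uint16_t extIndexTable[]; // the kX86InstId..._ExtendedIndex values

    // Two-level lookup: instruction id -> shared metadata row.
    inline const ExtInfo& extInfoOf(uint32_t instId) {
      return extInfoTable[extIndexTable[instId]];
    }

Storing a 16-bit index per instruction instead of a full metadata row keeps the per-instruction table compact: roughly 300 distinct signatures cover the entire instruction set here.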
+const X86InstInfo _x86InstInfo[] = {
+  // <-----------------+-------------------+----------------------+-------------------------------------------+------------------------------------+-------------+-------+---------------------------------------------------------------------------------------------------+
+  // | | | Instruction Opcodes | Instruction Flags | E-FLAGS | Write | Operands (Gp/Fp/Mm/K/Xmm/Ymm/Zmm Regs, Mem, Imm, Label, None/Undefined) |
+  // Instruction Id | Instruction Name | Instruction Encoding +---------------------+---------------------+---------------+--------------------+-------------+---+---+-------------------+-------------------+-------------------+-------------------+-------------------+
+  // | | | 0:PP-MMM OP/O L/W/EW| 1:PP-MMM OP/O L/W/EW| Global Flags |A512(ID|VL|kz|rnd|b)| EF:OSZAPCDX |Idx| Sz| [0] 1st Operand | [1] 2nd Operand | [2] 3rd Operand | [3] 4th Operand | [4] 5th Operand |
+  // <-----------------+-------------------+----------------------+---------------------+---------------------+---------------+--------------------+-------------+---+---+-------------------+-------------------+-------------------+-------------------+-------------------+
+  INST(None , "" , Enc(None) , U , U , F(None) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Adc , "adc" , Enc(X86Arith) , O_000000(10,2,_,_,_), U , F(RW)|F(Lock) , EF(WWWWWX__), 0 , 0 , O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U ),
+  INST(Adcx , "adcx" , Enc(X86RegRm) , O_660F38(F6,U,_,_,_), U , F(RW) , EF(_____X__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ),
+  INST(Add , "add" , Enc(X86Arith) , O_000000(00,0,_,_,_), U , F(RW)|F(Lock) , EF(WWWWWW__), 0 , 0 , O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U ),
+  INST(Addpd , "addpd" , Enc(SimdRm) , O_660F00(58,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Addps , "addps" , Enc(SimdRm) , O_000F00(58,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Addsd , "addsd" , Enc(SimdRm) , O_F20F00(58,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Addss , "addss" , Enc(SimdRm) , O_F30F00(58,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Addsubpd , "addsubpd" , Enc(SimdRm) , O_660F00(D0,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Addsubps , "addsubps" , Enc(SimdRm) , O_F20F00(D0,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Adox , "adox" , Enc(X86RegRm) , O_F30F38(F6,U,_,_,_), U , F(RW) , EF(X_______), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ),
+  INST(Aesdec , "aesdec" , Enc(SimdRm) , O_660F38(DE,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Aesdeclast , "aesdeclast" , Enc(SimdRm) , O_660F38(DF,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Aesenc , "aesenc" , Enc(SimdRm) , O_660F38(DC,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Aesenclast , "aesenclast" , Enc(SimdRm) , O_660F38(DD,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Aesimc , "aesimc" , Enc(SimdRm) , O_660F38(DB,U,_,_,_), U , F(WO) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Aeskeygenassist , "aeskeygenassist" , Enc(SimdRmi) , O_660F3A(DF,U,_,_,_), U , F(WO) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ),
+  INST(And , "and" , Enc(X86Arith) , O_000000(20,4,_,_,_), U , F(RW)|F(Lock) , EF(WWWUWW__), 0 , 0 , O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U ),
+  INST(Andn , "andn" , Enc(AvxRvm_OptW) , O_000F38(F2,U,_,_,_), U , F(RW) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(Gqd) , O(GqdMem) , U , U ),
+  INST(Andnpd , "andnpd" , Enc(SimdRm) , O_660F00(55,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Andnps , "andnps" , Enc(SimdRm) , O_000F00(55,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Andpd , "andpd" , Enc(SimdRm) , O_660F00(54,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Andps , "andps" , Enc(SimdRm) , O_000F00(54,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Bextr , "bextr" , Enc(AvxRmv_OptW) , O_000F38(F7,U,_,_,_), U , F(RW) , EF(WUWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , O(Gqd) , U , U ),
+  INST(Blcfill , "blcfill" , Enc(XopVm_OptW) , O_00_M09(01,1,_,_,_), U , F(WO) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ),
+  INST(Blci , "blci" , Enc(XopVm_OptW) , O_00_M09(02,6,_,_,_), U , F(WO) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ),
+  INST(Blcic , "blcic" , Enc(XopVm_OptW) , O_00_M09(01,5,_,_,_), U , F(WO) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ),
+  INST(Blcmsk , "blcmsk" , Enc(XopVm_OptW) , O_00_M09(02,1,_,_,_), U , F(WO) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ),
+  INST(Blcs , "blcs" , Enc(XopVm_OptW) , O_00_M09(01,3,_,_,_), U , F(WO) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ),
+  INST(Blendpd , "blendpd" , Enc(SimdRmi) , O_660F3A(0D,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ),
+  INST(Blendps , "blendps" , Enc(SimdRmi) , O_660F3A(0C,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ),
+  INST(Blendvpd , "blendvpd" , Enc(SimdRm) , O_660F38(15,U,_,_,_), U , F(RW)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Blendvps , "blendvps" , Enc(SimdRm) , O_660F38(14,U,_,_,_), U , F(RW)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Blsfill , "blsfill" , Enc(XopVm_OptW) , O_00_M09(01,2,_,_,_), U , F(WO) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ),
+  INST(Blsi , "blsi" , Enc(AvxVm_OptW) , O_000F38(F3,3,_,_,_), U , F(RW) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ),
+  INST(Blsic , "blsic" , Enc(XopVm_OptW) , O_00_M09(01,6,_,_,_), U , F(WO) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ),
+  INST(Blsmsk , "blsmsk" , Enc(AvxVm_OptW) , O_000F38(F3,2,_,_,_), U , F(RW) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ),
+  INST(Blsr , "blsr" , Enc(AvxVm_OptW) , O_000F38(F3,1,_,_,_), U , F(RW) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ),
+  INST(Bsf , "bsf" , Enc(X86RegRm) , O_000F00(BC,U,_,_,_), U , F(RW) , EF(UUWUUU__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Bsr , "bsr" , Enc(X86RegRm) , O_000F00(BD,U,_,_,_), U , F(RW) , EF(UUWUUU__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Bswap , "bswap" , Enc(X86BSwap) , O_000F00(C8,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Gqd) , U , U , U , U ),
+  INST(Bt , "bt" , Enc(X86BTest) , O_000F00(A3,U,_,_,_), O_000F00(BA,4,_,_,_), F(RO) , EF(UU_UUW__), 0 , 0 , O(GqdwMem) , O(Gqdw)|O(Imm) , U , U , U ),
+  INST(Btc , "btc" , Enc(X86BTest) , O_000F00(BB,U,_,_,_), O_000F00(BA,7,_,_,_), F(RW)|F(Lock) , EF(UU_UUW__), 0 , 0 , O(GqdwMem) , O(Gqdw)|O(Imm) , U , U , U ),
+  INST(Btr , "btr" , Enc(X86BTest) , O_000F00(B3,U,_,_,_), O_000F00(BA,6,_,_,_), F(RW)|F(Lock) , EF(UU_UUW__), 0 , 0 , O(GqdwMem) , O(Gqdw)|O(Imm) , U , U , U ),
+  INST(Bts , "bts" , Enc(X86BTest) , O_000F00(AB,U,_,_,_), O_000F00(BA,5,_,_,_), F(RW)|F(Lock) , EF(UU_UUW__), 0 , 0 , O(GqdwMem) , O(Gqdw)|O(Imm) , U , U , U ),
+  INST(Bzhi , "bzhi" , Enc(AvxRmv_OptW) , O_000F38(F5,U,_,_,_), U , F(RW) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , O(Gqd) , U , U ),
+  INST(Call , "call" , Enc(X86Call) , O_000000(FF,2,_,_,_), O_000000(E8,U,_,_,_), F(RW)|F(Flow)|F(Volatile) , EF(________), 0 , 0 , O(GqdMem)|O(LImm) , U , U , U , U ),
+  INST(Cbw , "cbw" , Enc(X86Op) , O_660000(98,U,_,_,_), U , F(RW)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Cdq , "cdq" , Enc(X86Op) , O_000000(99,U,_,_,_), U , F(RW)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Cdqe , "cdqe" , Enc(X86Op) , O_000000(98,U,_,W,_), U , F(RW)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Clc , "clc" , Enc(X86Op) , O_000000(F8,U,_,_,_), U , F(Volatile) , EF(_____W__), 0 , 0 , U , U , U , U , U ),
+  INST(Cld , "cld" , Enc(X86Op) , O_000000(FC,U,_,_,_), U , F(Volatile) , EF(______W_), 0 , 0 , U , U , U , U , U ),
+  INST(Clflush , "clflush" , Enc(X86M) , O_000F00(AE,7,_,_,_), U , F(RO)|F(Volatile) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Clflushopt , "clflushopt" , Enc(X86M) , O_660F00(AE,7,_,_,_), U , F(RO)|F(Volatile) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Cmc , "cmc" , Enc(X86Op) , O_000000(F5,U,_,_,_), U , F(None) , EF(_____X__), 0 , 0 , U , U , U , U , U ),
+  INST(Cmova , "cmova" , Enc(X86RegRm) , O_000F00(47,U,_,_,_), U , F(RW) , EF(__R__R__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovae , "cmovae" , Enc(X86RegRm) , O_000F00(43,U,_,_,_), U , F(RW) , EF(_____R__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovb , "cmovb" , Enc(X86RegRm) , O_000F00(42,U,_,_,_), U , F(RW) , EF(_____R__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovbe , "cmovbe" , Enc(X86RegRm) , O_000F00(46,U,_,_,_), U , F(RW) , EF(__R__R__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovc , "cmovc" , Enc(X86RegRm) , O_000F00(42,U,_,_,_), U , F(RW) , EF(_____R__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmove , "cmove" , Enc(X86RegRm) , O_000F00(44,U,_,_,_), U , F(RW) , EF(__R_____), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovg , "cmovg" , Enc(X86RegRm) , O_000F00(4F,U,_,_,_), U , F(RW) , EF(RRR_____), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovge , "cmovge" , Enc(X86RegRm) , O_000F00(4D,U,_,_,_), U , F(RW) , EF(RR______), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovl , "cmovl" , Enc(X86RegRm) , O_000F00(4C,U,_,_,_), U , F(RW) , EF(RR______), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovle , "cmovle" , Enc(X86RegRm) , O_000F00(4E,U,_,_,_), U , F(RW) , EF(RRR_____), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovna , "cmovna" , Enc(X86RegRm) , O_000F00(46,U,_,_,_), U , F(RW) , EF(__R__R__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovnae , "cmovnae" , Enc(X86RegRm) , O_000F00(42,U,_,_,_), U , F(RW) , EF(_____R__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovnb , "cmovnb" , Enc(X86RegRm) , O_000F00(43,U,_,_,_), U , F(RW) , EF(_____R__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovnbe , "cmovnbe" , Enc(X86RegRm) , O_000F00(47,U,_,_,_), U , F(RW) , EF(__R__R__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovnc , "cmovnc" , Enc(X86RegRm) , O_000F00(43,U,_,_,_), U , F(RW) , EF(_____R__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovne , "cmovne" , Enc(X86RegRm) , O_000F00(45,U,_,_,_), U , F(RW) , EF(__R_____), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovng , "cmovng" , Enc(X86RegRm) , O_000F00(4E,U,_,_,_), U , F(RW) , EF(RRR_____), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovnge , "cmovnge" , Enc(X86RegRm) , O_000F00(4C,U,_,_,_), U , F(RW) , EF(RR______), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovnl , "cmovnl" , Enc(X86RegRm) , O_000F00(4D,U,_,_,_), U , F(RW) , EF(RR______), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovnle , "cmovnle" , Enc(X86RegRm) , O_000F00(4F,U,_,_,_), U , F(RW) , EF(RRR_____), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovno , "cmovno" , Enc(X86RegRm) , O_000F00(41,U,_,_,_), U , F(RW) , EF(R_______), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovnp , "cmovnp" , Enc(X86RegRm) , O_000F00(4B,U,_,_,_), U , F(RW) , EF(____R___), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovns , "cmovns" , Enc(X86RegRm) , O_000F00(49,U,_,_,_), U , F(RW) , EF(_R______), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovnz , "cmovnz" , Enc(X86RegRm) , O_000F00(45,U,_,_,_), U , F(RW) , EF(__R_____), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovo , "cmovo" , Enc(X86RegRm) , O_000F00(40,U,_,_,_), U , F(RW) , EF(R_______), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovp , "cmovp" , Enc(X86RegRm) , O_000F00(4A,U,_,_,_), U , F(RW) , EF(____R___), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovpe , "cmovpe" , Enc(X86RegRm) , O_000F00(4A,U,_,_,_), U , F(RW) , EF(____R___), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovpo , "cmovpo" , Enc(X86RegRm) , O_000F00(4B,U,_,_,_), U , F(RW) , EF(____R___), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovs , "cmovs" , Enc(X86RegRm) , O_000F00(48,U,_,_,_), U , F(RW) , EF(_R______), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmovz , "cmovz" , Enc(X86RegRm) , O_000F00(44,U,_,_,_), U , F(RW) , EF(__R_____), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Cmp , "cmp" , Enc(X86Arith) , O_000000(38,7,_,_,_), U , F(RO) , EF(WWWWWW__), 0 , 0 , O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U ),
+  INST(Cmppd , "cmppd" , Enc(SimdRmi) , O_660F00(C2,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ),
+  INST(Cmpps , "cmpps" , Enc(SimdRmi) , O_000F00(C2,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ),
+  INST(CmpsB , "cmps_b" , Enc(X86Op) , O_000000(A6,U,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , U , U , U , U , U ),
+  INST(CmpsD , "cmps_d" , Enc(X86Op) , O_000000(A7,U,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , U , U , U , U , U ),
+  INST(CmpsQ , "cmps_q" , Enc(X86Op) , O_000000(A7,U,_,W,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , U , U , U , U , U ),
+  INST(CmpsW , "cmps_w" , Enc(X86Op_66H) , O_000000(A7,U,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , U , U , U , U , U ),
+  INST(Cmpsd , "cmpsd" , Enc(SimdRmi) , O_F20F00(C2,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ),
+  INST(Cmpss , "cmpss" , Enc(SimdRmi) , O_F30F00(C2,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ),
+  INST(Cmpxchg , "cmpxchg" , Enc(X86RmReg) , O_000F00(B0,U,_,_,_), U , F(RW)|F(Lock)|F(Special) , EF(WWWWWW__), 0 , 0 , U , U , U , U , U ),
+  INST(Cmpxchg16b , "cmpxchg16b" , Enc(X86M) , O_000F00(C7,1,_,W,_), U , F(RW)|F(Lock)|F(Special) , EF(__W_____), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Cmpxchg8b , "cmpxchg8b" , Enc(X86M) , O_000F00(C7,1,_,_,_), U , F(RW)|F(Lock)|F(Special) , EF(__W_____), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Comisd , "comisd" , Enc(SimdRm) , O_660F00(2F,U,_,_,_), U , F(RO) , EF(WWWWWW__), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Comiss , "comiss" , Enc(SimdRm) , O_000F00(2F,U,_,_,_), U , F(RO) , EF(WWWWWW__), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Cpuid , "cpuid" , Enc(X86Op) , O_000F00(A2,U,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Cqo , "cqo" , Enc(X86Op) , O_000000(99,U,_,W,_), U , F(RW)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Crc32 , "crc32" , Enc(X86Crc) , O_F20F38(F0,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Gqd) , O(GqdwbMem) , U , U , U ),
+  INST(Cvtdq2pd , "cvtdq2pd" , Enc(SimdRm) , O_F30F00(E6,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Cvtdq2ps , "cvtdq2ps" , Enc(SimdRm) , O_000F00(5B,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Cvtpd2dq , "cvtpd2dq" , Enc(SimdRm) , O_F20F00(E6,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Cvtpd2pi , "cvtpd2pi" , Enc(SimdRm) , O_660F00(2D,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Mm) , O(XmmMem) , U , U , U ),
+  INST(Cvtpd2ps , "cvtpd2ps" , Enc(SimdRm) , O_660F00(5A,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Cvtpi2pd , "cvtpi2pd" , Enc(SimdRm) , O_660F00(2A,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(MmMem) , U , U , U ),
+  INST(Cvtpi2ps , "cvtpi2ps" , Enc(SimdRm) , O_000F00(2A,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Xmm) , O(MmMem) , U , U , U ),
+  INST(Cvtps2dq , "cvtps2dq" , Enc(SimdRm) , O_660F00(5B,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Cvtps2pd , "cvtps2pd" , Enc(SimdRm) , O_000F00(5A,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Cvtps2pi , "cvtps2pi" , Enc(SimdRm) , O_000F00(2D,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Mm) , O(XmmMem) , U , U , U ),
+  INST(Cvtsd2si , "cvtsd2si" , Enc(SimdRm_Q) , O_F20F00(2D,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Gqd) , O(XmmMem) , U , U , U ),
+  INST(Cvtsd2ss , "cvtsd2ss" , Enc(SimdRm) , O_F20F00(5A,U,_,_,_), U , F(WO) , EF(________), 0 , 4 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Cvtsi2sd , "cvtsi2sd" , Enc(SimdRm_Q) , O_F20F00(2A,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Xmm) , O(GqdMem) , U , U , U ),
+  INST(Cvtsi2ss , "cvtsi2ss" , Enc(SimdRm_Q) , O_F30F00(2A,U,_,_,_), U , F(WO) , EF(________), 0 , 4 , O(Xmm) , O(GqdMem) , U , U , U ),
+  INST(Cvtss2sd , "cvtss2sd" , Enc(SimdRm) , O_F30F00(5A,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Cvtss2si , "cvtss2si" , Enc(SimdRm_Q) , O_F30F00(2D,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Gqd) , O(XmmMem) , U , U , U ),
+  INST(Cvttpd2dq , "cvttpd2dq" , Enc(SimdRm) , O_660F00(E6,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Cvttpd2pi , "cvttpd2pi" , Enc(SimdRm) , O_660F00(2C,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Mm) , O(XmmMem) , U , U , U ),
+  INST(Cvttps2dq , "cvttps2dq" , Enc(SimdRm) , O_F30F00(5B,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Cvttps2pi , "cvttps2pi" , Enc(SimdRm) , O_000F00(2C,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Mm) , O(XmmMem) , U , U , U ),
+  INST(Cvttsd2si , "cvttsd2si" , Enc(SimdRm_Q) , O_F20F00(2C,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Gqd) , O(XmmMem) , U , U , U ),
+  INST(Cvttss2si , "cvttss2si" , Enc(SimdRm_Q) , O_F30F00(2C,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Gqd) , O(XmmMem) , U , U , U ),
+  INST(Cwd , "cwd" , Enc(X86Op) , O_660000(99,U,_,_,_), U , F(RW)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Cwde , "cwde" , Enc(X86Op) , O_000000(98,U,_,_,_), U , F(RW)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Daa , "daa" , Enc(X86Op) , O_000000(27,U,_,_,_), U , F(RW)|F(Special) , EF(UWWXWX__), 0 , 0 , U , U , U , U , U ),
+  INST(Das , "das" , Enc(X86Op) , O_000000(2F,U,_,_,_), U , F(RW)|F(Special) , EF(UWWXWX__), 0 , 0 , U , U , U , U , U ),
+  INST(Dec , "dec" , Enc(X86IncDec) , O_000000(FE,1,_,_,_), O_000000(48,U,_,_,_), F(RW)|F(Lock) , EF(WWWWW___), 0 , 0 , O(GqdwbMem) , U , U , U , U ),
+  INST(Div , "div" , Enc(X86Rm_B) , O_000000(F6,6,_,_,_), U , F(RW)|F(Special) , EF(UUUUUU__), 0 , 0 , U , U , U , U , U ),
+  INST(Divpd , "divpd" , Enc(SimdRm) , O_660F00(5E,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Divps , "divps" , Enc(SimdRm) , O_000F00(5E,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Divsd , "divsd" , Enc(SimdRm) , O_F20F00(5E,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Divss , "divss" , Enc(SimdRm) , O_F30F00(5E,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Dppd , "dppd" , Enc(SimdRmi) , O_660F3A(41,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ),
+  INST(Dpps , "dpps" , Enc(SimdRmi) , O_660F3A(40,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ),
+  INST(Emms , "emms" , Enc(X86Op) , O_000F00(77,U,_,_,_), U , F(Volatile) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Enter , "enter" , Enc(X86Enter) , O_000000(C8,U,_,_,_), U , F(Volatile)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Extractps , "extractps" , Enc(SimdExtract) , O_660F3A(17,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(GqdMem) , O(Xmm) , U , U , U ),
+  INST(Extrq , "extrq" , Enc(SimdExtrq) , O_660F00(79,U,_,_,_), O_660F00(78,0,_,_,_), F(RW) , EF(________), 0 , 0 , O(Xmm) , O(Xmm)|O(Imm) , O(None)|O(Imm) , U , U ),
+  INST(F2xm1 , "f2xm1" , Enc(FpuOp) , O_00_X(D9F0,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fabs , "fabs" , Enc(FpuOp) , O_00_X(D9E1,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fadd , "fadd" , Enc(FpuArith) , O_00_X(C0C0,0) , U , F(Fp)|F(Mem4_8) , EF(________), 0 , 0 , O(FpMem) , O(Fp) , U , U , U ),
+  INST(Faddp , "faddp" , Enc(FpuRDef) , O_00_X(DEC0,U) , U , F(Fp) , EF(________), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fbld , "fbld" , Enc(X86M) , O_000000(DF,4,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fbstp , "fbstp" , Enc(X86M) , O_000000(DF,6,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fchs , "fchs" , Enc(FpuOp) , O_00_X(D9E0,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fclex , "fclex" , Enc(FpuOp) , O_9B_X(DBE2,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fcmovb , "fcmovb" , Enc(FpuR) , O_00_X(DAC0,U) , U , F(Fp) , EF(_____R__), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fcmovbe , "fcmovbe" , Enc(FpuR) , O_00_X(DAD0,U) , U , F(Fp) , EF(__R__R__), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fcmove , "fcmove" , Enc(FpuR) , O_00_X(DAC8,U) , U , F(Fp) , EF(__R_____), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fcmovnb , "fcmovnb" , Enc(FpuR) , O_00_X(DBC0,U) , U , F(Fp) , EF(_____R__), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fcmovnbe , "fcmovnbe" , Enc(FpuR) , O_00_X(DBD0,U) , U , F(Fp) , EF(__R__R__), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fcmovne , "fcmovne" , Enc(FpuR) , O_00_X(DBC8,U) , U , F(Fp) , EF(__R_____), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fcmovnu , "fcmovnu" , Enc(FpuR) , O_00_X(DBD8,U) , U , F(Fp) , EF(____R___), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fcmovu , "fcmovu" , Enc(FpuR) , O_00_X(DAD8,U) , U , F(Fp) , EF(____R___), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fcom , "fcom" , Enc(FpuCom) , O_00_X(D0D0,2) , U , F(Fp) , EF(________), 0 , 0 , O(Fp)|O(Mem) , O(Fp) , U , U , U ),
+  INST(Fcomi , "fcomi" , Enc(FpuR) , O_00_X(DBF0,U) , U , F(Fp) , EF(WWWWWW__), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fcomip , "fcomip" , Enc(FpuR) , O_00_X(DFF0,U) , U , F(Fp) , EF(WWWWWW__), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fcomp , "fcomp" , Enc(FpuCom) , O_00_X(D8D8,3) , U , F(Fp) , EF(________), 0 , 0 , O(Fp)|O(Mem) , O(Fp) , U , U , U ),
+  INST(Fcompp , "fcompp" , Enc(FpuOp) , O_00_X(DED9,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fcos , "fcos" , Enc(FpuOp) , O_00_X(D9FF,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fdecstp , "fdecstp" , Enc(FpuOp) , O_00_X(D9F6,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fdiv , "fdiv" , Enc(FpuArith) , O_00_X(F0F8,6) , U , F(Fp)|F(Mem4_8) , EF(________), 0 , 0 , O(FpMem) , O(Fp) , U , U , U ),
+  INST(Fdivp , "fdivp" , Enc(FpuRDef) , O_00_X(DEF8,U) , U , F(Fp) , EF(________), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fdivr , "fdivr" , Enc(FpuArith) , O_00_X(F8F0,7) , U , F(Fp)|F(Mem4_8) , EF(________), 0 , 0 , O(FpMem) , O(Fp) , U , U , U ),
+  INST(Fdivrp , "fdivrp" , Enc(FpuRDef) , O_00_X(DEF0,U) , U , F(Fp) , EF(________), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Femms , "femms" , Enc(X86Op) , O_000F00(0E,U,_,_,_), U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Ffree , "ffree" , Enc(FpuR) , O_00_X(DDC0,U) , U , F(Fp) , EF(________), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fiadd , "fiadd" , Enc(FpuM) , O_000000(DA,0,_,_,_), U , F(Fp)|F(Mem2_4) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Ficom , "ficom" , Enc(FpuM) , O_000000(DA,2,_,_,_), U , F(Fp)|F(Mem2_4) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Ficomp , "ficomp" , Enc(FpuM) , O_000000(DA,3,_,_,_), U , F(Fp)|F(Mem2_4) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fidiv , "fidiv" , Enc(FpuM) , O_000000(DA,6,_,_,_), U , F(Fp)|F(Mem2_4) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fidivr , "fidivr" , Enc(FpuM) , O_000000(DA,7,_,_,_), U , F(Fp)|F(Mem2_4) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fild , "fild" , Enc(FpuM) , O_000000(DB,0,_,_,_), O_000000(DF,5,_,_,_), F(Fp)|F(Mem2_4_8) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fimul , "fimul" , Enc(FpuM) , O_000000(DA,1,_,_,_), U , F(Fp)|F(Mem2_4) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fincstp , "fincstp" , Enc(FpuOp) , O_00_X(D9F7,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Finit , "finit" , Enc(FpuOp) , O_9B_X(DBE3,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fist , "fist" , Enc(FpuM) , O_000000(DB,2,_,_,_), U , F(Fp)|F(Mem2_4) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fistp , "fistp" , Enc(FpuM) , O_000000(DB,3,_,_,_), O_000000(DF,7,_,_,_), F(Fp)|F(Mem2_4_8) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fisttp , "fisttp" , Enc(FpuM) , O_000000(DB,1,_,_,_), O_000000(DD,1,_,_,_), F(Fp)|F(Mem2_4_8) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fisub , "fisub" , Enc(FpuM) , O_000000(DA,4,_,_,_), U , F(Fp)|F(Mem2_4) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fisubr , "fisubr" , Enc(FpuM) , O_000000(DA,5,_,_,_), U , F(Fp)|F(Mem2_4) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fld , "fld" , Enc(FpuFldFst) , O_000000(D9,0,_,_,_), O_000000(DB,5,_,_,_), F(Fp)|F(Mem4_8_10) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fld1 , "fld1" , Enc(FpuOp) , O_00_X(D9E8,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fldcw , "fldcw" , Enc(X86M) , O_000000(D9,5,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fldenv , "fldenv" , Enc(X86M) , O_000000(D9,4,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fldl2e , "fldl2e" , Enc(FpuOp) , O_00_X(D9EA,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fldl2t , "fldl2t" , Enc(FpuOp) , O_00_X(D9E9,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fldlg2 , "fldlg2" , Enc(FpuOp) , O_00_X(D9EC,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fldln2 , "fldln2" , Enc(FpuOp) , O_00_X(D9ED,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fldpi , "fldpi" , Enc(FpuOp) , O_00_X(D9EB,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fldz , "fldz" , Enc(FpuOp) , O_00_X(D9EE,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fmul , "fmul" , Enc(FpuArith) , O_00_X(C8C8,1) , U , F(Fp)|F(Mem4_8) , EF(________), 0 , 0 , O(FpMem) , O(Fp) , U , U , U ),
+  INST(Fmulp , "fmulp" , Enc(FpuRDef) , O_00_X(DEC8,U) , U , F(Fp) , EF(________), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fnclex , "fnclex" , Enc(FpuOp) , O_00_X(DBE2,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fninit , "fninit" , Enc(FpuOp) , O_00_X(DBE3,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fnop , "fnop" , Enc(FpuOp) , O_00_X(D9D0,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fnsave , "fnsave" , Enc(X86M) , O_000000(DD,6,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fnstcw , "fnstcw" , Enc(X86M) , O_000000(D9,7,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fnstenv , "fnstenv" , Enc(X86M) , O_000000(D9,6,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fnstsw , "fnstsw" , Enc(FpuStsw) , O_000000(DD,7,_,_,_), O_00_X(DFE0,U) , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fpatan , "fpatan" , Enc(FpuOp) , O_00_X(D9F3,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fprem , "fprem" , Enc(FpuOp) , O_00_X(D9F8,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fprem1 , "fprem1" , Enc(FpuOp) , O_00_X(D9F5,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fptan , "fptan" , Enc(FpuOp) , O_00_X(D9F2,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Frndint , "frndint" , Enc(FpuOp) , O_00_X(D9FC,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Frstor , "frstor" , Enc(X86M) , O_000000(DD,4,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fsave , "fsave" , Enc(X86M) , O_9B0000(DD,6,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fscale , "fscale" , Enc(FpuOp) , O_00_X(D9FD,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fsin , "fsin" , Enc(FpuOp) , O_00_X(D9FE,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fsincos , "fsincos" , Enc(FpuOp) , O_00_X(D9FB,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fsqrt , "fsqrt" , Enc(FpuOp) , O_00_X(D9FA,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fst , "fst" , Enc(FpuFldFst) , O_000000(D9,2,_,_,_), U , F(Fp)|F(Mem4_8) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fstcw , "fstcw" , Enc(X86M) , O_9B0000(D9,7,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fstenv , "fstenv" , Enc(X86M) , O_9B0000(D9,6,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fstp , "fstp" , Enc(FpuFldFst) , O_000000(D9,3,_,_,_), O_000000(DB,7,_,_,_), F(Fp)|F(Mem4_8_10) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fstsw , "fstsw" , Enc(FpuStsw) , O_9B0000(DD,7,_,_,_), O_9B_X(DFE0,U) , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fsub , "fsub" , Enc(FpuArith) , O_00_X(E0E8,4) , U , F(Fp)|F(Mem4_8) , EF(________), 0 , 0 , O(FpMem) , O(Fp) , U , U , U ),
+  INST(Fsubp , "fsubp" , Enc(FpuRDef) , O_00_X(DEE8,U) , U , F(Fp) , EF(________), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fsubr , "fsubr" , Enc(FpuArith) , O_00_X(E8E0,5) , U , F(Fp)|F(Mem4_8) , EF(________), 0 , 0 , O(FpMem) , O(Fp) , U , U , U ),
+  INST(Fsubrp , "fsubrp" , Enc(FpuRDef) , O_00_X(DEE0,U) , U , F(Fp) , EF(________), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Ftst , "ftst" , Enc(FpuOp) , O_00_X(D9E4,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fucom , "fucom" , Enc(FpuRDef) , O_00_X(DDE0,U) , U , F(Fp) , EF(________), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fucomi , "fucomi" , Enc(FpuR) , O_00_X(DBE8,U) , U , F(Fp) , EF(WWWWWW__), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fucomip , "fucomip" , Enc(FpuR) , O_00_X(DFE8,U) , U , F(Fp) , EF(WWWWWW__), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fucomp , "fucomp" , Enc(FpuRDef) , O_00_X(DDE8,U) , U , F(Fp) , EF(________), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fucompp , "fucompp" , Enc(FpuOp) , O_00_X(DAE9,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fwait , "fwait" , Enc(X86Op) , O_000000(DB,U,_,_,_), U , F(Fp)|F(Volatile) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fxam , "fxam" , Enc(FpuOp) , O_00_X(D9E5,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fxch , "fxch" , Enc(FpuR) , O_00_X(D9C8,U) , U , F(Fp) , EF(________), 0 , 0 , O(Fp) , U , U , U , U ),
+  INST(Fxrstor , "fxrstor" , Enc(X86M) , O_000F00(AE,1,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fxrstor64 , "fxrstor64" , Enc(X86M) , O_000F00(AE,1,_,W,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fxsave , "fxsave" , Enc(X86M) , O_000F00(AE,0,_,_,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fxsave64 , "fxsave64" , Enc(X86M) , O_000F00(AE,0,_,W,_), U , F(Fp) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Fxtract , "fxtract" , Enc(FpuOp) , O_00_X(D9F4,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fyl2x , "fyl2x" , Enc(FpuOp) , O_00_X(D9F1,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Fyl2xp1 , "fyl2xp1" , Enc(FpuOp) , O_00_X(D9F9,U) , U , F(Fp) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Haddpd , "haddpd" , Enc(SimdRm) , O_660F00(7C,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Haddps , "haddps" , Enc(SimdRm) , O_F20F00(7C,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Hsubpd , "hsubpd" , Enc(SimdRm) , O_660F00(7D,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Hsubps , "hsubps" , Enc(SimdRm) , O_F20F00(7D,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Idiv , "idiv" , Enc(X86Rm_B) , O_000000(F6,7,_,_,_), U , F(RW)|F(Special) , EF(UUUUUU__), 0 , 0 , 0 , 0 , U , U , U ),
+  INST(Imul , "imul" , Enc(X86Imul) , U , U , F(RW)|F(Special) , EF(WUUUUW__), 0 , 0 , 0 , 0 , U , U , U ),
+  INST(Inc , "inc" , Enc(X86IncDec) , O_000000(FE,0,_,_,_), O_000000(40,U,_,_,_), F(RW)|F(Lock) , EF(WWWWW___), 0 , 0 , O(GqdwbMem) , U , U , U , U ),
+  INST(Insertps , "insertps" , Enc(SimdRmi) , O_660F3A(21,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ),
+  INST(Insertq , "insertq" , Enc(SimdInsertq) , O_F20F00(79,U,_,_,_), O_F20F00(78,U,_,_,_), F(RW) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(None)|O(Imm) , O(None)|O(Imm) , U ),
+  INST(Int , "int" , Enc(X86Int) , O_000000(CC,U,_,_,_), U , F(Volatile) , EF(_______W), 0 , 0 , U , U , U , U , U ),
+  INST(Ja , "ja" , Enc(X86Jcc) , O_000000(77,U,_,_,_), U , F(Flow)|F(Volatile) , EF(__R__R__), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jae , "jae" , Enc(X86Jcc) , O_000000(73,U,_,_,_), U , F(Flow)|F(Volatile) , EF(_____R__), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jb , "jb" , Enc(X86Jcc) , O_000000(72,U,_,_,_), U , F(Flow)|F(Volatile) , EF(_____R__), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jbe , "jbe" , Enc(X86Jcc) , O_000000(76,U,_,_,_), U , F(Flow)|F(Volatile) , EF(__R__R__), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jc , "jc" , Enc(X86Jcc) , O_000000(72,U,_,_,_), U , F(Flow)|F(Volatile) , EF(_____R__), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Je , "je" , Enc(X86Jcc) , O_000000(74,U,_,_,_), U , F(Flow)|F(Volatile) , EF(__R_____), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jg , "jg" , Enc(X86Jcc) , O_000000(7F,U,_,_,_), U , F(Flow)|F(Volatile) , EF(RRR_____), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jge , "jge" , Enc(X86Jcc) , O_000000(7D,U,_,_,_), U , F(Flow)|F(Volatile) , EF(RR______), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jl , "jl" , Enc(X86Jcc) , O_000000(7C,U,_,_,_), U , F(Flow)|F(Volatile) , EF(RR______), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jle , "jle" , Enc(X86Jcc) , O_000000(7E,U,_,_,_), U , F(Flow)|F(Volatile) , EF(RRR_____), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jna , "jna" , Enc(X86Jcc) , O_000000(76,U,_,_,_), U , F(Flow)|F(Volatile) , EF(__R__R__), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jnae , "jnae" , Enc(X86Jcc) , O_000000(72,U,_,_,_), U , F(Flow)|F(Volatile) , EF(_____R__), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jnb , "jnb" , Enc(X86Jcc) , O_000000(73,U,_,_,_), U , F(Flow)|F(Volatile) , EF(_____R__), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jnbe , "jnbe" , Enc(X86Jcc) , O_000000(77,U,_,_,_), U , F(Flow)|F(Volatile) , EF(__R__R__), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jnc , "jnc" , Enc(X86Jcc) , O_000000(73,U,_,_,_), U , F(Flow)|F(Volatile) , EF(_____R__), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jne , "jne" , Enc(X86Jcc) , O_000000(75,U,_,_,_), U , F(Flow)|F(Volatile) , EF(__R_____), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jng , "jng" , Enc(X86Jcc) , O_000000(7E,U,_,_,_), U , F(Flow)|F(Volatile) , EF(RRR_____), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jnge , "jnge" , Enc(X86Jcc) , O_000000(7C,U,_,_,_), U , F(Flow)|F(Volatile) , EF(RR______), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jnl , "jnl" , Enc(X86Jcc) , O_000000(7D,U,_,_,_), U , F(Flow)|F(Volatile) , EF(RR______), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jnle , "jnle" , Enc(X86Jcc) , O_000000(7F,U,_,_,_), U , F(Flow)|F(Volatile) , EF(RRR_____), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jno , "jno" , Enc(X86Jcc) , O_000000(71,U,_,_,_), U , F(Flow)|F(Volatile) , EF(R_______), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jnp , "jnp" , Enc(X86Jcc) , O_000000(7B,U,_,_,_), U , F(Flow)|F(Volatile) , EF(____R___), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jns , "jns" , Enc(X86Jcc) , O_000000(79,U,_,_,_), U , F(Flow)|F(Volatile) , EF(_R______), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jnz , "jnz" , Enc(X86Jcc) , O_000000(75,U,_,_,_), U , F(Flow)|F(Volatile) , EF(__R_____), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jo , "jo" , Enc(X86Jcc) , O_000000(70,U,_,_,_), U , F(Flow)|F(Volatile) , EF(R_______), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jp , "jp" , Enc(X86Jcc) , O_000000(7A,U,_,_,_), U , F(Flow)|F(Volatile) , EF(____R___), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jpe , "jpe" , Enc(X86Jcc) , O_000000(7A,U,_,_,_), U , F(Flow)|F(Volatile) , EF(____R___), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jpo , "jpo" , Enc(X86Jcc) , O_000000(7B,U,_,_,_), U , F(Flow)|F(Volatile) , EF(____R___), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Js , "js" , Enc(X86Jcc) , O_000000(78,U,_,_,_), U , F(Flow)|F(Volatile) , EF(_R______), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jz , "jz" , Enc(X86Jcc) , O_000000(74,U,_,_,_), U , F(Flow)|F(Volatile) , EF(__R_____), 0 , 0 , O(Label) , U , U , U , U ),
+  INST(Jecxz , "jecxz" , Enc(X86Jecxz) , O_000000(E3,U,_,_,_), U , F(Flow)|F(Volatile)|F(Special) , EF(________), 0 , 0 , O(Gqdw) , O(Label) , U , U , U ),
+  INST(Jmp , "jmp" , Enc(X86Jmp) , O_000000(FF,4,_,_,_), O_000000(E9,U,_,_,_), F(Flow)|F(Volatile) , EF(________), 0 , 0 , O(Label)|O(Imm) , U , U , U , U ),
+  INST(Lahf , "lahf" , Enc(X86Op) , O_000000(9F,U,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(_RRRRR__), 0 , 0 , U , U , U , U , U ),
+  INST(Lddqu , "lddqu" , Enc(SimdRm) , O_F20F00(F0,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(Mem) , U , U , U ),
+  INST(Ldmxcsr , "ldmxcsr" , Enc(X86M) , O_000F00(AE,2,_,_,_), U , F(RO)|F(Volatile) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+  INST(Lea , "lea" , Enc(X86Lea) , O_000000(8D,U,_,_,_), U , F(WO) , EF(________), 0 , 0 , O(Gqd) , O(Mem) , U , U , U ),
+  INST(Leave , "leave" , Enc(X86Op) , O_000000(C9,U,_,_,_), U , F(Volatile)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Lfence , "lfence" , Enc(X86Fence) , O_000F00(AE,5,_,_,_), U , F(Volatile) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(LodsB , "lods_b" , Enc(X86Op) , O_000000(AC,U,_,_,_), U , F(WO)|F(Special) , EF(______R_), 0 , 1 , U , U , U , U , U ),
+  INST(LodsD , "lods_d" , Enc(X86Op) , O_000000(AD,U,_,_,_), U , F(WO)|F(Special) , EF(______R_), 0 , 4 , U , U , U , U , U ),
+  INST(LodsQ , "lods_q" , Enc(X86Op) , O_000000(AD,U,_,W,_), U , F(WO)|F(Special) , EF(______R_), 0 , 8 , U , U , U , U , U ),
+  INST(LodsW , "lods_w" , Enc(X86Op_66H) , O_000000(AD,U,_,_,_), U , F(WO)|F(Special) , EF(______R_), 0 , 2 , U , U , U , U , U ),
+  INST(Lzcnt , "lzcnt" , Enc(X86RegRm) , O_F30F00(BD,U,_,_,_), U , F(RW) , EF(UUWUUW__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ),
+  INST(Maskmovdqu , "maskmovdqu" , Enc(SimdRm) , O_660F00(57,U,_,_,_), U , F(RW)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , U , U , U ),
+  INST(Maskmovq , "maskmovq" , Enc(SimdRm) , O_000F00(F7,U,_,_,_), U , F(RW)|F(Special) , EF(________), 0 , 0 , O(Mm) , O(Mm) , U , U , U ),
+  INST(Maxpd , "maxpd" , Enc(SimdRm) , O_660F00(5F,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Maxps , "maxps" , Enc(SimdRm) , O_000F00(5F,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Maxsd , "maxsd" , Enc(SimdRm) , O_F20F00(5F,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Maxss , "maxss" , Enc(SimdRm) , O_F30F00(5F,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Mfence , "mfence" , Enc(X86Fence) , O_000F00(AE,6,_,_,_), U , F(RW) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Minpd , "minpd" , Enc(SimdRm) , O_660F00(5D,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Minps , "minps" , Enc(SimdRm) , O_000F00(5D,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Minsd , "minsd" , Enc(SimdRm) , O_F20F00(5D,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Minss , "minss" , Enc(SimdRm) , O_F30F00(5D,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Monitor , "monitor" , Enc(X86Op) , O_000F01(C8,U,_,_,_), U , F(RO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Mov , "mov" , Enc(X86Mov) , U , U , F(WO) , EF(________), 0 , 0 , O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U ),
+  INST(MovPtr , "mov_ptr" , Enc(X86MovPtr) , O_000000(A0,U,_,_,_), O_000000(A2,U,_,_,_), F(WO)|F(Special) , EF(________), 0 , 0 , O(Gqdwb) , O(Imm) , U , U , U ),
+  INST(Movapd , "movapd" , Enc(SimdMov) , O_660F00(28,U,_,_,_), O_660F00(29,U,_,_,_), F(WO) , EF(________), 0 , 16, O(XmmMem) , O(XmmMem) , U , U , U ),
+  INST(Movaps , "movaps" , Enc(SimdMov) , O_000F00(28,U,_,_,_), O_000F00(29,U,_,_,_), F(WO) , EF(________), 0 , 16, O(XmmMem) , O(XmmMem) , U , U , U ),
+  INST(Movbe , "movbe" , Enc(SimdMovBe) , O_000F38(F0,U,_,_,_), O_000F38(F1,U,_,_,_), F(WO) , EF(________), 0 , 0 , O(GqdwMem) , O(GqdwMem) , U , U , U ),
+  INST(Movd , "movd" , Enc(SimdMovD) , O_000F00(6E,U,_,_,_), O_000F00(7E,U,_,_,_), F(WO) , EF(________), 0 , 16, O(Gd)|O(MmXmmMem) , O(Gd)|O(MmXmmMem) , U , U , U ),
+  INST(Movddup , "movddup" , Enc(SimdMov) , O_F20F00(12,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Movdq2q , "movdq2q" , Enc(SimdMov) , O_F20F00(D6,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Mm) , O(Xmm) , U , U , U ),
+  INST(Movdqa , "movdqa" , Enc(SimdMov) , O_660F00(6F,U,_,_,_), O_660F00(7F,U,_,_,_), F(WO) , EF(________), 0 , 16, O(XmmMem) , O(XmmMem) , U , U , U ),
+  INST(Movdqu , "movdqu" , Enc(SimdMov) , O_F30F00(6F,U,_,_,_), O_F30F00(7F,U,_,_,_), F(WO) , EF(________), 0 , 16, O(XmmMem) , O(XmmMem) , U , U , U ),
+  INST(Movhlps , "movhlps" , Enc(SimdMov) , O_000F00(12,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Xmm) , O(Xmm) , U , U , U ),
+  INST(Movhpd , "movhpd" , Enc(SimdMov) , O_660F00(16,U,_,_,_), O_660F00(17,U,_,_,_), F(RW) , EF(________), 8 , 8 , O(XmmMem) , O(XmmMem) , U , U , U ),
+  INST(Movhps , "movhps" , Enc(SimdMov) , O_000F00(16,U,_,_,_), O_000F00(17,U,_,_,_), F(RW) , EF(________), 8 , 8 , O(XmmMem) , O(XmmMem) , U , U , U ),
+  INST(Movlhps , "movlhps" , Enc(SimdMov) , O_000F00(16,U,_,_,_), U , F(RW) , EF(________), 8 , 8 , O(Xmm) , O(Xmm) , U , U , U ),
+  INST(Movlpd , "movlpd" , Enc(SimdMov) , O_660F00(12,U,_,_,_), O_660F00(13,U,_,_,_), F(WO) , EF(________), 0 , 8 , O(XmmMem) , O(XmmMem) , U , U , U ),
+  INST(Movlps , "movlps" , Enc(SimdMov) , O_000F00(12,U,_,_,_), O_000F00(13,U,_,_,_), F(WO) , EF(________), 0 , 8 , O(XmmMem) , O(XmmMem) , U , U , U ),
+  INST(Movmskpd , "movmskpd" , Enc(SimdMovNoRexW) , O_660F00(50,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Gqd) , O(Xmm) , U , U , U ),
+  INST(Movmskps , "movmskps" , Enc(SimdMovNoRexW) , O_000F00(50,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Gqd) , O(Xmm) , U , U , U ),
+  INST(Movntdq , "movntdq" , Enc(SimdMov) , U , O_660F00(E7,U,_,_,_), F(WO) , EF(________), 0 , 16, O(Mem) , O(Xmm) , U , U , U ),
+  INST(Movntdqa , "movntdqa" , Enc(SimdMov) , O_660F38(2A,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(Mem) , U , U , U ),
+  INST(Movnti , "movnti" , Enc(SimdMov) , U , O_000F00(C3,U,_,_,_), F(WO) , EF(________), 0 , 8 , O(Mem) , O(Gqd) , U , U , U ),
+  INST(Movntpd , "movntpd" , Enc(SimdMov) , U , O_660F00(2B,U,_,_,_), F(WO) , EF(________), 0 , 16, O(Mem) , O(Xmm) , U , U , U ),
+  INST(Movntps , "movntps" , Enc(SimdMov) , U , O_000F00(2B,U,_,_,_), F(WO) , EF(________), 0 , 16, O(Mem) , O(Xmm) , U , U , U ),
+  INST(Movntq , "movntq" , Enc(SimdMov) , U , O_000F00(E7,U,_,_,_), F(WO) , EF(________), 0 , 8 , O(Mem) , O(Mm) , U , U , U ),
+  INST(Movntsd , "movntsd" , Enc(SimdMov) , U , O_F20F00(2B,U,_,_,_), F(WO) , EF(________), 0 , 8 , O(Mem) , O(Xmm) , U , U , U ),
+  INST(Movntss , "movntss" , Enc(SimdMov) , U , O_F30F00(2B,U,_,_,_), F(WO) , EF(________), 0 , 4 , O(Mem) , O(Xmm) , U , U , U ),
+  INST(Movq , "movq" , Enc(SimdMovQ) , O_000F00(6E,U,_,W,_), O_000F00(7E,U,_,W,_), F(WO) , EF(________), 0 , 16, O(Gq)|O(MmXmmMem) , O(Gq)|O(MmXmmMem) , U , U , U ),
+  INST(Movq2dq , "movq2dq" , Enc(SimdRm) , O_F30F00(D6,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(Mm) , U , U , U ),
+  INST(MovsB , "movs_b" , Enc(X86Op) , O_000000(A4,U,_,_,_), U , F(WO)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(MovsD , "movs_d" , Enc(X86Op) , O_000000(A5,U,_,_,_), U , F(WO)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(MovsQ , "movs_q" , Enc(X86Op) , O_000000(A5,U,_,W,_), U , F(WO)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(MovsW , "movs_w" , Enc(X86Op_66H) , O_000000(A5,U,_,_,_), U , F(WO)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Movsd , "movsd" , Enc(SimdMov) , O_F20F00(10,U,_,_,_), O_F20F00(11,U,_,_,_), F(WO)|F(ZeroIfMem) , EF(________), 0 , 8 , O(XmmMem) , O(XmmMem) , U , U , U ),
+  INST(Movshdup , "movshdup" , Enc(SimdRm) , O_F30F00(16,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Movsldup , "movsldup" , Enc(SimdRm) , O_F30F00(12,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Movss , "movss" , Enc(SimdMov) , O_F30F00(10,U,_,_,_), O_F30F00(11,U,_,_,_), F(WO)|F(ZeroIfMem) , EF(________), 0 , 4 , O(XmmMem) , O(XmmMem) , U , U , U ),
+  INST(Movsx , "movsx" , Enc(X86MovsxMovzx) , O_000F00(BE,U,_,_,_), U , F(WO) , EF(________), 0 , 0 , O(Gqdw) , O(GwbMem) , U , U , U ),
+  INST(Movsxd , "movsxd" , Enc(X86Movsxd) , O_000000(63,U,_,_,_), U , F(WO) , EF(________), 0 , 0 , O(Gq) , O(GdMem) , U , U , U ),
+  INST(Movupd , "movupd" , Enc(SimdMov) , O_660F00(10,U,_,_,_), O_660F00(11,U,_,_,_), F(WO) , EF(________), 0 , 16, O(XmmMem) , O(XmmMem) , U , U , U ),
+  INST(Movups , "movups" , Enc(SimdMov) , O_000F00(10,U,_,_,_), O_000F00(11,U,_,_,_), F(WO) , EF(________), 0 , 16, O(XmmMem) , O(XmmMem) , U , U , U ),
+  INST(Movzx , "movzx" , Enc(X86MovsxMovzx) , O_000F00(B6,U,_,_,_), U , F(WO) , EF(________), 0 , 0 , O(Gqdw) , O(GwbMem) , U , U , U ),
+  INST(Mpsadbw , "mpsadbw" , Enc(SimdRmi) , O_660F3A(42,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ),
+  INST(Mul , "mul" , Enc(X86Rm_B) , O_000000(F6,4,_,_,_), U , F(RW)|F(Special) , EF(WUUUUW__), 0 , 0 , 0 , 0 , U , U , U ),
+  INST(Mulpd , "mulpd" , Enc(SimdRm) , O_660F00(59,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Mulps , "mulps" , Enc(SimdRm) , O_000F00(59,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Mulsd , "mulsd" , Enc(SimdRm) , O_F20F00(59,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Mulss , "mulss" , Enc(SimdRm) , O_F30F00(59,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Mulx , "mulx" , Enc(AvxRvm_OptW) , O_F20F38(F6,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Gqd) , O(Gqd) , O(GqdMem) , U , U ),
+  INST(Mwait , "mwait" , Enc(X86Op) , O_000F01(C9,U,_,_,_), U , F(RO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Neg , "neg" , Enc(X86Rm_B) , O_000000(F6,3,_,_,_), U , F(RW)|F(Lock) , EF(WWWWWW__), 0 , 0 , O(GqdwbMem) , U , U , U , U ),
+  INST(Nop , "nop" , Enc(X86Op) , O_000000(90,U,_,_,_), U , F(None) , EF(________), 0 , 0 , U , U , U , U , U ),
+  INST(Not , "not" , Enc(X86Rm_B) , O_000000(F6,2,_,_,_), U , F(RW)|F(Lock) , EF(________), 0 , 0 , O(GqdwbMem) , U , U , U , U ),
+  INST(Or , "or" , Enc(X86Arith) , O_000000(08,1,_,_,_), U , F(RW)|F(Lock) , EF(WWWUWW__), 0 , 0 , O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U ),
+  INST(Orpd , "orpd" , Enc(SimdRm) , O_660F00(56,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Orps , "orps" , Enc(SimdRm) , O_000F00(56,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Pabsb , "pabsb" , Enc(SimdRm_P) , O_000F38(1C,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ),
+  INST(Pabsd , "pabsd" , Enc(SimdRm_P) , O_000F38(1E,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ),
+  INST(Pabsw , "pabsw" , Enc(SimdRm_P) , O_000F38(1D,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ),
+  INST(Packssdw , "packssdw" , Enc(SimdRm_P) , O_000F00(6B,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ),
+  INST(Packsswb , "packsswb" , Enc(SimdRm_P) , O_000F00(63,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ),
+  INST(Packusdw , "packusdw" , Enc(SimdRm) , O_660F38(2B,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+  INST(Packuswb , "packuswb" , Enc(SimdRm_P) , O_000F00(67,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ),
+  INST(Paddb , "paddb" , Enc(SimdRm_P) , O_000F00(FC,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ),
+  INST(Paddd , "paddd" , Enc(SimdRm_P) , O_000F00(FE,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ),
+  INST(Paddq , "paddq" , Enc(SimdRm_P) , O_000F00(D4,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ),
+  INST(Paddsb , "paddsb" , Enc(SimdRm_P) , O_000F00(EC,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ),
+  INST(Paddsw , "paddsw" , Enc(SimdRm_P) , O_000F00(ED,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U
, U ), + INST(Paddusb , "paddusb" , Enc(SimdRm_P) , O_000F00(DC,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Paddusw , "paddusw" , Enc(SimdRm_P) , O_000F00(DD,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Paddw , "paddw" , Enc(SimdRm_P) , O_000F00(FD,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Palignr , "palignr" , Enc(SimdRmi_P) , O_000F3A(0F,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , O(Imm) , U , U ), + INST(Pand , "pand" , Enc(SimdRm_P) , O_000F00(DB,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pandn , "pandn" , Enc(SimdRm_P) , O_000F00(DF,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pause , "pause" , Enc(X86Op) , O_F30000(90,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , U , U , U , U , U ), + INST(Pavgb , "pavgb" , Enc(SimdRm_P) , O_000F00(E0,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pavgusb , "pavgusb" , Enc(Simd3dNow) , O_000F0F(BF,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pavgw , "pavgw" , Enc(SimdRm_P) , O_000F00(E3,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pblendvb , "pblendvb" , Enc(SimdRm) , O_660F38(10,U,_,_,_), U , F(RW)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pblendw , "pblendw" , Enc(SimdRmi) , O_660F3A(0E,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Pclmulqdq , "pclmulqdq" , Enc(SimdRmi) , O_660F3A(44,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Pcmpeqb , "pcmpeqb" , Enc(SimdRm_P) , O_000F00(74,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pcmpeqd , "pcmpeqd" , Enc(SimdRm_P) , O_000F00(76,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pcmpeqq , "pcmpeqq" , Enc(SimdRm) , O_660F38(29,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pcmpeqw , "pcmpeqw" , Enc(SimdRm_P) , O_000F00(75,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pcmpestri , "pcmpestri" , Enc(SimdRmi) , O_660F3A(61,U,_,_,_), U , F(WO)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Pcmpestrm , "pcmpestrm" , Enc(SimdRmi) , O_660F3A(60,U,_,_,_), U , F(WO)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Pcmpgtb , "pcmpgtb" , Enc(SimdRm_P) , O_000F00(64,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pcmpgtd , "pcmpgtd" , Enc(SimdRm_P) , O_000F00(66,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pcmpgtq , "pcmpgtq" , Enc(SimdRm) , O_660F38(37,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pcmpgtw , "pcmpgtw" , Enc(SimdRm_P) , O_000F00(65,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pcmpistri , "pcmpistri" , Enc(SimdRmi) , O_660F3A(63,U,_,_,_), U , F(WO)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Pcmpistrm , "pcmpistrm" , Enc(SimdRmi) , O_660F3A(62,U,_,_,_), U , F(WO)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Pdep , "pdep" , Enc(AvxRvm_OptW) , 
O_F20F38(F5,U,_,_,_), U , F(WO) , EF(________), 0 , 0 , O(Gqd) , O(Gqd) , O(GqdMem) , U , U ), + INST(Pext , "pext" , Enc(AvxRvm_OptW) , O_F30F38(F5,U,_,_,_), U , F(WO) , EF(________), 0 , 0 , O(Gqd) , O(Gqd) , O(GqdMem) , U , U ), + INST(Pextrb , "pextrb" , Enc(SimdExtract) , O_000F3A(14,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Gd)|O(Gb)|O(Mem), O(Xmm) , U , U , U ), + INST(Pextrd , "pextrd" , Enc(SimdExtract) , O_000F3A(16,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(GdMem) , O(Xmm) , U , U , U ), + INST(Pextrq , "pextrq" , Enc(SimdExtract) , O_000F3A(16,U,_,W,_), U , F(WO) , EF(________), 0 , 8 , O(GqdMem) , O(Xmm) , U , U , U ), + INST(Pextrw , "pextrw" , Enc(SimdPextrw) , O_000F00(C5,U,_,_,_), O_000F3A(15,U,_,_,_), F(WO) , EF(________), 0 , 8 , O(GdMem) , O(MmXmm) , U , U , U ), + INST(Pf2id , "pf2id" , Enc(Simd3dNow) , O_000F0F(1D,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pf2iw , "pf2iw" , Enc(Simd3dNow) , O_000F0F(1C,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfacc , "pfacc" , Enc(Simd3dNow) , O_000F0F(AE,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfadd , "pfadd" , Enc(Simd3dNow) , O_000F0F(9E,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfcmpeq , "pfcmpeq" , Enc(Simd3dNow) , O_000F0F(B0,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfcmpge , "pfcmpge" , Enc(Simd3dNow) , O_000F0F(90,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfcmpgt , "pfcmpgt" , Enc(Simd3dNow) , O_000F0F(A0,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfmax , "pfmax" , Enc(Simd3dNow) , O_000F0F(A4,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfmin , "pfmin" , Enc(Simd3dNow) , O_000F0F(94,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfmul , "pfmul" , Enc(Simd3dNow) , O_000F0F(B4,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfnacc , "pfnacc" , Enc(Simd3dNow) , O_000F0F(8A,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfpnacc , "pfpnacc" , Enc(Simd3dNow) , O_000F0F(8E,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfrcp , "pfrcp" , Enc(Simd3dNow) , O_000F0F(96,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfrcpit1 , "pfrcpit1" , Enc(Simd3dNow) , O_000F0F(A6,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfrcpit2 , "pfrcpit2" , Enc(Simd3dNow) , O_000F0F(B6,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfrsqit1 , "pfrsqit1" , Enc(Simd3dNow) , O_000F0F(A7,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfrsqrt , "pfrsqrt" , Enc(Simd3dNow) , O_000F0F(97,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfsub , "pfsub" , Enc(Simd3dNow) , O_000F0F(9A,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pfsubr , "pfsubr" , Enc(Simd3dNow) , O_000F0F(AA,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Phaddd , "phaddd" , Enc(SimdRm_P) , O_000F38(02,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Phaddsw , "phaddsw" , Enc(SimdRm_P) , O_000F38(03,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , 
O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Phaddw , "phaddw" , Enc(SimdRm_P) , O_000F38(01,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Phminposuw , "phminposuw" , Enc(SimdRm) , O_660F38(41,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Phsubd , "phsubd" , Enc(SimdRm_P) , O_000F38(06,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Phsubsw , "phsubsw" , Enc(SimdRm_P) , O_000F38(07,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Phsubw , "phsubw" , Enc(SimdRm_P) , O_000F38(05,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pi2fd , "pi2fd" , Enc(Simd3dNow) , O_000F0F(0D,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pi2fw , "pi2fw" , Enc(Simd3dNow) , O_000F0F(0C,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pinsrb , "pinsrb" , Enc(SimdRmi) , O_660F3A(20,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(GdMem) , O(Imm) , U , U ), + INST(Pinsrd , "pinsrd" , Enc(SimdRmi) , O_660F3A(22,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(GdMem) , O(Imm) , U , U ), + INST(Pinsrq , "pinsrq" , Enc(SimdRmi) , O_660F3A(22,U,_,W,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(GqMem) , O(Imm) , U , U ), + INST(Pinsrw , "pinsrw" , Enc(SimdRmi_P) , O_000F00(C4,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(GdMem) , O(Imm) , U , U ), + INST(Pmaddubsw , "pmaddubsw" , Enc(SimdRm_P) , O_000F38(04,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pmaddwd , "pmaddwd" , Enc(SimdRm_P) , O_000F00(F5,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pmaxsb , "pmaxsb" , Enc(SimdRm) , O_660F38(3C,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmaxsd , "pmaxsd" , Enc(SimdRm) , O_660F38(3D,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmaxsw , "pmaxsw" , Enc(SimdRm_P) , O_000F00(EE,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pmaxub , "pmaxub" , Enc(SimdRm_P) , O_000F00(DE,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pmaxud , "pmaxud" , Enc(SimdRm) , O_660F38(3F,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmaxuw , "pmaxuw" , Enc(SimdRm) , O_660F38(3E,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pminsb , "pminsb" , Enc(SimdRm) , O_660F38(38,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pminsd , "pminsd" , Enc(SimdRm) , O_660F38(39,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pminsw , "pminsw" , Enc(SimdRm_P) , O_000F00(EA,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pminub , "pminub" , Enc(SimdRm_P) , O_000F00(DA,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pminud , "pminud" , Enc(SimdRm) , O_660F38(3B,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pminuw , "pminuw" , Enc(SimdRm) , O_660F38(3A,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmovmskb , "pmovmskb" , Enc(SimdRm_PQ) , O_000F00(D7,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Gqd) , O(MmXmm) , U , U , U ), + 
INST(Pmovsxbd , "pmovsxbd" , Enc(SimdRm) , O_660F38(21,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmovsxbq , "pmovsxbq" , Enc(SimdRm) , O_660F38(22,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmovsxbw , "pmovsxbw" , Enc(SimdRm) , O_660F38(20,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmovsxdq , "pmovsxdq" , Enc(SimdRm) , O_660F38(25,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmovsxwd , "pmovsxwd" , Enc(SimdRm) , O_660F38(23,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmovsxwq , "pmovsxwq" , Enc(SimdRm) , O_660F38(24,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmovzxbd , "pmovzxbd" , Enc(SimdRm) , O_660F38(31,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmovzxbq , "pmovzxbq" , Enc(SimdRm) , O_660F38(32,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmovzxbw , "pmovzxbw" , Enc(SimdRm) , O_660F38(30,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmovzxdq , "pmovzxdq" , Enc(SimdRm) , O_660F38(35,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmovzxwd , "pmovzxwd" , Enc(SimdRm) , O_660F38(33,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmovzxwq , "pmovzxwq" , Enc(SimdRm) , O_660F38(34,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmuldq , "pmuldq" , Enc(SimdRm) , O_660F38(28,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmulhrsw , "pmulhrsw" , Enc(SimdRm_P) , O_000F38(0B,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pmulhrw , "pmulhrw" , Enc(Simd3dNow) , O_000F0F(B7,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(Pmulhuw , "pmulhuw" , Enc(SimdRm_P) , O_000F00(E4,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pmulhw , "pmulhw" , Enc(SimdRm_P) , O_000F00(E5,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pmulld , "pmulld" , Enc(SimdRm) , O_660F38(40,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Pmullw , "pmullw" , Enc(SimdRm_P) , O_000F00(D5,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pmuludq , "pmuludq" , Enc(SimdRm_P) , O_000F00(F4,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pop , "pop" , Enc(X86Pop) , O_000000(8F,0,_,_,_), O_000000(58,U,_,_,_), F(WO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , 0 , U , U , U , U ), + INST(Popa , "popa" , Enc(X86Op) , O_000000(61,U,_,_,_), U , F(Volatile)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ), + INST(Popcnt , "popcnt" , Enc(X86RegRm) , O_F30F00(B8,U,_,_,_), U , F(WO) , EF(WWWWWW__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ), + INST(Popf , "popf" , Enc(X86Op) , O_000000(9D,U,_,_,_), U , F(Volatile)|F(Special) , EF(WWWWWWWW), 0 , 0 , U , U , U , U , U ), + INST(Por , "por" , Enc(SimdRm_P) , O_000F00(EB,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Prefetch , "prefetch" , Enc(X86Prefetch) , O_000F00(18,U,_,_,_), U , F(RO)|F(Volatile) , EF(________), 0 , 0 , O(Mem) , O(Imm) , U , U , U ), + 
INST(Prefetch3dNow , "prefetch3dnow" , Enc(X86M) , O_000F00(0D,0,_,_,_), U , F(RO)|F(Volatile) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ), + INST(Prefetchw , "prefetchw" , Enc(X86M) , O_000F00(0D,1,_,_,_), U , F(RO)|F(Volatile) , EF(UUUUUU__), 0 , 0 , O(Mem) , O(Imm) , U , U , U ), + INST(Prefetchwt1 , "prefetchwt1" , Enc(X86M) , O_000F00(0D,2,_,_,_), U , F(RO)|F(Volatile) , EF(UUUUUU__), 0 , 0 , O(Mem) , O(Imm) , U , U , U ), + INST(Psadbw , "psadbw" , Enc(SimdRm_P) , O_000F00(F6,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pshufb , "pshufb" , Enc(SimdRm_P) , O_000F38(00,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pshufd , "pshufd" , Enc(SimdRmi) , O_660F00(70,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Pshufhw , "pshufhw" , Enc(SimdRmi) , O_F30F00(70,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Pshuflw , "pshuflw" , Enc(SimdRmi) , O_F20F00(70,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Pshufw , "pshufw" , Enc(SimdRmi_P) , O_000F00(70,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Mm) , O(MmMem) , O(Imm) , U , U ), + INST(Psignb , "psignb" , Enc(SimdRm_P) , O_000F38(08,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Psignd , "psignd" , Enc(SimdRm_P) , O_000F38(0A,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Psignw , "psignw" , Enc(SimdRm_P) , O_000F38(09,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pslld , "pslld" , Enc(SimdRmRi_P) , O_000F00(F2,U,_,_,_), O_000F00(72,6,_,_,_), F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U ), + INST(Pslldq , "pslldq" , Enc(SimdRmRi) , U , O_660F00(73,7,_,_,_), F(RW) , EF(________), 0 , 0 , O(Xmm) , O(Imm) , U , U , U ), + INST(Psllq , "psllq" , Enc(SimdRmRi_P) , O_000F00(F3,U,_,_,_), O_000F00(73,6,_,_,_), F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U ), + INST(Psllw , "psllw" , Enc(SimdRmRi_P) , O_000F00(F1,U,_,_,_), O_000F00(71,6,_,_,_), F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U ), + INST(Psrad , "psrad" , Enc(SimdRmRi_P) , O_000F00(E2,U,_,_,_), O_000F00(72,4,_,_,_), F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U ), + INST(Psraw , "psraw" , Enc(SimdRmRi_P) , O_000F00(E1,U,_,_,_), O_000F00(71,4,_,_,_), F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U ), + INST(Psrld , "psrld" , Enc(SimdRmRi_P) , O_000F00(D2,U,_,_,_), O_000F00(72,2,_,_,_), F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U ), + INST(Psrldq , "psrldq" , Enc(SimdRmRi) , U , O_660F00(73,3,_,_,_), F(RW) , EF(________), 0 , 0 , O(Xmm) , O(Imm) , U , U , U ), + INST(Psrlq , "psrlq" , Enc(SimdRmRi_P) , O_000F00(D3,U,_,_,_), O_000F00(73,2,_,_,_), F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U ), + INST(Psrlw , "psrlw" , Enc(SimdRmRi_P) , O_000F00(D1,U,_,_,_), O_000F00(71,2,_,_,_), F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem)|O(Imm), U , U , U ), + INST(Psubb , "psubb" , Enc(SimdRm_P) , O_000F00(F8,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Psubd , "psubd" , Enc(SimdRm_P) , O_000F00(FA,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Psubq , "psubq" , Enc(SimdRm_P) , O_000F00(FB,U,_,_,_), 
U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Psubsb , "psubsb" , Enc(SimdRm_P) , O_000F00(E8,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Psubsw , "psubsw" , Enc(SimdRm_P) , O_000F00(E9,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Psubusb , "psubusb" , Enc(SimdRm_P) , O_000F00(D8,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Psubusw , "psubusw" , Enc(SimdRm_P) , O_000F00(D9,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Psubw , "psubw" , Enc(SimdRm_P) , O_000F00(F9,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Pswapd , "pswapd" , Enc(Simd3dNow) , O_000F0F(BB,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Mm) , O(MmMem) , U , U , U ), + INST(Ptest , "ptest" , Enc(SimdRm) , O_660F38(17,U,_,_,_), U , F(RO) , EF(WWWWWW__), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Punpckhbw , "punpckhbw" , Enc(SimdRm_P) , O_000F00(68,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Punpckhdq , "punpckhdq" , Enc(SimdRm_P) , O_000F00(6A,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Punpckhqdq , "punpckhqdq" , Enc(SimdRm) , O_660F00(6D,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Punpckhwd , "punpckhwd" , Enc(SimdRm_P) , O_000F00(69,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Punpcklbw , "punpcklbw" , Enc(SimdRm_P) , O_000F00(60,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Punpckldq , "punpckldq" , Enc(SimdRm_P) , O_000F00(62,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Punpcklqdq , "punpcklqdq" , Enc(SimdRm) , O_660F00(6C,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Punpcklwd , "punpcklwd" , Enc(SimdRm_P) , O_000F00(61,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Push , "push" , Enc(X86Push) , O_000000(FF,6,_,_,_), O_000000(50,U,_,_,_), F(RO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , 0 , U , U , U , U ), + INST(Pusha , "pusha" , Enc(X86Op) , O_000000(60,U,_,_,_), U , F(Volatile)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ), + INST(Pushf , "pushf" , Enc(X86Op) , O_000000(9C,U,_,_,_), U , F(Volatile)|F(Special) , EF(RRRRRRRR), 0 , 0 , U , U , U , U , U ), + INST(Pxor , "pxor" , Enc(SimdRm_P) , O_000F00(EF,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), + INST(Rcl , "rcl" , Enc(X86Rot) , O_000000(D0,2,_,_,_), U , F(RW)|F(Special) , EF(W____X__), 0 , 0 , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , U ), + INST(Rcpps , "rcpps" , Enc(SimdRm) , O_000F00(53,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Rcpss , "rcpss" , Enc(SimdRm) , O_F30F00(53,U,_,_,_), U , F(WO) , EF(________), 0 , 4 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Rcr , "rcr" , Enc(X86Rot) , O_000000(D0,3,_,_,_), U , F(RW)|F(Special) , EF(W____X__), 0 , 0 , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , U ), + INST(Rdfsbase , "rdfsbase" , Enc(X86Rm) , O_F30F00(AE,0,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Gqd) , U , U , U , U ), + INST(Rdgsbase , "rdgsbase" , Enc(X86Rm) , O_F30F00(AE,1,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Gqd) , U , U , U , U ), + INST(Rdrand , "rdrand" , Enc(X86Rm) , 
O_000F00(C7,6,_,_,_), U , F(WO) , EF(WWWWWW__), 0 , 8 , O(Gqdw) , U , U , U , U ), + INST(Rdseed , "rdseed" , Enc(X86Rm) , O_000F00(C7,7,_,_,_), U , F(WO) , EF(WWWWWW__), 0 , 8 , O(Gqdw) , U , U , U , U ), + INST(Rdtsc , "rdtsc" , Enc(X86Op) , O_000F00(31,U,_,_,_), U , F(WO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ), + INST(Rdtscp , "rdtscp" , Enc(X86Op) , O_000F01(F9,U,_,_,_), U , F(WO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ), + INST(RepLodsB , "rep lods_b" , Enc(X86Rep) , O_000000(AC,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , O(Mem) , U , U , U , U ), + INST(RepLodsD , "rep lods_d" , Enc(X86Rep) , O_000000(AD,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , O(Mem) , U , U , U , U ), + INST(RepLodsQ , "rep lods_q" , Enc(X86Rep) , O_000000(AD,1,_,W,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , O(Mem) , U , U , U , U ), + INST(RepLodsW , "rep lods_w" , Enc(X86Rep) , O_660000(AD,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , O(Mem) , U , U , U , U ), + INST(RepMovsB , "rep movs_b" , Enc(X86Rep) , O_000000(A4,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepMovsD , "rep movs_d" , Enc(X86Rep) , O_000000(A5,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepMovsQ , "rep movs_q" , Enc(X86Rep) , O_000000(A5,1,_,W,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepMovsW , "rep movs_w" , Enc(X86Rep) , O_660000(A5,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepStosB , "rep stos_b" , Enc(X86Rep) , O_000000(AA,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , O(Mem) , U , U , U , U ), + INST(RepStosD , "rep stos_d" , Enc(X86Rep) , O_000000(AB,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , O(Mem) , U , U , U , U ), + INST(RepStosQ , "rep stos_q" , Enc(X86Rep) , O_000000(AB,1,_,W,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , O(Mem) , U , U , U , U ), + INST(RepStosW , "rep stos_w" , Enc(X86Rep) , O_660000(AB,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , O(Mem) , U , U , U , U ), + INST(RepeCmpsB , "repe cmps_b" , Enc(X86Rep) , O_000000(A6,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepeCmpsD , "repe cmps_d" , Enc(X86Rep) , O_000000(A7,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepeCmpsQ , "repe cmps_q" , Enc(X86Rep) , O_000000(A7,1,_,W,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepeCmpsW , "repe cmps_w" , Enc(X86Rep) , O_660000(A7,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepeScasB , "repe scas_b" , Enc(X86Rep) , O_000000(AE,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepeScasD , "repe scas_d" , Enc(X86Rep) , O_000000(AF,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepeScasQ , "repe scas_q" , Enc(X86Rep) , O_000000(AF,1,_,W,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepeScasW , "repe scas_w" , Enc(X86Rep) , O_660000(AF,1,_,_,_), U , F(RW)|F(Volatile)|F(Special) , 
EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepneCmpsB , "repne cmps_b" , Enc(X86Rep) , O_000000(A6,0,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepneCmpsD , "repne cmps_d" , Enc(X86Rep) , O_000000(A7,0,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepneCmpsQ , "repne cmps_q" , Enc(X86Rep) , O_000000(A7,0,_,W,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepneCmpsW , "repne cmps_w" , Enc(X86Rep) , O_660000(A7,0,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepneScasB , "repne scas_b" , Enc(X86Rep) , O_000000(AE,0,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepneScasD , "repne scas_d" , Enc(X86Rep) , O_000000(AF,0,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepneScasQ , "repne scas_q" , Enc(X86Rep) , O_000000(AF,0,_,W,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(RepneScasW , "repne scas_w" , Enc(X86Rep) , O_660000(AF,0,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , O(Mem) , O(Mem) , U , U , U ), + INST(Ret , "ret" , Enc(X86Ret) , O_000000(C2,U,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ), + INST(Rol , "rol" , Enc(X86Rot) , O_000000(D0,0,_,_,_), U , F(RW)|F(Special) , EF(W____W__), 0 , 0 , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , U ), + INST(Ror , "ror" , Enc(X86Rot) , O_000000(D0,1,_,_,_), U , F(RW)|F(Special) , EF(W____W__), 0 , 0 , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , U ), + INST(Rorx , "rorx" , Enc(AvxRmi_OptW) , O_F20F3A(F0,U,_,_,_), U , F(WO) , EF(________), 0 , 0 , O(Gqd) , O(GqdMem) , O(Imm) , U , U ), + INST(Roundpd , "roundpd" , Enc(SimdRmi) , O_660F3A(09,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Roundps , "roundps" , Enc(SimdRmi) , O_660F3A(08,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Roundsd , "roundsd" , Enc(SimdRmi) , O_660F3A(0B,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Roundss , "roundss" , Enc(SimdRmi) , O_660F3A(0A,U,_,_,_), U , F(WO) , EF(________), 0 , 4 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Rsqrtps , "rsqrtps" , Enc(SimdRm) , O_000F00(52,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Rsqrtss , "rsqrtss" , Enc(SimdRm) , O_F30F00(52,U,_,_,_), U , F(WO) , EF(________), 0 , 4 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Sahf , "sahf" , Enc(X86Op) , O_000000(9E,U,_,_,_), U , F(RO)|F(Volatile)|F(Special) , EF(_WWWWW__), 0 , 0 , U , U , U , U , U ), + INST(Sal , "sal" , Enc(X86Rot) , O_000000(D0,4,_,_,_), U , F(RW)|F(Special) , EF(WWWUWW__), 0 , 0 , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , U ), + INST(Sar , "sar" , Enc(X86Rot) , O_000000(D0,7,_,_,_), U , F(RW)|F(Special) , EF(WWWUWW__), 0 , 0 , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , U ), + INST(Sarx , "sarx" , Enc(AvxRmv_OptW) , O_F30F38(F7,U,_,_,_), U , F(WO) , EF(________), 0 , 0 , O(Gqd) , O(GqdMem) , O(Gqd) , U , U ), + INST(Sbb , "sbb" , Enc(X86Arith) , O_000000(18,3,_,_,_), U , F(RW)|F(Lock) , EF(WWWWWX__), 0 , 0 , O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U ), + INST(ScasB , "scas_b" , Enc(X86Op) , O_000000(AE,U,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , U , 
U , U , U , U ), + INST(ScasD , "scas_d" , Enc(X86Op) , O_000000(AF,U,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , U , U , U , U , U ), + INST(ScasQ , "scas_q" , Enc(X86Op) , O_000000(AF,U,_,W,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , U , U , U , U , U ), + INST(ScasW , "scas_w" , Enc(X86Op_66H) , O_000000(AF,U,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(WWWWWWR_), 0 , 0 , U , U , U , U , U ), + INST(Seta , "seta" , Enc(X86Set) , O_000F00(97,U,_,_,_), U , F(WO) , EF(__R__R__), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setae , "setae" , Enc(X86Set) , O_000F00(93,U,_,_,_), U , F(WO) , EF(_____R__), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setb , "setb" , Enc(X86Set) , O_000F00(92,U,_,_,_), U , F(WO) , EF(_____R__), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setbe , "setbe" , Enc(X86Set) , O_000F00(96,U,_,_,_), U , F(WO) , EF(__R__R__), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setc , "setc" , Enc(X86Set) , O_000F00(92,U,_,_,_), U , F(WO) , EF(_____R__), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Sete , "sete" , Enc(X86Set) , O_000F00(94,U,_,_,_), U , F(WO) , EF(__R_____), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setg , "setg" , Enc(X86Set) , O_000F00(9F,U,_,_,_), U , F(WO) , EF(RRR_____), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setge , "setge" , Enc(X86Set) , O_000F00(9D,U,_,_,_), U , F(WO) , EF(RR______), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setl , "setl" , Enc(X86Set) , O_000F00(9C,U,_,_,_), U , F(WO) , EF(RR______), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setle , "setle" , Enc(X86Set) , O_000F00(9E,U,_,_,_), U , F(WO) , EF(RRR_____), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setna , "setna" , Enc(X86Set) , O_000F00(96,U,_,_,_), U , F(WO) , EF(__R__R__), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setnae , "setnae" , Enc(X86Set) , O_000F00(92,U,_,_,_), U , F(WO) , EF(_____R__), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setnb , "setnb" , Enc(X86Set) , O_000F00(93,U,_,_,_), U , F(WO) , EF(_____R__), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setnbe , "setnbe" , Enc(X86Set) , O_000F00(97,U,_,_,_), U , F(WO) , EF(__R__R__), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setnc , "setnc" , Enc(X86Set) , O_000F00(93,U,_,_,_), U , F(WO) , EF(_____R__), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setne , "setne" , Enc(X86Set) , O_000F00(95,U,_,_,_), U , F(WO) , EF(__R_____), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setng , "setng" , Enc(X86Set) , O_000F00(9E,U,_,_,_), U , F(WO) , EF(RRR_____), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setnge , "setnge" , Enc(X86Set) , O_000F00(9C,U,_,_,_), U , F(WO) , EF(RR______), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setnl , "setnl" , Enc(X86Set) , O_000F00(9D,U,_,_,_), U , F(WO) , EF(RR______), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setnle , "setnle" , Enc(X86Set) , O_000F00(9F,U,_,_,_), U , F(WO) , EF(RRR_____), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setno , "setno" , Enc(X86Set) , O_000F00(91,U,_,_,_), U , F(WO) , EF(R_______), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setnp , "setnp" , Enc(X86Set) , O_000F00(9B,U,_,_,_), U , F(WO) , EF(____R___), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setns , "setns" , Enc(X86Set) , O_000F00(99,U,_,_,_), U , F(WO) , EF(_R______), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setnz , "setnz" , Enc(X86Set) , O_000F00(95,U,_,_,_), U , F(WO) , EF(__R_____), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Seto , "seto" , Enc(X86Set) , O_000F00(90,U,_,_,_), U , F(WO) , EF(R_______), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setp , "setp" , Enc(X86Set) , 
O_000F00(9A,U,_,_,_), U , F(WO) , EF(____R___), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setpe , "setpe" , Enc(X86Set) , O_000F00(9A,U,_,_,_), U , F(WO) , EF(____R___), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setpo , "setpo" , Enc(X86Set) , O_000F00(9B,U,_,_,_), U , F(WO) , EF(____R___), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Sets , "sets" , Enc(X86Set) , O_000F00(98,U,_,_,_), U , F(WO) , EF(_R______), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Setz , "setz" , Enc(X86Set) , O_000F00(94,U,_,_,_), U , F(WO) , EF(__R_____), 0 , 1 , O(GbMem) , U , U , U , U ), + INST(Sfence , "sfence" , Enc(X86Fence) , O_000F00(AE,7,_,_,_), U , F(Volatile) , EF(________), 0 , 0 , U , U , U , U , U ), + INST(Sha1msg1 , "sha1msg1" , Enc(SimdRm) , O_000F38(C9,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Sha1msg2 , "sha1msg2" , Enc(SimdRm) , O_000F38(CA,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Sha1nexte , "sha1nexte" , Enc(SimdRm) , O_000F38(C8,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Sha1rnds4 , "sha1rnds4" , Enc(SimdRmi) , O_000F3A(CC,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Sha256msg1 , "sha256msg1" , Enc(SimdRm) , O_000F38(CC,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Sha256msg2 , "sha256msg2" , Enc(SimdRm) , O_000F38(CD,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Sha256rnds2 , "sha256rnds2" , Enc(SimdRm) , O_000F38(CB,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Shl , "shl" , Enc(X86Rot) , O_000000(D0,4,_,_,_), U , F(RW)|F(Special) , EF(WWWUWW__), 0 , 0 , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , U ), + INST(Shld , "shld" , Enc(X86ShldShrd) , O_000F00(A4,U,_,_,_), U , F(RW)|F(Special) , EF(UWWUWW__), 0 , 0 , O(GqdwbMem) , O(Gb) , U , U , U ), + INST(Shlx , "shlx" , Enc(AvxRmv_OptW) , O_660F38(F7,U,_,_,_), U , F(WO) , EF(________), 0 , 0 , O(Gqd) , O(GqdMem) , O(Gqd) , U , U ), + INST(Shr , "shr" , Enc(X86Rot) , O_000000(D0,5,_,_,_), U , F(RW)|F(Special) , EF(WWWUWW__), 0 , 0 , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , U ), + INST(Shrd , "shrd" , Enc(X86ShldShrd) , O_000F00(AC,U,_,_,_), U , F(RW)|F(Special) , EF(UWWUWW__), 0 , 0 , O(GqdwbMem) , O(Gqdwb) , U , U , U ), + INST(Shrx , "shrx" , Enc(AvxRmv_OptW) , O_F20F38(F7,U,_,_,_), U , F(WO) , EF(________), 0 , 0 , O(Gqd) , O(GqdMem) , O(Gqd) , U , U ), + INST(Shufpd , "shufpd" , Enc(SimdRmi) , O_660F00(C6,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Shufps , "shufps" , Enc(SimdRmi) , O_000F00(C6,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Sqrtpd , "sqrtpd" , Enc(SimdRm) , O_660F00(51,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Sqrtps , "sqrtps" , Enc(SimdRm) , O_000F00(51,U,_,_,_), U , F(WO) , EF(________), 0 , 16, O(Xmm) , O(XmmMem) , U , U , U ), + INST(Sqrtsd , "sqrtsd" , Enc(SimdRm) , O_F20F00(51,U,_,_,_), U , F(WO) , EF(________), 0 , 8 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Sqrtss , "sqrtss" , Enc(SimdRm) , O_F30F00(51,U,_,_,_), U , F(WO) , EF(________), 0 , 4 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Stc , "stc" , Enc(X86Op) , O_000000(F9,U,_,_,_), U , F(None) , EF(_____W__), 0 , 0 , U , U , U , U , U ), + INST(Std , "std" , Enc(X86Op) , O_000000(FD,U,_,_,_), U , F(None) , EF(______W_), 0 , 0 , U , U , U , U , U ), + 
INST(Stmxcsr , "stmxcsr" , Enc(X86M) , O_000F00(AE,3,_,_,_), U , F(Volatile) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ), + INST(StosB , "stos_b" , Enc(X86Op) , O_000000(AA,U,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , U , U , U , U , U ), + INST(StosD , "stos_d" , Enc(X86Op) , O_000000(AB,U,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , U , U , U , U , U ), + INST(StosQ , "stos_q" , Enc(X86Op) , O_000000(AB,U,_,W,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , U , U , U , U , U ), + INST(StosW , "stos_w" , Enc(X86Op_66H) , O_000000(AB,U,_,_,_), U , F(RW)|F(Volatile)|F(Special) , EF(______R_), 0 , 0 , U , U , U , U , U ), + INST(Sub , "sub" , Enc(X86Arith) , O_000000(28,5,_,_,_), U , F(RW)|F(Lock) , EF(WWWWWW__), 0 , 0 , O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U ), + INST(Subpd , "subpd" , Enc(SimdRm) , O_660F00(5C,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Subps , "subps" , Enc(SimdRm) , O_000F00(5C,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Subsd , "subsd" , Enc(SimdRm) , O_F20F00(5C,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Subss , "subss" , Enc(SimdRm) , O_F30F00(5C,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(T1mskc , "t1mskc" , Enc(XopVm_OptW) , O_00_M09(01,7,_,_,_), U , F(WO) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ), + INST(Test , "test" , Enc(X86Test) , O_000000(84,U,_,_,_), O_000000(F6,U,_,_,_), F(RO) , EF(WWWUWW__), 0 , 0 , O(GqdwbMem) , O(Gqdwb)|O(Imm) , U , U , U ), + INST(Tzcnt , "tzcnt" , Enc(X86RegRm) , O_F30F00(BC,U,_,_,_), U , F(WO) , EF(UUWUUW__), 0 , 0 , O(Gqdw) , O(GqdwMem) , U , U , U ), + INST(Tzmsk , "tzmsk" , Enc(XopVm_OptW) , O_00_M09(01,4,_,_,_), U , F(WO) , EF(WWWUUW__), 0 , 0 , O(Gqd) , O(GqdMem) , U , U , U ), + INST(Ucomisd , "ucomisd" , Enc(SimdRm) , O_660F00(2E,U,_,_,_), U , F(RO) , EF(WWWWWW__), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Ucomiss , "ucomiss" , Enc(SimdRm) , O_000F00(2E,U,_,_,_), U , F(RO) , EF(WWWWWW__), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Ud2 , "ud2" , Enc(X86Op) , O_000F00(0B,U,_,_,_), U , F(None) , EF(________), 0 , 0 , U , U , U , U , U ), + INST(Unpckhpd , "unpckhpd" , Enc(SimdRm) , O_660F00(15,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Unpckhps , "unpckhps" , Enc(SimdRm) , O_000F00(15,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Unpcklpd , "unpcklpd" , Enc(SimdRm) , O_660F00(14,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Unpcklps , "unpcklps" , Enc(SimdRm) , O_000F00(14,U,_,_,_), U , F(RW) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vaddpd , "vaddpd" , Enc(AvxRvm_OptL) , O_660F00(58,U,_,I,1), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vaddps , "vaddps" , Enc(AvxRvm_OptL) , O_000F00(58,U,_,I,0), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vaddsd , "vaddsd" , Enc(AvxRvm) , O_F20F00(58,U,0,I,1), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vaddss , "vaddss" , Enc(AvxRvm) , O_F30F00(58,U,0,I,0), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vaddsubpd , "vaddsubpd" , Enc(AvxRvm_OptL) , O_660F00(D0,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), 
+ INST(Vaddsubps , "vaddsubps" , Enc(AvxRvm_OptL) , O_F20F00(D0,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vaesdec , "vaesdec" , Enc(AvxRvm) , O_660F38(DE,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vaesdeclast , "vaesdeclast" , Enc(AvxRvm) , O_660F38(DF,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vaesenc , "vaesenc" , Enc(AvxRvm) , O_660F38(DC,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vaesenclast , "vaesenclast" , Enc(AvxRvm) , O_660F38(DD,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vaesimc , "vaesimc" , Enc(AvxRm) , O_660F38(DB,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vaeskeygenassist, "vaeskeygenassist", Enc(AvxRmi) , O_660F3A(DF,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Vandnpd , "vandnpd" , Enc(AvxRvm_OptL) , O_660F00(55,U,_,_,1), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vandnps , "vandnps" , Enc(AvxRvm_OptL) , O_000F00(55,U,_,_,0), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vandpd , "vandpd" , Enc(AvxRvm_OptL) , O_660F00(54,U,_,_,1), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vandps , "vandps" , Enc(AvxRvm_OptL) , O_000F00(54,U,_,_,0), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vblendpd , "vblendpd" , Enc(AvxRvmi_OptL) , O_660F3A(0D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Imm) , U ), + INST(Vblendps , "vblendps" , Enc(AvxRvmi_OptL) , O_660F3A(0C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Imm) , U ), + INST(Vblendvpd , "vblendvpd" , Enc(AvxRvmr_OptL) , O_660F3A(4B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Xy) , U ), + INST(Vblendvps , "vblendvps" , Enc(AvxRvmr_OptL) , O_660F3A(4A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Xy) , U ), + INST(Vbroadcastf128 , "vbroadcastf128" , Enc(AvxRm) , O_660F38(1A,U,L,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Ymm) , O(Mem) , U , U , U ), + INST(Vbroadcasti128 , "vbroadcasti128" , Enc(AvxRm) , O_660F38(5A,U,L,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Ymm) , O(Mem) , U , U , U ), + INST(Vbroadcastsd , "vbroadcastsd" , Enc(AvxRm) , O_660F38(19,U,L,0,1), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Ymm) , O(XmmMem) , U , U , U ), + INST(Vbroadcastss , "vbroadcastss" , Enc(AvxRm_OptL) , O_660F38(18,U,_,0,0), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XmmMem) , U , U , U ), + INST(Vcmppd , "vcmppd" , Enc(AvxRvmi_OptL) , O_660F00(C2,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Imm) , U ), + INST(Vcmpps , "vcmpps" , Enc(AvxRvmi_OptL) , O_000F00(C2,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Imm) , U ), + INST(Vcmpsd , "vcmpsd" , Enc(AvxRvmi) , O_F20F00(C2,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vcmpss , "vcmpss" , Enc(AvxRvmi) , O_F30F00(C2,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vcomisd , "vcomisd" , Enc(AvxRm) , O_660F00(2F,U,_,_,_), U , F(RO)|F(Avx) , EF(WWWWWW__), 0 
, 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vcomiss , "vcomiss" , Enc(AvxRm) , O_000F00(2F,U,_,_,_), U , F(RO)|F(Avx) , EF(WWWWWW__), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vcvtdq2pd , "vcvtdq2pd" , Enc(AvxRm_OptL) , O_F30F00(E6,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XmmMem) , U , U , U ), + INST(Vcvtdq2ps , "vcvtdq2ps" , Enc(AvxRm_OptL) , O_000F00(5B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vcvtpd2dq , "vcvtpd2dq" , Enc(AvxRm) , O_F20F00(E6,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XyMem) , U , U , U ), + INST(Vcvtpd2ps , "vcvtpd2ps" , Enc(AvxRm) , O_660F00(5A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XyMem) , U , U , U ), + INST(Vcvtph2ps , "vcvtph2ps" , Enc(AvxRm_OptL) , O_660F38(13,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XmmMem) , U , U , U ), + INST(Vcvtps2dq , "vcvtps2dq" , Enc(AvxRm_OptL) , O_660F00(5B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vcvtps2pd , "vcvtps2pd" , Enc(AvxRm_OptL) , O_000F00(5A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XmmMem) , U , U , U ), + INST(Vcvtps2ph , "vcvtps2ph" , Enc(AvxMri_OptL) , O_660F3A(1D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(Xy) , O(Imm) , U , U ), + INST(Vcvtsd2si , "vcvtsd2si" , Enc(AvxRm) , O_F20F00(2D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Gqd) , O(XmmMem) , U , U , U ), + INST(Vcvtsd2ss , "vcvtsd2ss" , Enc(AvxRvm) , O_F20F00(5A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vcvtsi2sd , "vcvtsi2sd" , Enc(AvxRvm) , O_F20F00(2A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(GqdMem) , U , U ), + INST(Vcvtsi2ss , "vcvtsi2ss" , Enc(AvxRvm) , O_F30F00(2A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(GqdMem) , U , U ), + INST(Vcvtss2sd , "vcvtss2sd" , Enc(AvxRvm) , O_F30F00(5A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vcvtss2si , "vcvtss2si" , Enc(AvxRm) , O_F20F00(2D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Gqd) , O(XmmMem) , U , U , U ), + INST(Vcvttpd2dq , "vcvttpd2dq" , Enc(AvxRm_OptL) , O_660F00(E6,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XyMem) , U , U , U ), + INST(Vcvttps2dq , "vcvttps2dq" , Enc(AvxRm_OptL) , O_F30F00(5B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vcvttsd2si , "vcvttsd2si" , Enc(AvxRm) , O_F20F00(2C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Gqd) , O(XmmMem) , U , U , U ), + INST(Vcvttss2si , "vcvttss2si" , Enc(AvxRm) , O_F30F00(2C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Gqd) , O(XmmMem) , U , U , U ), + INST(Vdivpd , "vdivpd" , Enc(AvxRvm_OptL) , O_660F00(5E,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vdivps , "vdivps" , Enc(AvxRvm_OptL) , O_000F00(5E,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vdivsd , "vdivsd" , Enc(AvxRvm) , O_F20F00(5E,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vdivss , "vdivss" , Enc(AvxRvm) , O_F30F00(5E,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vdppd , "vdppd" , Enc(AvxRvmi) , O_660F3A(41,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vdpps 
, "vdpps" , Enc(AvxRvmi_OptL) , O_660F3A(40,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Imm) , U ), + INST(Vextractf128 , "vextractf128" , Enc(AvxMri) , O_660F3A(19,U,L,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(Ymm) , O(Imm) , U , U ), + INST(Vextracti128 , "vextracti128" , Enc(AvxMri) , O_660F3A(39,U,L,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(Ymm) , O(Imm) , U , U ), + INST(Vextractps , "vextractps" , Enc(AvxMri) , O_660F3A(17,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(GqdMem) , O(Xmm) , O(Imm) , U , U ), + INST(Vfmadd132pd , "vfmadd132pd" , Enc(AvxRvm_OptL) , O_660F38(98,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmadd132ps , "vfmadd132ps" , Enc(AvxRvm_OptL) , O_660F38(98,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmadd132sd , "vfmadd132sd" , Enc(AvxRvm) , O_660F38(99,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfmadd132ss , "vfmadd132ss" , Enc(AvxRvm) , O_660F38(99,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfmadd213pd , "vfmadd213pd" , Enc(AvxRvm_OptL) , O_660F38(A8,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmadd213ps , "vfmadd213ps" , Enc(AvxRvm_OptL) , O_660F38(A8,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmadd213sd , "vfmadd213sd" , Enc(AvxRvm) , O_660F38(A9,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfmadd213ss , "vfmadd213ss" , Enc(AvxRvm) , O_660F38(A9,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfmadd231pd , "vfmadd231pd" , Enc(AvxRvm_OptL) , O_660F38(B8,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmadd231ps , "vfmadd231ps" , Enc(AvxRvm_OptL) , O_660F38(B8,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmadd231sd , "vfmadd231sd" , Enc(AvxRvm) , O_660F38(B9,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfmadd231ss , "vfmadd231ss" , Enc(AvxRvm) , O_660F38(B9,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfmaddpd , "vfmaddpd" , Enc(Fma4_OptL) , O_660F3A(69,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vfmaddps , "vfmaddps" , Enc(Fma4_OptL) , O_660F3A(68,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vfmaddsd , "vfmaddsd" , Enc(Fma4) , O_660F3A(6B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(XmmMem) , U ), + INST(Vfmaddss , "vfmaddss" , Enc(Fma4) , O_660F3A(6A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(XmmMem) , U ), + INST(Vfmaddsub132pd , "vfmaddsub132pd" , Enc(AvxRvm_OptL) , O_660F38(96,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmaddsub132ps , "vfmaddsub132ps" , Enc(AvxRvm_OptL) , O_660F38(96,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmaddsub213pd , "vfmaddsub213pd" , Enc(AvxRvm_OptL) , O_660F38(A6,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + 
INST(Vfmaddsub213ps , "vfmaddsub213ps" , Enc(AvxRvm_OptL) , O_660F38(A6,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmaddsub231pd , "vfmaddsub231pd" , Enc(AvxRvm_OptL) , O_660F38(B6,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmaddsub231ps , "vfmaddsub231ps" , Enc(AvxRvm_OptL) , O_660F38(B6,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmaddsubpd , "vfmaddsubpd" , Enc(Fma4_OptL) , O_660F3A(5D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vfmaddsubps , "vfmaddsubps" , Enc(Fma4_OptL) , O_660F3A(5C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vfmsub132pd , "vfmsub132pd" , Enc(AvxRvm_OptL) , O_660F38(9A,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmsub132ps , "vfmsub132ps" , Enc(AvxRvm_OptL) , O_660F38(9A,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmsub132sd , "vfmsub132sd" , Enc(AvxRvm) , O_660F38(9B,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfmsub132ss , "vfmsub132ss" , Enc(AvxRvm) , O_660F38(9B,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfmsub213pd , "vfmsub213pd" , Enc(AvxRvm_OptL) , O_660F38(AA,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmsub213ps , "vfmsub213ps" , Enc(AvxRvm_OptL) , O_660F38(AA,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmsub213sd , "vfmsub213sd" , Enc(AvxRvm) , O_660F38(AB,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfmsub213ss , "vfmsub213ss" , Enc(AvxRvm) , O_660F38(AB,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfmsub231pd , "vfmsub231pd" , Enc(AvxRvm_OptL) , O_660F38(BA,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmsub231ps , "vfmsub231ps" , Enc(AvxRvm_OptL) , O_660F38(BA,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmsub231sd , "vfmsub231sd" , Enc(AvxRvm) , O_660F38(BB,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfmsub231ss , "vfmsub231ss" , Enc(AvxRvm) , O_660F38(BB,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfmsubadd132pd , "vfmsubadd132pd" , Enc(AvxRvm_OptL) , O_660F38(97,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmsubadd132ps , "vfmsubadd132ps" , Enc(AvxRvm_OptL) , O_660F38(97,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmsubadd213pd , "vfmsubadd213pd" , Enc(AvxRvm_OptL) , O_660F38(A7,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmsubadd213ps , "vfmsubadd213ps" , Enc(AvxRvm_OptL) , O_660F38(A7,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmsubadd231pd , "vfmsubadd231pd" , Enc(AvxRvm_OptL) , O_660F38(B7,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmsubadd231ps , "vfmsubadd231ps" , Enc(AvxRvm_OptL) , O_660F38(B7,U,_,_,_), U , F(RW)|F(Avx) 
, EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfmsubaddpd , "vfmsubaddpd" , Enc(Fma4_OptL) , O_660F3A(5F,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vfmsubaddps , "vfmsubaddps" , Enc(Fma4_OptL) , O_660F3A(5E,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vfmsubpd , "vfmsubpd" , Enc(Fma4_OptL) , O_660F3A(6D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vfmsubps , "vfmsubps" , Enc(Fma4_OptL) , O_660F3A(6C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vfmsubsd , "vfmsubsd" , Enc(Fma4) , O_660F3A(6F,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(XmmMem) , U ), + INST(Vfmsubss , "vfmsubss" , Enc(Fma4) , O_660F3A(6E,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(XmmMem) , U ), + INST(Vfnmadd132pd , "vfnmadd132pd" , Enc(AvxRvm_OptL) , O_660F38(9C,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfnmadd132ps , "vfnmadd132ps" , Enc(AvxRvm_OptL) , O_660F38(9C,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfnmadd132sd , "vfnmadd132sd" , Enc(AvxRvm) , O_660F38(9D,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfnmadd132ss , "vfnmadd132ss" , Enc(AvxRvm) , O_660F38(9D,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfnmadd213pd , "vfnmadd213pd" , Enc(AvxRvm_OptL) , O_660F38(AC,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfnmadd213ps , "vfnmadd213ps" , Enc(AvxRvm_OptL) , O_660F38(AC,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfnmadd213sd , "vfnmadd213sd" , Enc(AvxRvm) , O_660F38(AD,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfnmadd213ss , "vfnmadd213ss" , Enc(AvxRvm) , O_660F38(AD,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfnmadd231pd , "vfnmadd231pd" , Enc(AvxRvm_OptL) , O_660F38(BC,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfnmadd231ps , "vfnmadd231ps" , Enc(AvxRvm_OptL) , O_660F38(BC,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfnmadd231sd , "vfnmadd231sd" , Enc(AvxRvm) , O_660F38(BC,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfnmadd231ss , "vfnmadd231ss" , Enc(AvxRvm) , O_660F38(BC,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfnmaddpd , "vfnmaddpd" , Enc(Fma4_OptL) , O_660F3A(79,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vfnmaddps , "vfnmaddps" , Enc(Fma4_OptL) , O_660F3A(78,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vfnmaddsd , "vfnmaddsd" , Enc(Fma4) , O_660F3A(7B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(XmmMem) , U ), + INST(Vfnmaddss , "vfnmaddss" , Enc(Fma4) , O_660F3A(7A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(XmmMem) , U ), + INST(Vfnmsub132pd , "vfnmsub132pd" , Enc(AvxRvm_OptL) , 
O_660F38(9E,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfnmsub132ps , "vfnmsub132ps" , Enc(AvxRvm_OptL) , O_660F38(9E,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfnmsub132sd , "vfnmsub132sd" , Enc(AvxRvm) , O_660F38(9F,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfnmsub132ss , "vfnmsub132ss" , Enc(AvxRvm) , O_660F38(9F,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfnmsub213pd , "vfnmsub213pd" , Enc(AvxRvm_OptL) , O_660F38(AE,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfnmsub213ps , "vfnmsub213ps" , Enc(AvxRvm_OptL) , O_660F38(AE,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfnmsub213sd , "vfnmsub213sd" , Enc(AvxRvm) , O_660F38(AF,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfnmsub213ss , "vfnmsub213ss" , Enc(AvxRvm) , O_660F38(AF,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfnmsub231pd , "vfnmsub231pd" , Enc(AvxRvm_OptL) , O_660F38(BE,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfnmsub231ps , "vfnmsub231ps" , Enc(AvxRvm_OptL) , O_660F38(BE,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vfnmsub231sd , "vfnmsub231sd" , Enc(AvxRvm) , O_660F38(BF,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfnmsub231ss , "vfnmsub231ss" , Enc(AvxRvm) , O_660F38(BF,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vfnmsubpd , "vfnmsubpd" , Enc(Fma4_OptL) , O_660F3A(7D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vfnmsubps , "vfnmsubps" , Enc(Fma4_OptL) , O_660F3A(7C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vfnmsubsd , "vfnmsubsd" , Enc(Fma4) , O_660F3A(7F,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(XmmMem) , U ), + INST(Vfnmsubss , "vfnmsubss" , Enc(Fma4) , O_660F3A(7E,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(XmmMem) , U ), + INST(Vfrczpd , "vfrczpd" , Enc(XopRm_OptL) , O_00_M09(81,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vfrczps , "vfrczps" , Enc(XopRm_OptL) , O_00_M09(80,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vfrczsd , "vfrczsd" , Enc(XopRm) , O_00_M09(83,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vfrczss , "vfrczss" , Enc(XopRm) , O_00_M09(82,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vgatherdpd , "vgatherdpd" , Enc(AvxGather) , O_660F38(92,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Mem) , O(Xy) , U , U ), + INST(Vgatherdps , "vgatherdps" , Enc(AvxGather) , O_660F38(92,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Mem) , O(Xy) , U , U ), + INST(Vgatherqpd , "vgatherqpd" , Enc(AvxGather) , O_660F38(93,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Mem) , O(Xy) , U , U ), + INST(Vgatherqps , "vgatherqps" , Enc(AvxGatherEx) , O_660F38(93,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 
0 , 0 , O(Xmm) , O(Mem) , O(Xmm) , U , U ), + INST(Vhaddpd , "vhaddpd" , Enc(AvxRvm_OptL) , O_660F00(7C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vhaddps , "vhaddps" , Enc(AvxRvm_OptL) , O_F20F00(7C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vhsubpd , "vhsubpd" , Enc(AvxRvm_OptL) , O_660F00(7D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vhsubps , "vhsubps" , Enc(AvxRvm_OptL) , O_F20F00(7D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vinsertf128 , "vinsertf128" , Enc(AvxRvmi) , O_660F3A(18,U,L,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Ymm) , O(Ymm) , O(XmmMem) , O(Imm) , U ), + INST(Vinserti128 , "vinserti128" , Enc(AvxRvmi) , O_660F3A(38,U,L,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Ymm) , O(Ymm) , O(XmmMem) , O(Imm) , U ), + INST(Vinsertps , "vinsertps" , Enc(AvxRvmi) , O_660F3A(21,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vlddqu , "vlddqu" , Enc(AvxRm_OptL) , O_F20F00(F0,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Mem) , U , U , U ), + INST(Vldmxcsr , "vldmxcsr" , Enc(AvxM) , O_000F00(AE,2,_,_,_), U , F(RO)|F(Avx)|F(Volatile) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ), + INST(Vmaskmovdqu , "vmaskmovdqu" , Enc(AvxRm) , O_660F00(F7,U,_,_,_), U , F(RO)|F(Avx)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , U , U , U ), + INST(Vmaskmovpd , "vmaskmovpd" , Enc(AvxRvmMvr_OptL) , O_660F38(2D,U,_,_,_), O_660F38(2F,U,_,_,_), F(RW)|F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(Xy) , O(XyMem) , U , U ), + INST(Vmaskmovps , "vmaskmovps" , Enc(AvxRvmMvr_OptL) , O_660F38(2C,U,_,_,_), O_660F38(2E,U,_,_,_), F(RW)|F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(Xy) , O(XyMem) , U , U ), + INST(Vmaxpd , "vmaxpd" , Enc(AvxRvm_OptL) , O_660F00(5F,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vmaxps , "vmaxps" , Enc(AvxRvm_OptL) , O_000F00(5F,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vmaxsd , "vmaxsd" , Enc(AvxRvm_OptL) , O_F20F00(5F,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vmaxss , "vmaxss" , Enc(AvxRvm_OptL) , O_F30F00(5F,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vminpd , "vminpd" , Enc(AvxRvm_OptL) , O_660F00(5D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vminps , "vminps" , Enc(AvxRvm_OptL) , O_000F00(5D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vminsd , "vminsd" , Enc(AvxRvm_OptL) , O_F20F00(5D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vminss , "vminss" , Enc(AvxRvm_OptL) , O_F30F00(5D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vmovapd , "vmovapd" , Enc(AvxRmMr_OptL) , O_660F00(28,U,_,_,_), O_660F00(29,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ), + INST(Vmovaps , "vmovaps" , Enc(AvxRmMr_OptL) , O_000F00(28,U,_,_,_), O_000F00(29,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ), + INST(Vmovd , "vmovd" , Enc(AvxMovDQ) , O_660F00(6E,U,_,_,_), O_660F00(7E,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(XmmMem) , U , U , U ), + 
INST(Vmovddup , "vmovddup" , Enc(AvxRm_OptL) , O_F20F00(12,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vmovdqa , "vmovdqa" , Enc(AvxRmMr_OptL) , O_660F00(6F,U,_,_,_), O_660F00(7F,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ), + INST(Vmovdqu , "vmovdqu" , Enc(AvxRmMr_OptL) , O_F30F00(6F,U,_,_,_), O_F30F00(7F,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ), + INST(Vmovhlps , "vmovhlps" , Enc(AvxRvm) , O_000F00(12,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(Xmm) , U , U ), + INST(Vmovhpd , "vmovhpd" , Enc(AvxRvmMr) , O_660F00(16,U,_,_,_), O_660F00(17,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(Xmm) , O(Mem) , U , U ), + INST(Vmovhps , "vmovhps" , Enc(AvxRvmMr) , O_000F00(16,U,_,_,_), O_000F00(17,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(Xmm) , O(Mem) , U , U ), + INST(Vmovlhps , "vmovlhps" , Enc(AvxRvm) , O_000F00(16,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(Xmm) , U , U ), + INST(Vmovlpd , "vmovlpd" , Enc(AvxRvmMr) , O_660F00(12,U,_,_,_), O_660F00(13,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(Xmm) , O(Mem) , U , U ), + INST(Vmovlps , "vmovlps" , Enc(AvxRvmMr) , O_000F00(12,U,_,_,_), O_000F00(13,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(Xmm) , O(Mem) , U , U ), + INST(Vmovmskpd , "vmovmskpd" , Enc(AvxRm_OptL) , O_660F00(50,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Gqd) , O(Xy) , U , U , U ), + INST(Vmovmskps , "vmovmskps" , Enc(AvxRm_OptL) , O_000F00(50,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Gqd) , O(Xy) , U , U , U ), + INST(Vmovntdq , "vmovntdq" , Enc(AvxMr_OptL) , O_660F00(E7,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Mem) , O(Xy) , U , U , U ), + INST(Vmovntdqa , "vmovntdqa" , Enc(AvxRm_OptL) , O_660F38(2A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Mem) , U , U , U ), + INST(Vmovntpd , "vmovntpd" , Enc(AvxMr_OptL) , O_660F00(2B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Mem) , O(Xy) , U , U , U ), + INST(Vmovntps , "vmovntps" , Enc(AvxMr_OptL) , O_000F00(2B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Mem) , O(Xy) , U , U , U ), + INST(Vmovq , "vmovq" , Enc(AvxMovDQ) , O_660F00(6E,U,_,W,_), O_660F00(7E,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(XmmMem) , U , U , U ), + INST(Vmovsd , "vmovsd" , Enc(AvxMovSsSd) , O_F20F00(10,U,_,_,_), O_F20F00(11,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(XmmMem) , O(Xmm) , U , U ), + INST(Vmovshdup , "vmovshdup" , Enc(AvxRm_OptL) , O_F30F00(16,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vmovsldup , "vmovsldup" , Enc(AvxRm_OptL) , O_F30F00(12,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vmovss , "vmovss" , Enc(AvxMovSsSd) , O_F30F00(10,U,_,_,_), O_F30F00(11,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(Xmm) , O(Xmm) , U , U ), + INST(Vmovupd , "vmovupd" , Enc(AvxRmMr_OptL) , O_660F00(10,U,_,_,_), O_660F00(11,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ), + INST(Vmovups , "vmovups" , Enc(AvxRmMr_OptL) , O_000F00(10,U,_,_,_), O_000F00(11,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ), + INST(Vmpsadbw , "vmpsadbw" , Enc(AvxRvmi_OptL) , O_660F3A(42,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Imm) , U ), + 
INST(Vmulpd , "vmulpd" , Enc(AvxRvm_OptL) , O_660F00(59,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vmulps , "vmulps" , Enc(AvxRvm_OptL) , O_000F00(59,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vmulsd , "vmulsd" , Enc(AvxRvm_OptL) , O_F20F00(59,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vmulss , "vmulss" , Enc(AvxRvm_OptL) , O_F30F00(59,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vorpd , "vorpd" , Enc(AvxRvm_OptL) , O_660F00(56,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vorps , "vorps" , Enc(AvxRvm_OptL) , O_000F00(56,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpabsb , "vpabsb" , Enc(AvxRm_OptL) , O_660F38(1C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpabsd , "vpabsd" , Enc(AvxRm_OptL) , O_660F38(1E,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpabsw , "vpabsw" , Enc(AvxRm_OptL) , O_660F38(1D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpackssdw , "vpackssdw" , Enc(AvxRvm_OptL) , O_660F00(6B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpacksswb , "vpacksswb" , Enc(AvxRvm_OptL) , O_660F00(63,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpackusdw , "vpackusdw" , Enc(AvxRvm_OptL) , O_660F38(2B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpackuswb , "vpackuswb" , Enc(AvxRvm_OptL) , O_660F00(67,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpaddb , "vpaddb" , Enc(AvxRvm_OptL) , O_660F00(FC,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpaddd , "vpaddd" , Enc(AvxRvm_OptL) , O_660F00(FE,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpaddq , "vpaddq" , Enc(AvxRvm_OptL) , O_660F00(D4,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpaddsb , "vpaddsb" , Enc(AvxRvm_OptL) , O_660F00(EC,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpaddsw , "vpaddsw" , Enc(AvxRvm_OptL) , O_660F00(ED,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpaddusb , "vpaddusb" , Enc(AvxRvm_OptL) , O_660F00(DC,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpaddusw , "vpaddusw" , Enc(AvxRvm_OptL) , O_660F00(DD,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpaddw , "vpaddw" , Enc(AvxRvm_OptL) , O_660F00(FD,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpalignr , "vpalignr" , Enc(AvxRvmi_OptL) , O_660F3A(0F,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Imm) , U ), + INST(Vpand , "vpand" , Enc(AvxRvm_OptL) , O_660F00(DB,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpandn , "vpandn" , Enc(AvxRvm_OptL) , O_660F00(DF,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpavgb , "vpavgb" , Enc(AvxRvm_OptL) , 
O_660F00(E0,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpavgw , "vpavgw" , Enc(AvxRvm_OptL) , O_660F00(E3,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpblendd , "vpblendd" , Enc(AvxRvmi_OptL) , O_660F3A(02,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Imm) , U ), + INST(Vpblendvb , "vpblendvb" , Enc(AvxRvmr) , O_660F3A(4C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Xy) , U ), + INST(Vpblendw , "vpblendw" , Enc(AvxRvmi_OptL) , O_660F3A(0E,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Imm) , U ), + INST(Vpbroadcastb , "vpbroadcastb" , Enc(AvxRm_OptL) , O_660F38(78,U,_,_,0), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XmmMem) , U , U , U ), + INST(Vpbroadcastd , "vpbroadcastd" , Enc(AvxRm_OptL) , O_660F38(58,U,_,_,0), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XmmMem) , U , U , U ), + INST(Vpbroadcastq , "vpbroadcastq" , Enc(AvxRm_OptL) , O_660F38(59,U,_,_,1), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XmmMem) , U , U , U ), + INST(Vpbroadcastw , "vpbroadcastw" , Enc(AvxRm_OptL) , O_660F38(79,U,_,_,0), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XmmMem) , U , U , U ), + INST(Vpclmulqdq , "vpclmulqdq" , Enc(AvxRvmi) , O_660F3A(44,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vpcmov , "vpcmov" , Enc(XopRvrmRvmr_OptL), O_00_M08(A2,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vpcmpeqb , "vpcmpeqb" , Enc(AvxRvm_OptL) , O_660F00(74,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpcmpeqd , "vpcmpeqd" , Enc(AvxRvm_OptL) , O_660F00(76,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpcmpeqq , "vpcmpeqq" , Enc(AvxRvm_OptL) , O_660F38(29,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpcmpeqw , "vpcmpeqw" , Enc(AvxRvm_OptL) , O_660F00(75,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpcmpestri , "vpcmpestri" , Enc(AvxRmi) , O_660F3A(61,U,_,_,_), U , F(WO)|F(Avx)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Vpcmpestrm , "vpcmpestrm" , Enc(AvxRmi) , O_660F3A(60,U,_,_,_), U , F(WO)|F(Avx)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Vpcmpgtb , "vpcmpgtb" , Enc(AvxRvm_OptL) , O_660F00(64,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpcmpgtd , "vpcmpgtd" , Enc(AvxRvm_OptL) , O_660F00(66,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpcmpgtq , "vpcmpgtq" , Enc(AvxRvm_OptL) , O_660F38(37,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpcmpgtw , "vpcmpgtw" , Enc(AvxRvm_OptL) , O_660F00(65,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpcmpistri , "vpcmpistri" , Enc(AvxRmi) , O_660F3A(63,U,_,_,_), U , F(WO)|F(Avx)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Vpcmpistrm , "vpcmpistrm" , Enc(AvxRmi) , O_660F3A(62,U,_,_,_), U , F(WO)|F(Avx)|F(Special) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(Imm) , U , U ), + INST(Vpcomb , "vpcomb" , Enc(XopRvmi) , O_00_M08(CC,U,_,_,_), U , F(WO)|F(Avx) , 
EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vpcomd , "vpcomd" , Enc(XopRvmi) , O_00_M08(CE,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vpcomq , "vpcomq" , Enc(XopRvmi) , O_00_M08(CF,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vpcomub , "vpcomub" , Enc(XopRvmi) , O_00_M08(EC,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vpcomud , "vpcomud" , Enc(XopRvmi) , O_00_M08(EE,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vpcomuq , "vpcomuq" , Enc(XopRvmi) , O_00_M08(EF,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vpcomuw , "vpcomuw" , Enc(XopRvmi) , O_00_M08(ED,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vpcomw , "vpcomw" , Enc(XopRvmi) , O_00_M08(CD,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vperm2f128 , "vperm2f128" , Enc(AvxRvmi) , O_660F3A(06,U,L,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Ymm) , O(Ymm) , O(YmmMem) , O(Imm) , U ), + INST(Vperm2i128 , "vperm2i128" , Enc(AvxRvmi) , O_660F3A(46,U,L,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Ymm) , O(Ymm) , O(YmmMem) , O(Imm) , U ), + INST(Vpermd , "vpermd" , Enc(AvxRvm) , O_660F38(36,U,L,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Ymm) , O(Ymm) , O(YmmMem) , U , U ), + INST(Vpermil2pd , "vpermil2pd" , Enc(AvxRvrmRvmr_OptL), O_66_M03(49,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vpermil2ps , "vpermil2ps" , Enc(AvxRvrmRvmr_OptL), O_66_M03(48,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(XyMem) , U ), + INST(Vpermilpd , "vpermilpd" , Enc(AvxRvmRmi_OptL) , O_660F38(0D,U,_,_,_), O_660F3A(05,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U ), + INST(Vpermilps , "vpermilps" , Enc(AvxRvmRmi_OptL) , O_660F38(0C,U,_,_,_), O_660F3A(04,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U ), + INST(Vpermpd , "vpermpd" , Enc(AvxRmi) , O_660F3A(01,U,L,W,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Ymm) , O(YmmMem) , O(Imm) , U , U ), + INST(Vpermps , "vpermps" , Enc(AvxRvm) , O_660F38(16,U,L,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Ymm) , O(Ymm) , O(YmmMem) , U , U ), + INST(Vpermq , "vpermq" , Enc(AvxRmi) , O_660F3A(00,U,L,W,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Ymm) , O(YmmMem) , O(Imm) , U , U ), + INST(Vpextrb , "vpextrb" , Enc(AvxMri) , O_660F3A(14,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(GqdwbMem) , O(Xmm) , O(Imm) , U , U ), + INST(Vpextrd , "vpextrd" , Enc(AvxMri) , O_660F3A(16,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(GqdMem) , O(Xmm) , O(Imm) , U , U ), + INST(Vpextrq , "vpextrq" , Enc(AvxMri) , O_660F3A(16,U,_,W,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(GqMem) , O(Xmm) , O(Imm) , U , U ), + INST(Vpextrw , "vpextrw" , Enc(AvxMri) , O_660F3A(15,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(GqdwMem) , O(Xmm) , O(Imm) , U , U ), + INST(Vpgatherdd , "vpgatherdd" , Enc(AvxGather) , O_660F38(90,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Mem) , O(Xy) , U , U ), + INST(Vpgatherdq , "vpgatherdq" , Enc(AvxGather) , O_660F38(90,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , 
O(Mem) , O(Xy) , U , U ), + INST(Vpgatherqd , "vpgatherqd" , Enc(AvxGatherEx) , O_660F38(91,U,_,_,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Mem) , O(Xmm) , U , U ), + INST(Vpgatherqq , "vpgatherqq" , Enc(AvxGather) , O_660F38(91,U,_,W,_), U , F(RW)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Mem) , O(Xy) , U , U ), + INST(Vphaddbd , "vphaddbd" , Enc(XopRm) , O_00_M09(C2,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphaddbq , "vphaddbq" , Enc(XopRm) , O_00_M09(C3,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphaddbw , "vphaddbw" , Enc(XopRm) , O_00_M09(C1,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphaddd , "vphaddd" , Enc(AvxRvm_OptL) , O_660F38(02,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vphadddq , "vphadddq" , Enc(XopRm) , O_00_M09(CB,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphaddsw , "vphaddsw" , Enc(AvxRvm_OptL) , O_660F38(03,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vphaddubd , "vphaddubd" , Enc(XopRm) , O_00_M09(D2,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphaddubq , "vphaddubq" , Enc(XopRm) , O_00_M09(D3,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphaddubw , "vphaddubw" , Enc(XopRm) , O_00_M09(D1,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphaddudq , "vphaddudq" , Enc(XopRm) , O_00_M09(DB,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphadduwd , "vphadduwd" , Enc(XopRm) , O_00_M09(D6,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphadduwq , "vphadduwq" , Enc(XopRm) , O_00_M09(D7,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphaddw , "vphaddw" , Enc(AvxRvm_OptL) , O_660F38(01,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vphaddwd , "vphaddwd" , Enc(XopRm) , O_00_M09(C6,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphaddwq , "vphaddwq" , Enc(XopRm) , O_00_M09(C7,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphminposuw , "vphminposuw" , Enc(AvxRm) , O_660F38(41,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphsubbw , "vphsubbw" , Enc(XopRm) , O_00_M09(E1,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphsubd , "vphsubd" , Enc(AvxRvm_OptL) , O_660F38(06,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vphsubdq , "vphsubdq" , Enc(XopRm) , O_00_M09(E3,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vphsubsw , "vphsubsw" , Enc(AvxRvm_OptL) , O_660F38(07,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vphsubw , "vphsubw" , Enc(AvxRvm_OptL) , O_660F38(05,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vphsubwd , "vphsubwd" , Enc(XopRm) , O_00_M09(E2,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vpinsrb , "vpinsrb" , Enc(AvxRvmi) , O_660F3A(20,U,_,_,_), 
U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(GqdwbMem) , O(Imm) , U ), + INST(Vpinsrd , "vpinsrd" , Enc(AvxRvmi) , O_660F3A(22,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(GqdMem) , O(Imm) , U ), + INST(Vpinsrq , "vpinsrq" , Enc(AvxRvmi) , O_660F3A(22,U,_,W,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(GqMem) , O(Imm) , U ), + INST(Vpinsrw , "vpinsrw" , Enc(AvxRvmi) , O_660F00(C4,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(GqdwMem) , O(Imm) , U ), + INST(Vpmacsdd , "vpmacsdd" , Enc(XopRvmr) , O_00_M08(9E,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U ), + INST(Vpmacsdqh , "vpmacsdqh" , Enc(XopRvmr) , O_00_M08(9F,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U ), + INST(Vpmacsdql , "vpmacsdql" , Enc(XopRvmr) , O_00_M08(97,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U ), + INST(Vpmacssdd , "vpmacssdd" , Enc(XopRvmr) , O_00_M08(8E,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U ), + INST(Vpmacssdqh , "vpmacssdqh" , Enc(XopRvmr) , O_00_M08(8F,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U ), + INST(Vpmacssdql , "vpmacssdql" , Enc(XopRvmr) , O_00_M08(87,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U ), + INST(Vpmacsswd , "vpmacsswd" , Enc(XopRvmr) , O_00_M08(86,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U ), + INST(Vpmacssww , "vpmacssww" , Enc(XopRvmr) , O_00_M08(85,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U ), + INST(Vpmacswd , "vpmacswd" , Enc(XopRvmr) , O_00_M08(96,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U ), + INST(Vpmacsww , "vpmacsww" , Enc(XopRvmr) , O_00_M08(95,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U ), + INST(Vpmadcsswd , "vpmadcsswd" , Enc(XopRvmr) , O_00_M08(A6,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U ), + INST(Vpmadcswd , "vpmadcswd" , Enc(XopRvmr) , O_00_M08(B6,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Xmm) , U ), + INST(Vpmaddubsw , "vpmaddubsw" , Enc(AvxRvm_OptL) , O_660F38(04,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmaddwd , "vpmaddwd" , Enc(AvxRvm_OptL) , O_660F00(F5,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmaskmovd , "vpmaskmovd" , Enc(AvxRvmMvr_OptL) , O_660F38(8C,U,_,_,_), O_660F38(8E,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmaskmovq , "vpmaskmovq" , Enc(AvxRvmMvr_OptL) , O_660F38(8C,U,_,W,_), O_660F38(8E,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmaxsb , "vpmaxsb" , Enc(AvxRvm_OptL) , O_660F38(3C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmaxsd , "vpmaxsd" , Enc(AvxRvm_OptL) , O_660F38(3D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmaxsw , "vpmaxsw" , Enc(AvxRvm_OptL) , O_660F00(EE,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmaxub , "vpmaxub" , Enc(AvxRvm_OptL) , 
O_660F00(DE,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmaxud , "vpmaxud" , Enc(AvxRvm_OptL) , O_660F38(3F,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmaxuw , "vpmaxuw" , Enc(AvxRvm_OptL) , O_660F38(3E,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpminsb , "vpminsb" , Enc(AvxRvm_OptL) , O_660F38(38,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpminsd , "vpminsd" , Enc(AvxRvm_OptL) , O_660F38(39,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpminsw , "vpminsw" , Enc(AvxRvm_OptL) , O_660F00(EA,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpminub , "vpminub" , Enc(AvxRvm_OptL) , O_660F00(DA,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpminud , "vpminud" , Enc(AvxRvm_OptL) , O_660F38(3B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpminuw , "vpminuw" , Enc(AvxRvm_OptL) , O_660F38(3A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmovmskb , "vpmovmskb" , Enc(AvxRm_OptL) , O_660F00(D7,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Gqd) , O(Xy) , U , U , U ), + INST(Vpmovsxbd , "vpmovsxbd" , Enc(AvxRm_OptL) , O_660F38(21,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpmovsxbq , "vpmovsxbq" , Enc(AvxRm_OptL) , O_660F38(22,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpmovsxbw , "vpmovsxbw" , Enc(AvxRm_OptL) , O_660F38(20,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpmovsxdq , "vpmovsxdq" , Enc(AvxRm_OptL) , O_660F38(25,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpmovsxwd , "vpmovsxwd" , Enc(AvxRm_OptL) , O_660F38(23,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpmovsxwq , "vpmovsxwq" , Enc(AvxRm_OptL) , O_660F38(24,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpmovzxbd , "vpmovzxbd" , Enc(AvxRm_OptL) , O_660F38(31,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpmovzxbq , "vpmovzxbq" , Enc(AvxRm_OptL) , O_660F38(32,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpmovzxbw , "vpmovzxbw" , Enc(AvxRm_OptL) , O_660F38(30,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpmovzxdq , "vpmovzxdq" , Enc(AvxRm_OptL) , O_660F38(35,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpmovzxwd , "vpmovzxwd" , Enc(AvxRm_OptL) , O_660F38(33,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpmovzxwq , "vpmovzxwq" , Enc(AvxRm_OptL) , O_660F38(34,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpmuldq , "vpmuldq" , Enc(AvxRvm_OptL) , O_660F38(28,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmulhrsw , "vpmulhrsw" , Enc(AvxRvm_OptL) , O_660F38(0B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmulhuw , "vpmulhuw" , Enc(AvxRvm_OptL) , O_660F00(E4,U,_,_,_), U , 
F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmulhw , "vpmulhw" , Enc(AvxRvm_OptL) , O_660F00(E5,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmulld , "vpmulld" , Enc(AvxRvm_OptL) , O_660F38(40,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmullw , "vpmullw" , Enc(AvxRvm_OptL) , O_660F00(D5,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpmuludq , "vpmuludq" , Enc(AvxRvm_OptL) , O_660F00(F4,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpor , "vpor" , Enc(AvxRvm_OptL) , O_660F00(EB,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpperm , "vpperm" , Enc(XopRvrmRvmr) , O_00_M08(A3,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(XmmMem) , U ), + INST(Vprotb , "vprotb" , Enc(XopRvmRmi) , O_00_M09(90,U,_,_,_), O_00_M08(C0,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(XmmMem)|O(Imm) , U , U ), + INST(Vprotd , "vprotd" , Enc(XopRvmRmi) , O_00_M09(92,U,_,_,_), O_00_M08(C2,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(XmmMem)|O(Imm) , U , U ), + INST(Vprotq , "vprotq" , Enc(XopRvmRmi) , O_00_M09(93,U,_,_,_), O_00_M08(C3,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(XmmMem)|O(Imm) , U , U ), + INST(Vprotw , "vprotw" , Enc(XopRvmRmi) , O_00_M09(91,U,_,_,_), O_00_M08(C1,U,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(XmmMem)|O(Imm) , U , U ), + INST(Vpsadbw , "vpsadbw" , Enc(AvxRvm_OptL) , O_660F00(F6,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpshab , "vpshab" , Enc(XopRvmRmv) , O_00_M09(98,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(XmmMem) , U , U ), + INST(Vpshad , "vpshad" , Enc(XopRvmRmv) , O_00_M09(9A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(XmmMem) , U , U ), + INST(Vpshaq , "vpshaq" , Enc(XopRvmRmv) , O_00_M09(9B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(XmmMem) , U , U ), + INST(Vpshaw , "vpshaw" , Enc(XopRvmRmv) , O_00_M09(99,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(XmmMem) , U , U ), + INST(Vpshlb , "vpshlb" , Enc(XopRvmRmv) , O_00_M09(94,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(XmmMem) , U , U ), + INST(Vpshld , "vpshld" , Enc(XopRvmRmv) , O_00_M09(96,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(XmmMem) , U , U ), + INST(Vpshlq , "vpshlq" , Enc(XopRvmRmv) , O_00_M09(97,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(XmmMem) , U , U ), + INST(Vpshlw , "vpshlw" , Enc(XopRvmRmv) , O_00_M09(95,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , O(XmmMem) , U , U ), + INST(Vpshufb , "vpshufb" , Enc(AvxRvm_OptL) , O_660F38(00,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpshufd , "vpshufd" , Enc(AvxRmi_OptL) , O_660F00(70,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(Imm) , U , U ), + INST(Vpshufhw , "vpshufhw" , Enc(AvxRmi_OptL) , O_F30F00(70,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(Imm) , U , U ), + INST(Vpshuflw , "vpshuflw" , Enc(AvxRmi_OptL) , O_F20F00(70,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , 
O(Xy) , O(XyMem) , O(Imm) , U , U ), + INST(Vpsignb , "vpsignb" , Enc(AvxRvm_OptL) , O_660F38(08,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsignd , "vpsignd" , Enc(AvxRvm_OptL) , O_660F38(0A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsignw , "vpsignw" , Enc(AvxRvm_OptL) , O_660F38(09,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpslld , "vpslld" , Enc(AvxRvmVmi_OptL) , O_660F00(F2,U,_,_,_), O_660F00(72,6,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U ), + INST(Vpslldq , "vpslldq" , Enc(AvxVmi_OptL) , O_660F00(73,7,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(Imm) , U , U ), + INST(Vpsllq , "vpsllq" , Enc(AvxRvmVmi_OptL) , O_660F00(F3,U,_,_,_), O_660F00(73,6,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U ), + INST(Vpsllvd , "vpsllvd" , Enc(AvxRvm_OptL) , O_660F38(47,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsllvq , "vpsllvq" , Enc(AvxRvm_OptL) , O_660F38(47,U,_,W,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsllw , "vpsllw" , Enc(AvxRvmVmi_OptL) , O_660F00(F1,U,_,_,_), O_660F00(71,6,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U ), + INST(Vpsrad , "vpsrad" , Enc(AvxRvmVmi_OptL) , O_660F00(E2,U,_,_,_), O_660F00(72,4,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U ), + INST(Vpsravd , "vpsravd" , Enc(AvxRvm_OptL) , O_660F38(46,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsraw , "vpsraw" , Enc(AvxRvmVmi_OptL) , O_660F00(E1,U,_,_,_), O_660F00(71,4,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U ), + INST(Vpsrld , "vpsrld" , Enc(AvxRvmVmi_OptL) , O_660F00(D2,U,_,_,_), O_660F00(72,2,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U ), + INST(Vpsrldq , "vpsrldq" , Enc(AvxVmi_OptL) , O_660F00(73,3,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(Imm) , U , U ), + INST(Vpsrlq , "vpsrlq" , Enc(AvxRvmVmi_OptL) , O_660F00(D3,U,_,_,_), O_660F00(73,2,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U ), + INST(Vpsrlvd , "vpsrlvd" , Enc(AvxRvm_OptL) , O_660F38(45,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsrlvq , "vpsrlvq" , Enc(AvxRvm_OptL) , O_660F38(45,U,_,W,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsrlw , "vpsrlw" , Enc(AvxRvmVmi_OptL) , O_660F00(D1,U,_,_,_), O_660F00(71,2,_,_,_), F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(XyMem)|O(Imm) , U , U ), + INST(Vpsubb , "vpsubb" , Enc(AvxRvm_OptL) , O_660F00(F8,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsubd , "vpsubd" , Enc(AvxRvm_OptL) , O_660F00(FA,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsubq , "vpsubq" , Enc(AvxRvm_OptL) , O_660F00(FB,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsubsb , "vpsubsb" , Enc(AvxRvm_OptL) , O_660F00(E8,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsubsw , "vpsubsw" , Enc(AvxRvm_OptL) , 
O_660F00(E9,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsubusb , "vpsubusb" , Enc(AvxRvm_OptL) , O_660F00(D8,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsubusw , "vpsubusw" , Enc(AvxRvm_OptL) , O_660F00(D9,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpsubw , "vpsubw" , Enc(AvxRvm_OptL) , O_660F00(F9,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vptest , "vptest" , Enc(AvxRm_OptL) , O_660F38(17,U,_,_,_), U , F(RO)|F(Avx) , EF(WWWWWW__), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vpunpckhbw , "vpunpckhbw" , Enc(AvxRvm_OptL) , O_660F00(68,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpunpckhdq , "vpunpckhdq" , Enc(AvxRvm_OptL) , O_660F00(6A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpunpckhqdq , "vpunpckhqdq" , Enc(AvxRvm_OptL) , O_660F00(6D,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpunpckhwd , "vpunpckhwd" , Enc(AvxRvm_OptL) , O_660F00(69,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpunpcklbw , "vpunpcklbw" , Enc(AvxRvm_OptL) , O_660F00(60,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpunpckldq , "vpunpckldq" , Enc(AvxRvm_OptL) , O_660F00(62,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpunpcklqdq , "vpunpcklqdq" , Enc(AvxRvm_OptL) , O_660F00(6C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpunpcklwd , "vpunpcklwd" , Enc(AvxRvm_OptL) , O_660F00(61,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vpxor , "vpxor" , Enc(AvxRvm_OptL) , O_660F00(EF,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vrcpps , "vrcpps" , Enc(AvxRm_OptL) , O_000F00(53,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vrcpss , "vrcpss" , Enc(AvxRvm) , O_F30F00(53,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vroundpd , "vroundpd" , Enc(AvxRmi_OptL) , O_660F3A(09,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(Imm) , U , U ), + INST(Vroundps , "vroundps" , Enc(AvxRmi_OptL) , O_660F3A(08,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , O(Imm) , U , U ), + INST(Vroundsd , "vroundsd" , Enc(AvxRvmi) , O_660F3A(0B,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vroundss , "vroundss" , Enc(AvxRvmi) , O_660F3A(0A,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , U ), + INST(Vrsqrtps , "vrsqrtps" , Enc(AvxRm_OptL) , O_000F00(52,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vrsqrtss , "vrsqrtss" , Enc(AvxRvm) , O_F30F00(52,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vshufpd , "vshufpd" , Enc(AvxRvmi_OptL) , O_660F00(C6,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Imm) , U ), + INST(Vshufps , "vshufps" , Enc(AvxRvmi_OptL) , O_000F00(C6,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , O(Imm) , U ), + INST(Vsqrtpd 
, "vsqrtpd" , Enc(AvxRm_OptL) , O_660F00(51,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vsqrtps , "vsqrtps" , Enc(AvxRm_OptL) , O_000F00(51,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vsqrtsd , "vsqrtsd" , Enc(AvxRvm) , O_F20F00(51,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vsqrtss , "vsqrtss" , Enc(AvxRvm) , O_F30F00(51,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vstmxcsr , "vstmxcsr" , Enc(AvxM) , O_000F00(AE,3,_,_,_), U , F(Volatile)|F(Avx) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ), + INST(Vsubpd , "vsubpd" , Enc(AvxRvm_OptL) , O_660F00(5C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vsubps , "vsubps" , Enc(AvxRvm_OptL) , O_000F00(5C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vsubsd , "vsubsd" , Enc(AvxRvm) , O_F20F00(5C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vsubss , "vsubss" , Enc(AvxRvm) , O_F30F00(5C,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xmm) , O(Xmm) , O(XmmMem) , U , U ), + INST(Vtestpd , "vtestpd" , Enc(AvxRm_OptL) , O_660F38(0F,U,_,_,_), U , F(RO)|F(Avx) , EF(WWWWWW__), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vtestps , "vtestps" , Enc(AvxRm_OptL) , O_660F38(0E,U,_,_,_), U , F(RO)|F(Avx) , EF(WWWWWW__), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), + INST(Vucomisd , "vucomisd" , Enc(AvxRm) , O_660F00(2E,U,_,_,_), U , F(RO)|F(Avx) , EF(WWWWWW__), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vucomiss , "vucomiss" , Enc(AvxRm) , O_000F00(2E,U,_,_,_), U , F(RO)|F(Avx) , EF(WWWWWW__), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), + INST(Vunpckhpd , "vunpckhpd" , Enc(AvxRvm_OptL) , O_660F00(15,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vunpckhps , "vunpckhps" , Enc(AvxRvm_OptL) , O_000F00(15,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vunpcklpd , "vunpcklpd" , Enc(AvxRvm_OptL) , O_660F00(14,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vunpcklps , "vunpcklps" , Enc(AvxRvm_OptL) , O_000F00(14,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vxorpd , "vxorpd" , Enc(AvxRvm_OptL) , O_660F00(57,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vxorps , "vxorps" , Enc(AvxRvm_OptL) , O_000F00(57,U,_,_,_), U , F(WO)|F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), + INST(Vzeroall , "vzeroall" , Enc(AvxOp) , O_000F00(77,U,L,_,_), U , F(Volatile)|F(Avx) , EF(________), 0 , 0 , U , U , U , U , U ), + INST(Vzeroupper , "vzeroupper" , Enc(AvxOp) , O_000F00(77,U,_,_,_), U , F(Volatile)|F(Avx) , EF(________), 0 , 0 , U , U , U , U , U ), + INST(Wrfsbase , "wrfsbase" , Enc(X86Rm) , O_F30F00(AE,2,_,_,_), U , F(RO)|F(Volatile) , EF(________), 0 , 0 , O(Gqd) , U , U , U , U ), + INST(Wrgsbase , "wrgsbase" , Enc(X86Rm) , O_F30F00(AE,3,_,_,_), U , F(RO)|F(Volatile) , EF(________), 0 , 0 , O(Gqd) , U , U , U , U ), + INST(Xadd , "xadd" , Enc(X86Xadd) , O_000F00(C0,U,_,_,_), U , F(RW)|F(Xchg)|F(Lock) , EF(WWWWWW__), 0 , 0 , O(GqdwbMem) , O(Gqdwb) , U , U , U ), + INST(Xchg , "xchg" , Enc(X86Xchg) , O_000000(86,U,_,_,_), U , F(RW)|F(Xchg)|F(Lock) , EF(________), 0 , 0 , O(GqdwbMem) , 
O(Gqdwb) , U , U , U ),
+ INST(Xgetbv , "xgetbv" , Enc(X86Op) , O_000F01(D0,U,_,_,_), U , F(WO)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U ),
+ INST(Xor , "xor" , Enc(X86Arith) , O_000000(30,6,_,_,_), U , F(RW)|F(Lock) , EF(WWWUWW__), 0 , 0 , O(GqdwbMem) , O(GqdwbMem)|O(Imm), U , U , U ),
+ INST(Xorpd , "xorpd" , Enc(SimdRm) , O_660F00(57,U,_,_,_), U , F(RW)|F(None) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+ INST(Xorps , "xorps" , Enc(SimdRm) , O_000F00(57,U,_,_,_), U , F(RW)|F(None) , EF(________), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ),
+ INST(Xrstor , "xrstor" , Enc(X86M) , O_000F00(AE,5,_,_,_), U , F(RO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+ INST(Xrstor64 , "xrstor64" , Enc(X86M) , O_000F00(AE,5,_,W,_), U , F(RO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+ INST(Xsave , "xsave" , Enc(X86M) , O_000F00(AE,4,_,_,_), U , F(RO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+ INST(Xsave64 , "xsave64" , Enc(X86M) , O_000F00(AE,4,_,W,_), U , F(RO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+ INST(Xsaveopt , "xsaveopt" , Enc(X86M) , O_000F00(AE,6,_,_,_), U , F(RO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+ INST(Xsaveopt64 , "xsaveopt64" , Enc(X86M) , O_000F00(AE,6,_,W,_), U , F(RO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , O(Mem) , U , U , U , U ),
+ INST(Xsetbv , "xsetbv" , Enc(X86Op) , O_000F01(D1,U,_,_,_), U , F(RO)|F(Volatile)|F(Special) , EF(________), 0 , 0 , U , U , U , U , U )
+};
+#undef INST
+
+#undef O_00_X
+#undef O_9B_X
+
+#undef O_66_M03
+#undef O_00_M09
+#undef O_00_M08
+
+#undef O_F30F3A
+#undef O_F30F38
+#undef O_F30F00
+#undef O_F30000
+#undef O_F20F3A
+#undef O_F20F38
+#undef O_F20F00
+#undef O_F20000
+#undef O_9B0000
+#undef O_660F3A
+#undef O_660F38
+#undef O_660F00
+#undef O_660000
+#undef O_000F3A
+#undef O_000F38
+#undef O_000F0F
+#undef O_000F01
+#undef O_000F00
+#undef O_000000
+
+#undef Enc
+#undef EF
+#undef O
+#undef A
+#undef F
+#undef U
+
+// ============================================================================
+// [asmjit::X86Cond]
+// ============================================================================
+
+#define CC_TO_INST(_Inst_) { \
+  _Inst_##o , _Inst_##no , _Inst_##b , _Inst_##ae , \
+  _Inst_##e , _Inst_##ne , _Inst_##be , _Inst_##a , \
+  _Inst_##s , _Inst_##ns , _Inst_##pe , _Inst_##po , \
+  _Inst_##l , _Inst_##ge , _Inst_##le , _Inst_##g , \
+  kInstIdNone, kInstIdNone, kInstIdNone, kInstIdNone \
+}
+
+const uint32_t _x86ReverseCond[20] = {
+  /* O|NO|B|AE    -> */ kX86CondO, kX86CondNO, kX86CondA , kX86CondBE,
+  /* E|NE|BE|A    -> */ kX86CondE, kX86CondNE, kX86CondAE, kX86CondB ,
+  /* S|NS|PE|PO   -> */ kX86CondS, kX86CondNS, kX86CondPE, kX86CondPO,
+  /* L|GE|LE|G    -> */ kX86CondG, kX86CondLE, kX86CondGE, kX86CondL ,
+  /* Unord|!Unord -> */ kX86CondFpuUnordered , kX86CondFpuNotUnordered, 0x12, 0x13
+};
+
+const uint32_t _x86CondToCmovcc[20] = CC_TO_INST(kX86InstIdCmov);
+const uint32_t _x86CondToJcc   [20] = CC_TO_INST(kX86InstIdJ   );
+const uint32_t _x86CondToSetcc [20] = CC_TO_INST(kX86InstIdSet );
+
+#undef CC_TO_INST
+
+// ============================================================================
+// [asmjit::X86Util]
+// ============================================================================
+
+#if !defined(ASMJIT_DISABLE_TEXT)
+//! \internal
+//!
+//! Compare two instruction names.
+//!
+//! `a` is null terminated instruction name from `_x86InstNameData[]` table.
+//! `b` is non-null terminated instruction name passed to `getInstIdByName()`.
+static ASMJIT_INLINE int X86Util_cmpInstName(const char* a, const char* b, size_t len) noexcept {
+  for (size_t i = 0; i < len; i++) {
+    int c = static_cast<int>(static_cast<uint8_t>(a[i])) -
+            static_cast<int>(static_cast<uint8_t>(b[i])) ;
+    if (c != 0)
+      return c;
+  }
+
+  return static_cast<int>(a[len]);
+}
+
+uint32_t X86Util::getInstIdByName(const char* name, size_t len) noexcept {
+  if (name == nullptr)
+    return kInstIdNone;
+
+  if (len == kInvalidIndex)
+    len = ::strlen(name);
+
+  if (len == 0)
+    return kInstIdNone;
+
+  uint32_t prefix = name[0] - kX86InstAlphaIndexFirst;
+  if (prefix > kX86InstAlphaIndexLast - kX86InstAlphaIndexFirst)
+    return kInstIdNone;
+
+  uint32_t index = _x86InstAlphaIndex[prefix];
+  if (index == kX86InstAlphaIndexInvalid)
+    return kInstIdNone;
+
+  const uint16_t* base = _x86InstNameIndex + index;
+  const uint16_t* end = _x86InstNameIndex + _kX86InstIdCount;
+
+  // Handle instructions starting with 'j' specially. `jcc` instruction breaks
+  // the sorting, because of the suffixes (it's considered as one instruction),
+  // so basically `jecxz` and `jmp` are stored after all `jcc` instructions.
+  bool linearSearch = prefix == ('j' - kX86InstAlphaIndexFirst);
+
+  while (++prefix <= kX86InstAlphaIndexLast - kX86InstAlphaIndexFirst) {
+    index = _x86InstAlphaIndex[prefix];
+    if (index == kX86InstAlphaIndexInvalid)
+      continue;
+    end = _x86InstNameIndex + index;
+    break;
+  }
+
+  if (linearSearch) {
+    while (base != end) {
+      if (X86Util_cmpInstName(_x86InstNameData + base[0], name, len) == 0)
+        return static_cast<uint32_t>((size_t)(base - _x86InstNameIndex));
+      base++;
+    }
+  }
+  else {
+    for (size_t lim = (size_t)(end - base); lim != 0; lim >>= 1) {
+      const uint16_t* cur = base + (lim >> 1);
+      int result = X86Util_cmpInstName(_x86InstNameData + cur[0], name, len);
+
+      if (result < 0) {
+        base = cur + 1;
+        lim--;
+        continue;
+      }
+
+      if (result > 0)
+        continue;
+
+      return static_cast<uint32_t>((size_t)(cur - _x86InstNameIndex));
+    }
+  }
+
+  return kInstIdNone;
+}
+
+const char* X86Util::getInstNameById(uint32_t id) noexcept {
+  if (id >= _kX86InstIdCount)
+    return nullptr;
+  return _x86InstNameData + _x86InstNameIndex[id];
+}
+#endif // ASMJIT_DISABLE_TEXT
+
+// ============================================================================
+// [asmjit::X86Util - Test]
+// ============================================================================
+
+#if defined(ASMJIT_TEST) && !defined(ASMJIT_DISABLE_TEXT)
+UNIT(x86_inst_name) {
+  // All known instructions should be matched.
+  INFO("Matching all X86/X64 instructions.");
+  for (uint32_t a = 0; a < _kX86InstIdCount; a++) {
+    uint32_t b = X86Util::getInstIdByName(_x86InstNameData + _x86InstNameIndex[a]);
+    EXPECT(a == b,
+      "Should match existing instruction \"%s\" {id:%u} != \"%s\" {id:%u}.",
+      _x86InstNameData + _x86InstNameIndex[a], a,
+      _x86InstNameData + _x86InstNameIndex[b], b);
+  }
+
+  // Everything else should return `kInstIdNone`.
+  INFO("Trying to look-up instructions that don't exist.");
+  EXPECT(X86Util::getInstIdByName(nullptr) == kInstIdNone,
+    "Should return kInstIdNone for `nullptr` input.");
+
+  EXPECT(X86Util::getInstIdByName("") == kInstIdNone,
+    "Should return kInstIdNone for empty string.");
+
+  EXPECT(X86Util::getInstIdByName("_") == kInstIdNone,
+    "Should return kInstIdNone for unknown instruction.");
+
+  EXPECT(X86Util::getInstIdByName("123xyz") == kInstIdNone,
+    "Should return kInstIdNone for unknown instruction.");
+}
+#endif // ASMJIT_TEST && !ASMJIT_DISABLE_TEXT
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64
diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86inst.h b/DynamicHooks/thirdparty/AsmJit/x86/x86inst.h
new file mode 100644
index 0000000..3fe391a
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/x86/x86inst.h
@@ -0,0 +1,2192 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_X86_X86INST_H
+#define _ASMJIT_X86_X86INST_H
+
+// [Dependencies]
+#include "../base/assembler.h"
+#include "../base/globals.h"
+#include "../base/operand.h"
+#include "../base/utils.h"
+#include "../base/vectypes.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [Forward Declarations]
+// ============================================================================
+
+struct X86InstInfo;
+struct X86InstExtendedInfo;
+
+//! \addtogroup asmjit_x86
+//! \{
+
+// ============================================================================
+// [asmjit::X86Inst/X86Cond - Globals]
+// ============================================================================
+
+//! \internal
+//!
+//! X86/X64 instructions' extended information, accessible through `X86InstInfo`.
+ASMJIT_VARAPI const X86InstExtendedInfo _x86InstExtendedInfo[];
+
+//! \internal
+//!
+//! X86/X64 instructions' information.
+ASMJIT_VARAPI const X86InstInfo _x86InstInfo[];
+
+//! \internal
+//!
+//! X86/X64 condition codes to reversed condition codes map.
+ASMJIT_VARAPI const uint32_t _x86ReverseCond[20];
+
+//! \internal
+//!
+//! X86/X64 condition codes to "cmovcc" group map.
+ASMJIT_VARAPI const uint32_t _x86CondToCmovcc[20];
+
+//! \internal
+//!
+//! X86/X64 condition codes to "jcc" group map.
+ASMJIT_VARAPI const uint32_t _x86CondToJcc[20];
+
+//! \internal
+//!
+//! X86/X64 condition codes to "setcc" group map.
+ASMJIT_VARAPI const uint32_t _x86CondToSetcc[20];
+
+// ============================================================================
+// [asmjit::X86InstId]
+// ============================================================================
+
+//! X86/X64 instruction IDs.
+//!
+//! Note that these instruction codes are AsmJit specific. Each instruction has
+//! a unique ID that is used as an index to AsmJit instruction table. The list
+//! is sorted alphabetically except instructions starting with `j`, because the
+//! `jcc` instruction is composition of an opcode and condition code. It means
+//! that these instructions are sorted as `jcc`, `jecxz` and `jmp`. Please use
+//! \ref X86Util::getInstIdByName() if you need instruction name to ID mapping
+//! and are not aware on how to handle such case.
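+//!
+//! A minimal lookup sketch (illustrative addition, not upstream AsmJit text;
+//! both helpers are defined in `x86inst.cpp` above and declared on `X86Util`):
+//!
+//! \code
+//! uint32_t id = X86Util::getInstIdByName("vpaddd");  // Name to instruction ID.
+//! const char* name = X86Util::getInstNameById(id);   // Back to "vpaddd".
+//! // Unknown names yield kInstIdNone; out-of-range IDs yield nullptr.
+//! \endcode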
+ASMJIT_ENUM(X86InstId) { + kX86InstIdNone = 0, + kX86InstIdAdc, // X86/X64 + kX86InstIdAdcx, // ADX + kX86InstIdAdd, // X86/X64 + kX86InstIdAddpd, // SSE2 + kX86InstIdAddps, // SSE + kX86InstIdAddsd, // SSE2 + kX86InstIdAddss, // SSE + kX86InstIdAddsubpd, // SSE3 + kX86InstIdAddsubps, // SSE3 + kX86InstIdAdox, // ADX + kX86InstIdAesdec, // AESNI + kX86InstIdAesdeclast, // AESNI + kX86InstIdAesenc, // AESNI + kX86InstIdAesenclast, // AESNI + kX86InstIdAesimc, // AESNI + kX86InstIdAeskeygenassist, // AESNI + kX86InstIdAnd, // X86/X64 + kX86InstIdAndn, // BMI + kX86InstIdAndnpd, // SSE2 + kX86InstIdAndnps, // SSE + kX86InstIdAndpd, // SSE2 + kX86InstIdAndps, // SSE + kX86InstIdBextr, // BMI + kX86InstIdBlcfill, // TBM + kX86InstIdBlci, // TBM + kX86InstIdBlcic, // TBM + kX86InstIdBlcmsk, // TBM + kX86InstIdBlcs, // TBM + kX86InstIdBlendpd, // SSE4.1 + kX86InstIdBlendps, // SSE4.1 + kX86InstIdBlendvpd, // SSE4.1 + kX86InstIdBlendvps, // SSE4.1 + kX86InstIdBlsfill, // TBM + kX86InstIdBlsi, // BMI + kX86InstIdBlsic, // TBM + kX86InstIdBlsmsk, // BMI + kX86InstIdBlsr, // BMI + kX86InstIdBsf, // X86/X64 + kX86InstIdBsr, // X86/X64 + kX86InstIdBswap, // X86/X64 (i486+) + kX86InstIdBt, // X86/X64 + kX86InstIdBtc, // X86/X64 + kX86InstIdBtr, // X86/X64 + kX86InstIdBts, // X86/X64 + kX86InstIdBzhi, // BMI2 + kX86InstIdCall, // X86/X64 + kX86InstIdCbw, // X86/X64 + kX86InstIdCdq, // X86/X64 + kX86InstIdCdqe, // X64 only + kX86InstIdClc, // X86/X64 + kX86InstIdCld, // X86/X64 + kX86InstIdClflush, // CLFLUSH + kX86InstIdClflushopt, // CLFLUSH_OPT + kX86InstIdCmc, // X86/X64 + kX86InstIdCmova, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovae, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovb, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovbe, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovc, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmove, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovg, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovge, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovl, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovle, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovna, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovnae, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovnb, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovnbe, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovnc, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovne, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovng, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovnge, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovnl, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovnle, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovno, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovnp, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovns, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovnz, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovo, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovp, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovpe, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovpo, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovs, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmovz, // X86/X64 (cmovcc) (i586+) + kX86InstIdCmp, // X86/X64 + kX86InstIdCmppd, // SSE2 + kX86InstIdCmpps, // SSE + kX86InstIdCmpsB, // CMPS + kX86InstIdCmpsD, // CMPS + kX86InstIdCmpsQ, // CMPS (X64) + kX86InstIdCmpsW, // CMPS + kX86InstIdCmpsd, // SSE2 + kX86InstIdCmpss, // SSE + kX86InstIdCmpxchg, // X86/X64 (i486+) + kX86InstIdCmpxchg16b, // X64 only + kX86InstIdCmpxchg8b, // X86/X64 (i586+) + kX86InstIdComisd, // SSE2 + kX86InstIdComiss, // SSE + kX86InstIdCpuid, // X86/X64 (i486/i586+) + kX86InstIdCqo, // X64 only + kX86InstIdCrc32, // SSE4.2 + 
kX86InstIdCvtdq2pd, // SSE2 + kX86InstIdCvtdq2ps, // SSE2 + kX86InstIdCvtpd2dq, // SSE2 + kX86InstIdCvtpd2pi, // SSE2 + kX86InstIdCvtpd2ps, // SSE2 + kX86InstIdCvtpi2pd, // SSE2 + kX86InstIdCvtpi2ps, // SSE + kX86InstIdCvtps2dq, // SSE2 + kX86InstIdCvtps2pd, // SSE2 + kX86InstIdCvtps2pi, // SSE + kX86InstIdCvtsd2si, // SSE2 + kX86InstIdCvtsd2ss, // SSE2 + kX86InstIdCvtsi2sd, // SSE2 + kX86InstIdCvtsi2ss, // SSE + kX86InstIdCvtss2sd, // SSE2 + kX86InstIdCvtss2si, // SSE + kX86InstIdCvttpd2dq, // SSE2 + kX86InstIdCvttpd2pi, // SSE2 + kX86InstIdCvttps2dq, // SSE2 + kX86InstIdCvttps2pi, // SSE + kX86InstIdCvttsd2si, // SSE2 + kX86InstIdCvttss2si, // SSE + kX86InstIdCwd, // X86/X64 + kX86InstIdCwde, // X86/X64 + kX86InstIdDaa, // X86 only + kX86InstIdDas, // X86 only + kX86InstIdDec, // X86/X64 + kX86InstIdDiv, // X86/X64 + kX86InstIdDivpd, // SSE2 + kX86InstIdDivps, // SSE + kX86InstIdDivsd, // SSE2 + kX86InstIdDivss, // SSE + kX86InstIdDppd, // SSE4.1 + kX86InstIdDpps, // SSE4.1 + kX86InstIdEmms, // MMX + kX86InstIdEnter, // X86/X64 + kX86InstIdExtractps, // SSE4.1 + kX86InstIdExtrq, // SSE4a + kX86InstIdF2xm1, // FPU + kX86InstIdFabs, // FPU + kX86InstIdFadd, // FPU + kX86InstIdFaddp, // FPU + kX86InstIdFbld, // FPU + kX86InstIdFbstp, // FPU + kX86InstIdFchs, // FPU + kX86InstIdFclex, // FPU + kX86InstIdFcmovb, // FPU + kX86InstIdFcmovbe, // FPU + kX86InstIdFcmove, // FPU + kX86InstIdFcmovnb, // FPU + kX86InstIdFcmovnbe, // FPU + kX86InstIdFcmovne, // FPU + kX86InstIdFcmovnu, // FPU + kX86InstIdFcmovu, // FPU + kX86InstIdFcom, // FPU + kX86InstIdFcomi, // FPU + kX86InstIdFcomip, // FPU + kX86InstIdFcomp, // FPU + kX86InstIdFcompp, // FPU + kX86InstIdFcos, // FPU + kX86InstIdFdecstp, // FPU + kX86InstIdFdiv, // FPU + kX86InstIdFdivp, // FPU + kX86InstIdFdivr, // FPU + kX86InstIdFdivrp, // FPU + kX86InstIdFemms, // 3DNOW + kX86InstIdFfree, // FPU + kX86InstIdFiadd, // FPU + kX86InstIdFicom, // FPU + kX86InstIdFicomp, // FPU + kX86InstIdFidiv, // FPU + kX86InstIdFidivr, // FPU + kX86InstIdFild, // FPU + kX86InstIdFimul, // FPU + kX86InstIdFincstp, // FPU + kX86InstIdFinit, // FPU + kX86InstIdFist, // FPU + kX86InstIdFistp, // FPU + kX86InstIdFisttp, // SSE3 + kX86InstIdFisub, // FPU + kX86InstIdFisubr, // FPU + kX86InstIdFld, // FPU + kX86InstIdFld1, // FPU + kX86InstIdFldcw, // FPU + kX86InstIdFldenv, // FPU + kX86InstIdFldl2e, // FPU + kX86InstIdFldl2t, // FPU + kX86InstIdFldlg2, // FPU + kX86InstIdFldln2, // FPU + kX86InstIdFldpi, // FPU + kX86InstIdFldz, // FPU + kX86InstIdFmul, // FPU + kX86InstIdFmulp, // FPU + kX86InstIdFnclex, // FPU + kX86InstIdFninit, // FPU + kX86InstIdFnop, // FPU + kX86InstIdFnsave, // FPU + kX86InstIdFnstcw, // FPU + kX86InstIdFnstenv, // FPU + kX86InstIdFnstsw, // FPU + kX86InstIdFpatan, // FPU + kX86InstIdFprem, // FPU + kX86InstIdFprem1, // FPU + kX86InstIdFptan, // FPU + kX86InstIdFrndint, // FPU + kX86InstIdFrstor, // FPU + kX86InstIdFsave, // FPU + kX86InstIdFscale, // FPU + kX86InstIdFsin, // FPU + kX86InstIdFsincos, // FPU + kX86InstIdFsqrt, // FPU + kX86InstIdFst, // FPU + kX86InstIdFstcw, // FPU + kX86InstIdFstenv, // FPU + kX86InstIdFstp, // FPU + kX86InstIdFstsw, // FPU + kX86InstIdFsub, // FPU + kX86InstIdFsubp, // FPU + kX86InstIdFsubr, // FPU + kX86InstIdFsubrp, // FPU + kX86InstIdFtst, // FPU + kX86InstIdFucom, // FPU + kX86InstIdFucomi, // FPU + kX86InstIdFucomip, // FPU + kX86InstIdFucomp, // FPU + kX86InstIdFucompp, // FPU + kX86InstIdFwait, // FPU + kX86InstIdFxam, // FPU + kX86InstIdFxch, // FPU + kX86InstIdFxrstor, // FPU + 
kX86InstIdFxrstor64, // FPU (X64) + kX86InstIdFxsave, // FPU + kX86InstIdFxsave64, // FPU (X64) + kX86InstIdFxtract, // FPU + kX86InstIdFyl2x, // FPU + kX86InstIdFyl2xp1, // FPU + kX86InstIdHaddpd, // SSE3 + kX86InstIdHaddps, // SSE3 + kX86InstIdHsubpd, // SSE3 + kX86InstIdHsubps, // SSE3 + kX86InstIdIdiv, // X86/X64 + kX86InstIdImul, // X86/X64 + kX86InstIdInc, // X86/X64 + kX86InstIdInsertps, // SSE4.1 + kX86InstIdInsertq, // SSE4a + kX86InstIdInt, // X86/X64 + kX86InstIdJa, // X86/X64 (jcc) + kX86InstIdJae, // X86/X64 (jcc) + kX86InstIdJb, // X86/X64 (jcc) + kX86InstIdJbe, // X86/X64 (jcc) + kX86InstIdJc, // X86/X64 (jcc) + kX86InstIdJe, // X86/X64 (jcc) + kX86InstIdJg, // X86/X64 (jcc) + kX86InstIdJge, // X86/X64 (jcc) + kX86InstIdJl, // X86/X64 (jcc) + kX86InstIdJle, // X86/X64 (jcc) + kX86InstIdJna, // X86/X64 (jcc) + kX86InstIdJnae, // X86/X64 (jcc) + kX86InstIdJnb, // X86/X64 (jcc) + kX86InstIdJnbe, // X86/X64 (jcc) + kX86InstIdJnc, // X86/X64 (jcc) + kX86InstIdJne, // X86/X64 (jcc) + kX86InstIdJng, // X86/X64 (jcc) + kX86InstIdJnge, // X86/X64 (jcc) + kX86InstIdJnl, // X86/X64 (jcc) + kX86InstIdJnle, // X86/X64 (jcc) + kX86InstIdJno, // X86/X64 (jcc) + kX86InstIdJnp, // X86/X64 (jcc) + kX86InstIdJns, // X86/X64 (jcc) + kX86InstIdJnz, // X86/X64 (jcc) + kX86InstIdJo, // X86/X64 (jcc) + kX86InstIdJp, // X86/X64 (jcc) + kX86InstIdJpe, // X86/X64 (jcc) + kX86InstIdJpo, // X86/X64 (jcc) + kX86InstIdJs, // X86/X64 (jcc) + kX86InstIdJz, // X86/X64 (jcc) + kX86InstIdJecxz, // X86/X64 (jcxz/jecxz/jrcxz) + kX86InstIdJmp, // X86/X64 (jmp) + kX86InstIdLahf, // X86/X64 (LAHF/SAHF) + kX86InstIdLddqu, // SSE3 + kX86InstIdLdmxcsr, // SSE + kX86InstIdLea, // X86/X64 + kX86InstIdLeave, // X86/X64 + kX86InstIdLfence, // SSE2 + kX86InstIdLodsB, // LODS + kX86InstIdLodsD, // LODS + kX86InstIdLodsQ, // LODS (X64) + kX86InstIdLodsW, // LODS + kX86InstIdLzcnt, // LZCNT + kX86InstIdMaskmovdqu, // SSE2 + kX86InstIdMaskmovq, // MMX2 + kX86InstIdMaxpd, // SSE2 + kX86InstIdMaxps, // SSE + kX86InstIdMaxsd, // SSE2 + kX86InstIdMaxss, // SSE + kX86InstIdMfence, // SSE2 + kX86InstIdMinpd, // SSE2 + kX86InstIdMinps, // SSE + kX86InstIdMinsd, // SSE2 + kX86InstIdMinss, // SSE + kX86InstIdMonitor, // SSE3 + kX86InstIdMov, // X86/X64 + kX86InstIdMovPtr, // X86/X64 + kX86InstIdMovapd, // SSE2 + kX86InstIdMovaps, // SSE + kX86InstIdMovbe, // SSE3 (Atom) + kX86InstIdMovd, // MMX/SSE2 + kX86InstIdMovddup, // SSE3 + kX86InstIdMovdq2q, // SSE2 + kX86InstIdMovdqa, // SSE2 + kX86InstIdMovdqu, // SSE2 + kX86InstIdMovhlps, // SSE + kX86InstIdMovhpd, // SSE2 + kX86InstIdMovhps, // SSE + kX86InstIdMovlhps, // SSE + kX86InstIdMovlpd, // SSE2 + kX86InstIdMovlps, // SSE + kX86InstIdMovmskpd, // SSE2 + kX86InstIdMovmskps, // SSE2 + kX86InstIdMovntdq, // SSE2 + kX86InstIdMovntdqa, // SSE4.1 + kX86InstIdMovnti, // SSE2 + kX86InstIdMovntpd, // SSE2 + kX86InstIdMovntps, // SSE + kX86InstIdMovntq, // MMX2 + kX86InstIdMovntsd, // SSE4a + kX86InstIdMovntss, // SSE4a + kX86InstIdMovq, // MMX/SSE/SSE2 + kX86InstIdMovq2dq, // SSE2 + kX86InstIdMovsB, // MOVS + kX86InstIdMovsD, // MOVS + kX86InstIdMovsQ, // MOVS (X64) + kX86InstIdMovsW, // MOVS + kX86InstIdMovsd, // SSE2 + kX86InstIdMovshdup, // SSE3 + kX86InstIdMovsldup, // SSE3 + kX86InstIdMovss, // SSE + kX86InstIdMovsx, // X86/X64 + kX86InstIdMovsxd, // X86/X64 + kX86InstIdMovupd, // SSE2 + kX86InstIdMovups, // SSE + kX86InstIdMovzx, // X86/X64 + kX86InstIdMpsadbw, // SSE4.1 + kX86InstIdMul, // X86/X64 + kX86InstIdMulpd, // SSE2 + kX86InstIdMulps, // SSE + kX86InstIdMulsd, // SSE2 + 
kX86InstIdMulss, // SSE + kX86InstIdMulx, // BMI2 + kX86InstIdMwait, // SSE3 + kX86InstIdNeg, // X86/X64 + kX86InstIdNop, // X86/X64 + kX86InstIdNot, // X86/X64 + kX86InstIdOr, // X86/X64 + kX86InstIdOrpd, // SSE2 + kX86InstIdOrps, // SSE + kX86InstIdPabsb, // SSSE3 + kX86InstIdPabsd, // SSSE3 + kX86InstIdPabsw, // SSSE3 + kX86InstIdPackssdw, // MMX/SSE2 + kX86InstIdPacksswb, // MMX/SSE2 + kX86InstIdPackusdw, // SSE4.1 + kX86InstIdPackuswb, // MMX/SSE2 + kX86InstIdPaddb, // MMX/SSE2 + kX86InstIdPaddd, // MMX/SSE2 + kX86InstIdPaddq, // SSE2 + kX86InstIdPaddsb, // MMX/SSE2 + kX86InstIdPaddsw, // MMX/SSE2 + kX86InstIdPaddusb, // MMX/SSE2 + kX86InstIdPaddusw, // MMX/SSE2 + kX86InstIdPaddw, // MMX/SSE2 + kX86InstIdPalignr, // SSSE3 + kX86InstIdPand, // MMX/SSE2 + kX86InstIdPandn, // MMX/SSE2 + kX86InstIdPause, // SSE2. + kX86InstIdPavgb, // MMX2 + kX86InstIdPavgusb, // 3DNOW + kX86InstIdPavgw, // MMX2 + kX86InstIdPblendvb, // SSE4.1 + kX86InstIdPblendw, // SSE4.1 + kX86InstIdPclmulqdq, // PCLMULQDQ + kX86InstIdPcmpeqb, // MMX/SSE2 + kX86InstIdPcmpeqd, // MMX/SSE2 + kX86InstIdPcmpeqq, // SSE4.1 + kX86InstIdPcmpeqw, // MMX/SSE2 + kX86InstIdPcmpestri, // SSE4.2 + kX86InstIdPcmpestrm, // SSE4.2 + kX86InstIdPcmpgtb, // MMX/SSE2 + kX86InstIdPcmpgtd, // MMX/SSE2 + kX86InstIdPcmpgtq, // SSE4.2 + kX86InstIdPcmpgtw, // MMX/SSE2 + kX86InstIdPcmpistri, // SSE4.2 + kX86InstIdPcmpistrm, // SSE4.2 + kX86InstIdPdep, // BMI2 + kX86InstIdPext, // BMI2 + kX86InstIdPextrb, // SSE4.1 + kX86InstIdPextrd, // SSE4.1 + kX86InstIdPextrq, // SSE4.1 + kX86InstIdPextrw, // MMX2/SSE2 + kX86InstIdPf2id, // 3DNOW + kX86InstIdPf2iw, // 3DNOW2 + kX86InstIdPfacc, // 3DNOW + kX86InstIdPfadd, // 3DNOW + kX86InstIdPfcmpeq, // 3DNOW + kX86InstIdPfcmpge, // 3DNOW + kX86InstIdPfcmpgt, // 3DNOW + kX86InstIdPfmax, // 3DNOW + kX86InstIdPfmin, // 3DNOW + kX86InstIdPfmul, // 3DNOW + kX86InstIdPfnacc, // 3DNOW2 + kX86InstIdPfpnacc, // 3DNOW2 + kX86InstIdPfrcp, // 3DNOW + kX86InstIdPfrcpit1, // 3DNOW + kX86InstIdPfrcpit2, // 3DNOW + kX86InstIdPfrsqit1, // 3DNOW + kX86InstIdPfrsqrt, // 3DNOW + kX86InstIdPfsub, // 3DNOW + kX86InstIdPfsubr, // 3DNOW + kX86InstIdPhaddd, // SSSE3 + kX86InstIdPhaddsw, // SSSE3 + kX86InstIdPhaddw, // SSSE3 + kX86InstIdPhminposuw, // SSE4.1 + kX86InstIdPhsubd, // SSSE3 + kX86InstIdPhsubsw, // SSSE3 + kX86InstIdPhsubw, // SSSE3 + kX86InstIdPi2fd, // 3DNOW + kX86InstIdPi2fw, // 3DNOW2 + kX86InstIdPinsrb, // SSE4.1 + kX86InstIdPinsrd, // SSE4.1 + kX86InstIdPinsrq, // SSE4.1 + kX86InstIdPinsrw, // MMX2 + kX86InstIdPmaddubsw, // SSSE3 + kX86InstIdPmaddwd, // MMX/SSE2 + kX86InstIdPmaxsb, // SSE4.1 + kX86InstIdPmaxsd, // SSE4.1 + kX86InstIdPmaxsw, // MMX2 + kX86InstIdPmaxub, // MMX2 + kX86InstIdPmaxud, // SSE4.1 + kX86InstIdPmaxuw, // SSE4.1 + kX86InstIdPminsb, // SSE4.1 + kX86InstIdPminsd, // SSE4.1 + kX86InstIdPminsw, // MMX2 + kX86InstIdPminub, // MMX2 + kX86InstIdPminud, // SSE4.1 + kX86InstIdPminuw, // SSE4.1 + kX86InstIdPmovmskb, // MMX2 + kX86InstIdPmovsxbd, // SSE4.1 + kX86InstIdPmovsxbq, // SSE4.1 + kX86InstIdPmovsxbw, // SSE4.1 + kX86InstIdPmovsxdq, // SSE4.1 + kX86InstIdPmovsxwd, // SSE4.1 + kX86InstIdPmovsxwq, // SSE4.1 + kX86InstIdPmovzxbd, // SSE4.1 + kX86InstIdPmovzxbq, // SSE4.1 + kX86InstIdPmovzxbw, // SSE4.1 + kX86InstIdPmovzxdq, // SSE4.1 + kX86InstIdPmovzxwd, // SSE4.1 + kX86InstIdPmovzxwq, // SSE4.1 + kX86InstIdPmuldq, // SSE4.1 + kX86InstIdPmulhrsw, // SSSE3 + kX86InstIdPmulhrw, // 3DNOW + kX86InstIdPmulhuw, // MMX2 + kX86InstIdPmulhw, // MMX/SSE2 + kX86InstIdPmulld, // SSE4.1 + kX86InstIdPmullw, // 
MMX/SSE2 + kX86InstIdPmuludq, // SSE2 + kX86InstIdPop, // X86/X64 + kX86InstIdPopa, // X86 only + kX86InstIdPopcnt, // SSE4.2 + kX86InstIdPopf, // X86/X64 + kX86InstIdPor, // MMX/SSE2 + kX86InstIdPrefetch, // MMX2/SSE + kX86InstIdPrefetch3dNow, // 3DNOW + kX86InstIdPrefetchw, // PREFETCHW + kX86InstIdPrefetchwt1, // PREFETCHWT1 + kX86InstIdPsadbw, // MMX2 + kX86InstIdPshufb, // SSSE3 + kX86InstIdPshufd, // SSE2 + kX86InstIdPshufhw, // SSE2 + kX86InstIdPshuflw, // SSE2 + kX86InstIdPshufw, // MMX2 + kX86InstIdPsignb, // SSSE3 + kX86InstIdPsignd, // SSSE3 + kX86InstIdPsignw, // SSSE3 + kX86InstIdPslld, // MMX/SSE2 + kX86InstIdPslldq, // SSE2 + kX86InstIdPsllq, // MMX/SSE2 + kX86InstIdPsllw, // MMX/SSE2 + kX86InstIdPsrad, // MMX/SSE2 + kX86InstIdPsraw, // MMX/SSE2 + kX86InstIdPsrld, // MMX/SSE2 + kX86InstIdPsrldq, // SSE2 + kX86InstIdPsrlq, // MMX/SSE2 + kX86InstIdPsrlw, // MMX/SSE2 + kX86InstIdPsubb, // MMX/SSE2 + kX86InstIdPsubd, // MMX/SSE2 + kX86InstIdPsubq, // SSE2 + kX86InstIdPsubsb, // MMX/SSE2 + kX86InstIdPsubsw, // MMX/SSE2 + kX86InstIdPsubusb, // MMX/SSE2 + kX86InstIdPsubusw, // MMX/SSE2 + kX86InstIdPsubw, // MMX/SSE2 + kX86InstIdPswapd, // 3DNOW2 + kX86InstIdPtest, // SSE4.1 + kX86InstIdPunpckhbw, // MMX/SSE2 + kX86InstIdPunpckhdq, // MMX/SSE2 + kX86InstIdPunpckhqdq, // SSE2 + kX86InstIdPunpckhwd, // MMX/SSE2 + kX86InstIdPunpcklbw, // MMX/SSE2 + kX86InstIdPunpckldq, // MMX/SSE2 + kX86InstIdPunpcklqdq, // SSE2 + kX86InstIdPunpcklwd, // MMX/SSE2 + kX86InstIdPush, // X86/X64 + kX86InstIdPusha, // X86 only + kX86InstIdPushf, // X86/X64 + kX86InstIdPxor, // MMX/SSE2 + kX86InstIdRcl, // X86/X64 + kX86InstIdRcpps, // SSE + kX86InstIdRcpss, // SSE + kX86InstIdRcr, // X86/X64 + kX86InstIdRdfsbase, // FSGSBASE (X64) + kX86InstIdRdgsbase, // FSGSBASE (X64) + kX86InstIdRdrand, // RDRAND (RDRAND) + kX86InstIdRdseed, // RDSEED (RDSEED) + kX86InstIdRdtsc, // X86/X64 + kX86InstIdRdtscp, // X86/X64 + kX86InstIdRepLodsB, // X86/X64 (REP) + kX86InstIdRepLodsD, // X86/X64 (REP) + kX86InstIdRepLodsQ, // X64 only (REP) + kX86InstIdRepLodsW, // X86/X64 (REP) + kX86InstIdRepMovsB, // X86/X64 (REP) + kX86InstIdRepMovsD, // X86/X64 (REP) + kX86InstIdRepMovsQ, // X64 only (REP) + kX86InstIdRepMovsW, // X86/X64 (REP) + kX86InstIdRepStosB, // X86/X64 (REP) + kX86InstIdRepStosD, // X86/X64 (REP) + kX86InstIdRepStosQ, // X64 only (REP) + kX86InstIdRepStosW, // X86/X64 (REP) + kX86InstIdRepeCmpsB, // X86/X64 (REP) + kX86InstIdRepeCmpsD, // X86/X64 (REP) + kX86InstIdRepeCmpsQ, // X64 only (REP) + kX86InstIdRepeCmpsW, // X86/X64 (REP) + kX86InstIdRepeScasB, // X86/X64 (REP) + kX86InstIdRepeScasD, // X86/X64 (REP) + kX86InstIdRepeScasQ, // X64 only (REP) + kX86InstIdRepeScasW, // X86/X64 (REP) + kX86InstIdRepneCmpsB, // X86/X64 (REP) + kX86InstIdRepneCmpsD, // X86/X64 (REP) + kX86InstIdRepneCmpsQ, // X64 only (REP) + kX86InstIdRepneCmpsW, // X86/X64 (REP) + kX86InstIdRepneScasB, // X86/X64 (REP) + kX86InstIdRepneScasD, // X86/X64 (REP) + kX86InstIdRepneScasQ, // X64 only (REP) + kX86InstIdRepneScasW, // X86/X64 (REP) + kX86InstIdRet, // X86/X64 + kX86InstIdRol, // X86/X64 + kX86InstIdRor, // X86/X64 + kX86InstIdRorx, // BMI2 + kX86InstIdRoundpd, // SSE4.1 + kX86InstIdRoundps, // SSE4.1 + kX86InstIdRoundsd, // SSE4.1 + kX86InstIdRoundss, // SSE4.1 + kX86InstIdRsqrtps, // SSE + kX86InstIdRsqrtss, // SSE + kX86InstIdSahf, // X86/X64 (LAHF/SAHF) + kX86InstIdSal, // X86/X64 + kX86InstIdSar, // X86/X64 + kX86InstIdSarx, // BMI2 + kX86InstIdSbb, // X86/X64 + kX86InstIdScasB, // SCAS + kX86InstIdScasD, // SCAS + 
kX86InstIdScasQ, // SCAS (X64) + kX86InstIdScasW, // SCAS + kX86InstIdSeta, // X86/X64 (setcc) + kX86InstIdSetae, // X86/X64 (setcc) + kX86InstIdSetb, // X86/X64 (setcc) + kX86InstIdSetbe, // X86/X64 (setcc) + kX86InstIdSetc, // X86/X64 (setcc) + kX86InstIdSete, // X86/X64 (setcc) + kX86InstIdSetg, // X86/X64 (setcc) + kX86InstIdSetge, // X86/X64 (setcc) + kX86InstIdSetl, // X86/X64 (setcc) + kX86InstIdSetle, // X86/X64 (setcc) + kX86InstIdSetna, // X86/X64 (setcc) + kX86InstIdSetnae, // X86/X64 (setcc) + kX86InstIdSetnb, // X86/X64 (setcc) + kX86InstIdSetnbe, // X86/X64 (setcc) + kX86InstIdSetnc, // X86/X64 (setcc) + kX86InstIdSetne, // X86/X64 (setcc) + kX86InstIdSetng, // X86/X64 (setcc) + kX86InstIdSetnge, // X86/X64 (setcc) + kX86InstIdSetnl, // X86/X64 (setcc) + kX86InstIdSetnle, // X86/X64 (setcc) + kX86InstIdSetno, // X86/X64 (setcc) + kX86InstIdSetnp, // X86/X64 (setcc) + kX86InstIdSetns, // X86/X64 (setcc) + kX86InstIdSetnz, // X86/X64 (setcc) + kX86InstIdSeto, // X86/X64 (setcc) + kX86InstIdSetp, // X86/X64 (setcc) + kX86InstIdSetpe, // X86/X64 (setcc) + kX86InstIdSetpo, // X86/X64 (setcc) + kX86InstIdSets, // X86/X64 (setcc) + kX86InstIdSetz, // X86/X64 (setcc) + kX86InstIdSfence, // MMX2/SSE + kX86InstIdSha1msg1, // SHA + kX86InstIdSha1msg2, // SHA + kX86InstIdSha1nexte, // SHA + kX86InstIdSha1rnds4, // SHA + kX86InstIdSha256msg1, // SHA + kX86InstIdSha256msg2, // SHA + kX86InstIdSha256rnds2, // SHA + kX86InstIdShl, // X86/X64 + kX86InstIdShld, // X86/X64 + kX86InstIdShlx, // BMI2 + kX86InstIdShr, // X86/X64 + kX86InstIdShrd, // X86/X64 + kX86InstIdShrx, // BMI2 + kX86InstIdShufpd, // SSE2 + kX86InstIdShufps, // SSE + kX86InstIdSqrtpd, // SSE2 + kX86InstIdSqrtps, // SSE + kX86InstIdSqrtsd, // SSE2 + kX86InstIdSqrtss, // SSE + kX86InstIdStc, // X86/X64 + kX86InstIdStd, // X86/X64 + kX86InstIdStmxcsr, // SSE + kX86InstIdStosB, // STOS + kX86InstIdStosD, // STOS + kX86InstIdStosQ, // STOS (X64) + kX86InstIdStosW, // STOS + kX86InstIdSub, // X86/X64 + kX86InstIdSubpd, // SSE2 + kX86InstIdSubps, // SSE + kX86InstIdSubsd, // SSE2 + kX86InstIdSubss, // SSE + kX86InstIdT1mskc, // TBM + kX86InstIdTest, // X86/X64 + kX86InstIdTzcnt, // TZCNT + kX86InstIdTzmsk, // TBM + kX86InstIdUcomisd, // SSE2 + kX86InstIdUcomiss, // SSE + kX86InstIdUd2, // X86/X64 + kX86InstIdUnpckhpd, // SSE2 + kX86InstIdUnpckhps, // SSE + kX86InstIdUnpcklpd, // SSE2 + kX86InstIdUnpcklps, // SSE + kX86InstIdVaddpd, // AVX + kX86InstIdVaddps, // AVX + kX86InstIdVaddsd, // AVX + kX86InstIdVaddss, // AVX + kX86InstIdVaddsubpd, // AVX + kX86InstIdVaddsubps, // AVX + kX86InstIdVaesdec, // AVX+AESNI + kX86InstIdVaesdeclast, // AVX+AESNI + kX86InstIdVaesenc, // AVX+AESNI + kX86InstIdVaesenclast, // AVX+AESNI + kX86InstIdVaesimc, // AVX+AESNI + kX86InstIdVaeskeygenassist, // AVX+AESNI + kX86InstIdVandnpd, // AVX + kX86InstIdVandnps, // AVX + kX86InstIdVandpd, // AVX + kX86InstIdVandps, // AVX + kX86InstIdVblendpd, // AVX + kX86InstIdVblendps, // AVX + kX86InstIdVblendvpd, // AVX + kX86InstIdVblendvps, // AVX + kX86InstIdVbroadcastf128, // AVX + kX86InstIdVbroadcasti128, // AVX2 + kX86InstIdVbroadcastsd, // AVX/AVX2 + kX86InstIdVbroadcastss, // AVX/AVX2 + kX86InstIdVcmppd, // AVX + kX86InstIdVcmpps, // AVX + kX86InstIdVcmpsd, // AVX + kX86InstIdVcmpss, // AVX + kX86InstIdVcomisd, // AVX + kX86InstIdVcomiss, // AVX + kX86InstIdVcvtdq2pd, // AVX + kX86InstIdVcvtdq2ps, // AVX + kX86InstIdVcvtpd2dq, // AVX + kX86InstIdVcvtpd2ps, // AVX + kX86InstIdVcvtph2ps, // F16C + kX86InstIdVcvtps2dq, // AVX + kX86InstIdVcvtps2pd, // AVX + 
kX86InstIdVcvtps2ph, // F16C + kX86InstIdVcvtsd2si, // AVX + kX86InstIdVcvtsd2ss, // AVX + kX86InstIdVcvtsi2sd, // AVX + kX86InstIdVcvtsi2ss, // AVX + kX86InstIdVcvtss2sd, // AVX + kX86InstIdVcvtss2si, // AVX + kX86InstIdVcvttpd2dq, // AVX + kX86InstIdVcvttps2dq, // AVX + kX86InstIdVcvttsd2si, // AVX + kX86InstIdVcvttss2si, // AVX + kX86InstIdVdivpd, // AVX + kX86InstIdVdivps, // AVX + kX86InstIdVdivsd, // AVX + kX86InstIdVdivss, // AVX + kX86InstIdVdppd, // AVX + kX86InstIdVdpps, // AVX + kX86InstIdVextractf128, // AVX + kX86InstIdVextracti128, // AVX2 + kX86InstIdVextractps, // AVX + kX86InstIdVfmadd132pd, // FMA3 + kX86InstIdVfmadd132ps, // FMA3 + kX86InstIdVfmadd132sd, // FMA3 + kX86InstIdVfmadd132ss, // FMA3 + kX86InstIdVfmadd213pd, // FMA3 + kX86InstIdVfmadd213ps, // FMA3 + kX86InstIdVfmadd213sd, // FMA3 + kX86InstIdVfmadd213ss, // FMA3 + kX86InstIdVfmadd231pd, // FMA3 + kX86InstIdVfmadd231ps, // FMA3 + kX86InstIdVfmadd231sd, // FMA3 + kX86InstIdVfmadd231ss, // FMA3 + kX86InstIdVfmaddpd, // FMA4 + kX86InstIdVfmaddps, // FMA4 + kX86InstIdVfmaddsd, // FMA4 + kX86InstIdVfmaddss, // FMA4 + kX86InstIdVfmaddsub132pd, // FMA3 + kX86InstIdVfmaddsub132ps, // FMA3 + kX86InstIdVfmaddsub213pd, // FMA3 + kX86InstIdVfmaddsub213ps, // FMA3 + kX86InstIdVfmaddsub231pd, // FMA3 + kX86InstIdVfmaddsub231ps, // FMA3 + kX86InstIdVfmaddsubpd, // FMA4 + kX86InstIdVfmaddsubps, // FMA4 + kX86InstIdVfmsub132pd, // FMA3 + kX86InstIdVfmsub132ps, // FMA3 + kX86InstIdVfmsub132sd, // FMA3 + kX86InstIdVfmsub132ss, // FMA3 + kX86InstIdVfmsub213pd, // FMA3 + kX86InstIdVfmsub213ps, // FMA3 + kX86InstIdVfmsub213sd, // FMA3 + kX86InstIdVfmsub213ss, // FMA3 + kX86InstIdVfmsub231pd, // FMA3 + kX86InstIdVfmsub231ps, // FMA3 + kX86InstIdVfmsub231sd, // FMA3 + kX86InstIdVfmsub231ss, // FMA3 + kX86InstIdVfmsubadd132pd, // FMA3 + kX86InstIdVfmsubadd132ps, // FMA3 + kX86InstIdVfmsubadd213pd, // FMA3 + kX86InstIdVfmsubadd213ps, // FMA3 + kX86InstIdVfmsubadd231pd, // FMA3 + kX86InstIdVfmsubadd231ps, // FMA3 + kX86InstIdVfmsubaddpd, // FMA4 + kX86InstIdVfmsubaddps, // FMA4 + kX86InstIdVfmsubpd, // FMA4 + kX86InstIdVfmsubps, // FMA4 + kX86InstIdVfmsubsd, // FMA4 + kX86InstIdVfmsubss, // FMA4 + kX86InstIdVfnmadd132pd, // FMA3 + kX86InstIdVfnmadd132ps, // FMA3 + kX86InstIdVfnmadd132sd, // FMA3 + kX86InstIdVfnmadd132ss, // FMA3 + kX86InstIdVfnmadd213pd, // FMA3 + kX86InstIdVfnmadd213ps, // FMA3 + kX86InstIdVfnmadd213sd, // FMA3 + kX86InstIdVfnmadd213ss, // FMA3 + kX86InstIdVfnmadd231pd, // FMA3 + kX86InstIdVfnmadd231ps, // FMA3 + kX86InstIdVfnmadd231sd, // FMA3 + kX86InstIdVfnmadd231ss, // FMA3 + kX86InstIdVfnmaddpd, // FMA4 + kX86InstIdVfnmaddps, // FMA4 + kX86InstIdVfnmaddsd, // FMA4 + kX86InstIdVfnmaddss, // FMA4 + kX86InstIdVfnmsub132pd, // FMA3 + kX86InstIdVfnmsub132ps, // FMA3 + kX86InstIdVfnmsub132sd, // FMA3 + kX86InstIdVfnmsub132ss, // FMA3 + kX86InstIdVfnmsub213pd, // FMA3 + kX86InstIdVfnmsub213ps, // FMA3 + kX86InstIdVfnmsub213sd, // FMA3 + kX86InstIdVfnmsub213ss, // FMA3 + kX86InstIdVfnmsub231pd, // FMA3 + kX86InstIdVfnmsub231ps, // FMA3 + kX86InstIdVfnmsub231sd, // FMA3 + kX86InstIdVfnmsub231ss, // FMA3 + kX86InstIdVfnmsubpd, // FMA4 + kX86InstIdVfnmsubps, // FMA4 + kX86InstIdVfnmsubsd, // FMA4 + kX86InstIdVfnmsubss, // FMA4 + kX86InstIdVfrczpd, // XOP + kX86InstIdVfrczps, // XOP + kX86InstIdVfrczsd, // XOP + kX86InstIdVfrczss, // XOP + kX86InstIdVgatherdpd, // AVX2 + kX86InstIdVgatherdps, // AVX2 + kX86InstIdVgatherqpd, // AVX2 + kX86InstIdVgatherqps, // AVX2 + kX86InstIdVhaddpd, // AVX + kX86InstIdVhaddps, // AVX + 
kX86InstIdVhsubpd, // AVX + kX86InstIdVhsubps, // AVX + kX86InstIdVinsertf128, // AVX + kX86InstIdVinserti128, // AVX2 + kX86InstIdVinsertps, // AVX + kX86InstIdVlddqu, // AVX + kX86InstIdVldmxcsr, // AVX + kX86InstIdVmaskmovdqu, // AVX + kX86InstIdVmaskmovpd, // AVX + kX86InstIdVmaskmovps, // AVX + kX86InstIdVmaxpd, // AVX + kX86InstIdVmaxps, // AVX + kX86InstIdVmaxsd, // AVX + kX86InstIdVmaxss, // AVX + kX86InstIdVminpd, // AVX + kX86InstIdVminps, // AVX + kX86InstIdVminsd, // AVX + kX86InstIdVminss, // AVX + kX86InstIdVmovapd, // AVX + kX86InstIdVmovaps, // AVX + kX86InstIdVmovd, // AVX + kX86InstIdVmovddup, // AVX + kX86InstIdVmovdqa, // AVX + kX86InstIdVmovdqu, // AVX + kX86InstIdVmovhlps, // AVX + kX86InstIdVmovhpd, // AVX + kX86InstIdVmovhps, // AVX + kX86InstIdVmovlhps, // AVX + kX86InstIdVmovlpd, // AVX + kX86InstIdVmovlps, // AVX + kX86InstIdVmovmskpd, // AVX + kX86InstIdVmovmskps, // AVX + kX86InstIdVmovntdq, // AVX + kX86InstIdVmovntdqa, // AVX/AVX2 + kX86InstIdVmovntpd, // AVX + kX86InstIdVmovntps, // AVX + kX86InstIdVmovq, // AVX + kX86InstIdVmovsd, // AVX + kX86InstIdVmovshdup, // AVX + kX86InstIdVmovsldup, // AVX + kX86InstIdVmovss, // AVX + kX86InstIdVmovupd, // AVX + kX86InstIdVmovups, // AVX + kX86InstIdVmpsadbw, // AVX/AVX2 + kX86InstIdVmulpd, // AVX + kX86InstIdVmulps, // AVX + kX86InstIdVmulsd, // AVX + kX86InstIdVmulss, // AVX + kX86InstIdVorpd, // AVX + kX86InstIdVorps, // AVX + kX86InstIdVpabsb, // AVX2 + kX86InstIdVpabsd, // AVX2 + kX86InstIdVpabsw, // AVX2 + kX86InstIdVpackssdw, // AVX2 + kX86InstIdVpacksswb, // AVX2 + kX86InstIdVpackusdw, // AVX2 + kX86InstIdVpackuswb, // AVX2 + kX86InstIdVpaddb, // AVX2 + kX86InstIdVpaddd, // AVX2 + kX86InstIdVpaddq, // AVX2 + kX86InstIdVpaddsb, // AVX2 + kX86InstIdVpaddsw, // AVX2 + kX86InstIdVpaddusb, // AVX2 + kX86InstIdVpaddusw, // AVX2 + kX86InstIdVpaddw, // AVX2 + kX86InstIdVpalignr, // AVX2 + kX86InstIdVpand, // AVX2 + kX86InstIdVpandn, // AVX2 + kX86InstIdVpavgb, // AVX2 + kX86InstIdVpavgw, // AVX2 + kX86InstIdVpblendd, // AVX2 + kX86InstIdVpblendvb, // AVX2 + kX86InstIdVpblendw, // AVX2 + kX86InstIdVpbroadcastb, // AVX2 + kX86InstIdVpbroadcastd, // AVX2 + kX86InstIdVpbroadcastq, // AVX2 + kX86InstIdVpbroadcastw, // AVX2 + kX86InstIdVpclmulqdq, // AVX+PCLMULQDQ + kX86InstIdVpcmov, // XOP + kX86InstIdVpcmpeqb, // AVX2 + kX86InstIdVpcmpeqd, // AVX2 + kX86InstIdVpcmpeqq, // AVX2 + kX86InstIdVpcmpeqw, // AVX2 + kX86InstIdVpcmpestri, // AVX + kX86InstIdVpcmpestrm, // AVX + kX86InstIdVpcmpgtb, // AVX2 + kX86InstIdVpcmpgtd, // AVX2 + kX86InstIdVpcmpgtq, // AVX2 + kX86InstIdVpcmpgtw, // AVX2 + kX86InstIdVpcmpistri, // AVX + kX86InstIdVpcmpistrm, // AVX + kX86InstIdVpcomb, // XOP + kX86InstIdVpcomd, // XOP + kX86InstIdVpcomq, // XOP + kX86InstIdVpcomub, // XOP + kX86InstIdVpcomud, // XOP + kX86InstIdVpcomuq, // XOP + kX86InstIdVpcomuw, // XOP + kX86InstIdVpcomw, // XOP + kX86InstIdVperm2f128, // AVX + kX86InstIdVperm2i128, // AVX2 + kX86InstIdVpermd, // AVX2 + kX86InstIdVpermil2pd, // XOP + kX86InstIdVpermil2ps, // XOP + kX86InstIdVpermilpd, // AVX + kX86InstIdVpermilps, // AVX + kX86InstIdVpermpd, // AVX2 + kX86InstIdVpermps, // AVX2 + kX86InstIdVpermq, // AVX2 + kX86InstIdVpextrb, // AVX + kX86InstIdVpextrd, // AVX + kX86InstIdVpextrq, // AVX (X64) + kX86InstIdVpextrw, // AVX + kX86InstIdVpgatherdd, // AVX2 + kX86InstIdVpgatherdq, // AVX2 + kX86InstIdVpgatherqd, // AVX2 + kX86InstIdVpgatherqq, // AVX2 + kX86InstIdVphaddbd, // XOP + kX86InstIdVphaddbq, // XOP + kX86InstIdVphaddbw, // XOP + kX86InstIdVphaddd, // AVX2 + 
kX86InstIdVphadddq, // XOP + kX86InstIdVphaddsw, // AVX2 + kX86InstIdVphaddubd, // XOP + kX86InstIdVphaddubq, // XOP + kX86InstIdVphaddubw, // XOP + kX86InstIdVphaddudq, // XOP + kX86InstIdVphadduwd, // XOP + kX86InstIdVphadduwq, // XOP + kX86InstIdVphaddw, // AVX2 + kX86InstIdVphaddwd, // XOP + kX86InstIdVphaddwq, // XOP + kX86InstIdVphminposuw, // AVX + kX86InstIdVphsubbw, // XOP + kX86InstIdVphsubd, // AVX2 + kX86InstIdVphsubdq, // XOP + kX86InstIdVphsubsw, // AVX2 + kX86InstIdVphsubw, // AVX2 + kX86InstIdVphsubwd, // XOP + kX86InstIdVpinsrb, // AVX + kX86InstIdVpinsrd, // AVX + kX86InstIdVpinsrq, // AVX (X64) + kX86InstIdVpinsrw, // AVX + kX86InstIdVpmacsdd, // XOP + kX86InstIdVpmacsdqh, // XOP + kX86InstIdVpmacsdql, // XOP + kX86InstIdVpmacssdd, // XOP + kX86InstIdVpmacssdqh, // XOP + kX86InstIdVpmacssdql, // XOP + kX86InstIdVpmacsswd, // XOP + kX86InstIdVpmacssww, // XOP + kX86InstIdVpmacswd, // XOP + kX86InstIdVpmacsww, // XOP + kX86InstIdVpmadcsswd, // XOP + kX86InstIdVpmadcswd, // XOP + kX86InstIdVpmaddubsw, // AVX/AVX2 + kX86InstIdVpmaddwd, // AVX/AVX2 + kX86InstIdVpmaskmovd, // AVX2 + kX86InstIdVpmaskmovq, // AVX2 + kX86InstIdVpmaxsb, // AVX/AVX2 + kX86InstIdVpmaxsd, // AVX/AVX2 + kX86InstIdVpmaxsw, // AVX/AVX2 + kX86InstIdVpmaxub, // AVX/AVX2 + kX86InstIdVpmaxud, // AVX/AVX2 + kX86InstIdVpmaxuw, // AVX/AVX2 + kX86InstIdVpminsb, // AVX/AVX2 + kX86InstIdVpminsd, // AVX/AVX2 + kX86InstIdVpminsw, // AVX/AVX2 + kX86InstIdVpminub, // AVX/AVX2 + kX86InstIdVpminud, // AVX/AVX2 + kX86InstIdVpminuw, // AVX/AVX2 + kX86InstIdVpmovmskb, // AVX/AVX2 + kX86InstIdVpmovsxbd, // AVX/AVX2 + kX86InstIdVpmovsxbq, // AVX/AVX2 + kX86InstIdVpmovsxbw, // AVX/AVX2 + kX86InstIdVpmovsxdq, // AVX/AVX2 + kX86InstIdVpmovsxwd, // AVX/AVX2 + kX86InstIdVpmovsxwq, // AVX/AVX2 + kX86InstIdVpmovzxbd, // AVX/AVX2 + kX86InstIdVpmovzxbq, // AVX/AVX2 + kX86InstIdVpmovzxbw, // AVX/AVX2 + kX86InstIdVpmovzxdq, // AVX/AVX2 + kX86InstIdVpmovzxwd, // AVX/AVX2 + kX86InstIdVpmovzxwq, // AVX/AVX2 + kX86InstIdVpmuldq, // AVX/AVX2 + kX86InstIdVpmulhrsw, // AVX/AVX2 + kX86InstIdVpmulhuw, // AVX/AVX2 + kX86InstIdVpmulhw, // AVX/AVX2 + kX86InstIdVpmulld, // AVX/AVX2 + kX86InstIdVpmullw, // AVX/AVX2 + kX86InstIdVpmuludq, // AVX/AVX2 + kX86InstIdVpor, // AVX/AVX2 + kX86InstIdVpperm, // XOP + kX86InstIdVprotb, // XOP + kX86InstIdVprotd, // XOP + kX86InstIdVprotq, // XOP + kX86InstIdVprotw, // XOP + kX86InstIdVpsadbw, // AVX/AVX2 + kX86InstIdVpshab, // XOP + kX86InstIdVpshad, // XOP + kX86InstIdVpshaq, // XOP + kX86InstIdVpshaw, // XOP + kX86InstIdVpshlb, // XOP + kX86InstIdVpshld, // XOP + kX86InstIdVpshlq, // XOP + kX86InstIdVpshlw, // XOP + kX86InstIdVpshufb, // AVX/AVX2 + kX86InstIdVpshufd, // AVX/AVX2 + kX86InstIdVpshufhw, // AVX/AVX2 + kX86InstIdVpshuflw, // AVX/AVX2 + kX86InstIdVpsignb, // AVX/AVX2 + kX86InstIdVpsignd, // AVX/AVX2 + kX86InstIdVpsignw, // AVX/AVX2 + kX86InstIdVpslld, // AVX/AVX2 + kX86InstIdVpslldq, // AVX/AVX2 + kX86InstIdVpsllq, // AVX/AVX2 + kX86InstIdVpsllvd, // AVX2 + kX86InstIdVpsllvq, // AVX2 + kX86InstIdVpsllw, // AVX/AVX2 + kX86InstIdVpsrad, // AVX/AVX2 + kX86InstIdVpsravd, // AVX2 + kX86InstIdVpsraw, // AVX/AVX2 + kX86InstIdVpsrld, // AVX/AVX2 + kX86InstIdVpsrldq, // AVX/AVX2 + kX86InstIdVpsrlq, // AVX/AVX2 + kX86InstIdVpsrlvd, // AVX2 + kX86InstIdVpsrlvq, // AVX2 + kX86InstIdVpsrlw, // AVX/AVX2 + kX86InstIdVpsubb, // AVX/AVX2 + kX86InstIdVpsubd, // AVX/AVX2 + kX86InstIdVpsubq, // AVX/AVX2 + kX86InstIdVpsubsb, // AVX/AVX2 + kX86InstIdVpsubsw, // AVX/AVX2 + kX86InstIdVpsubusb, // AVX/AVX2 + 
kX86InstIdVpsubusw, // AVX/AVX2 + kX86InstIdVpsubw, // AVX/AVX2 + kX86InstIdVptest, // AVX + kX86InstIdVpunpckhbw, // AVX/AVX2 + kX86InstIdVpunpckhdq, // AVX/AVX2 + kX86InstIdVpunpckhqdq, // AVX/AVX2 + kX86InstIdVpunpckhwd, // AVX/AVX2 + kX86InstIdVpunpcklbw, // AVX/AVX2 + kX86InstIdVpunpckldq, // AVX/AVX2 + kX86InstIdVpunpcklqdq, // AVX/AVX2 + kX86InstIdVpunpcklwd, // AVX/AVX2 + kX86InstIdVpxor, // AVX/AVX2 + kX86InstIdVrcpps, // AVX + kX86InstIdVrcpss, // AVX + kX86InstIdVroundpd, // AVX + kX86InstIdVroundps, // AVX + kX86InstIdVroundsd, // AVX + kX86InstIdVroundss, // AVX + kX86InstIdVrsqrtps, // AVX + kX86InstIdVrsqrtss, // AVX + kX86InstIdVshufpd, // AVX + kX86InstIdVshufps, // AVX + kX86InstIdVsqrtpd, // AVX + kX86InstIdVsqrtps, // AVX + kX86InstIdVsqrtsd, // AVX + kX86InstIdVsqrtss, // AVX + kX86InstIdVstmxcsr, // AVX + kX86InstIdVsubpd, // AVX + kX86InstIdVsubps, // AVX + kX86InstIdVsubsd, // AVX + kX86InstIdVsubss, // AVX + kX86InstIdVtestpd, // AVX + kX86InstIdVtestps, // AVX + kX86InstIdVucomisd, // AVX + kX86InstIdVucomiss, // AVX + kX86InstIdVunpckhpd, // AVX + kX86InstIdVunpckhps, // AVX + kX86InstIdVunpcklpd, // AVX + kX86InstIdVunpcklps, // AVX + kX86InstIdVxorpd, // AVX + kX86InstIdVxorps, // AVX + kX86InstIdVzeroall, // AVX + kX86InstIdVzeroupper, // AVX + kX86InstIdWrfsbase, // FSGSBASE (X64) + kX86InstIdWrgsbase, // FSGSBASE (X64) + kX86InstIdXadd, // X86/X64 (i486+) + kX86InstIdXchg, // X86/X64 + kX86InstIdXgetbv, // XSAVE + kX86InstIdXor, // X86/X64 + kX86InstIdXorpd, // SSE2 + kX86InstIdXorps, // SSE + kX86InstIdXrstor, // XSAVE + kX86InstIdXrstor64, // XSAVE + kX86InstIdXsave, // XSAVE + kX86InstIdXsave64, // XSAVE + kX86InstIdXsaveopt, // XSAVE + kX86InstIdXsaveopt64, // XSAVE + kX86InstIdXsetbv, // XSAVE + + _kX86InstIdCount, + + _kX86InstIdCmovcc = kX86InstIdCmova, + _kX86InstIdJcc = kX86InstIdJa, + _kX86InstIdSetcc = kX86InstIdSeta, + + _kX86InstIdJbegin = kX86InstIdJa, + _kX86InstIdJend = kX86InstIdJmp +}; + +// ============================================================================ +// [asmjit::X86InstOptions] +// ============================================================================ + +//! X86/X64 instruction emit options, mainly for internal purposes. +ASMJIT_ENUM(X86InstOptions) { + kX86InstOptionRex = 0x00000040, //!< Force REX prefix (X64) + _kX86InstOptionNoRex = 0x00000080, //!< Do not use, internal of `X86Assembler`. + kX86InstOptionLock = 0x00000100, //!< Force LOCK prefix (lock-enabled instructions). + kX86InstOptionVex3 = 0x00000200, //!< Force 3-byte VEX prefix (AVX) + kX86InstOptionEvex = 0x00010000, //!< Force 4-byte EVEX prefix (AVX-512). + kX86InstOptionEvexZero = 0x00020000, //!< EVEX use zeroing instead of merging. + kX86InstOptionEvexOneN = 0x00040000, //!< EVEX broadcast the first element to all. + kX86InstOptionEvexSae = 0x00080000, //!< EVEX suppress all exceptions (SAE). + kX86InstOptionEvexRnSae = 0x00100000, //!< EVEX 'round-to-nearest' (even) and `SAE`. + kX86InstOptionEvexRdSae = 0x00200000, //!< EVEX 'round-down' (toward -inf) and 'SAE'. + kX86InstOptionEvexRuSae = 0x00400000, //!< EVEX 'round-up' (toward +inf) and 'SAE'. + kX86InstOptionEvexRzSae = 0x00800000 //!< EVEX 'round-toward-zero' (truncate) and 'SAE'. +}; + +// ============================================================================ +// [asmjit::X86InstEncoding] +// ============================================================================ + +//! \internal +//! +//! X86/X64 instruction groups. +//! +//! 
+//! This group is specific to AsmJit and only used by `X86Assembler`.
+ASMJIT_ENUM(X86InstEncoding) {
+  kX86InstEncodingNone = 0,        //!< Never used.
+
+  kX86InstEncodingX86Op,
+  kX86InstEncodingX86Op_66H,
+  kX86InstEncodingX86Rm,
+  kX86InstEncodingX86Rm_B,
+  kX86InstEncodingX86RmReg,
+  kX86InstEncodingX86RegRm,
+  kX86InstEncodingX86M,
+  kX86InstEncodingX86Arith,        //!< X86 encoding - adc, add, and, cmp, or, sbb, sub, xor.
+  kX86InstEncodingX86BSwap,        //!< X86 encoding - bswap.
+  kX86InstEncodingX86BTest,        //!< X86 encoding - bt, btc, btr, bts.
+  kX86InstEncodingX86Call,         //!< X86 encoding - call.
+  kX86InstEncodingX86Enter,        //!< X86 encoding - enter.
+  kX86InstEncodingX86Imul,         //!< X86 encoding - imul.
+  kX86InstEncodingX86IncDec,       //!< X86 encoding - inc, dec.
+  kX86InstEncodingX86Int,          //!< X86 encoding - int (interrupt).
+  kX86InstEncodingX86Jcc,          //!< X86 encoding - jcc.
+  kX86InstEncodingX86Jecxz,        //!< X86 encoding - jcxz, jecxz, jrcxz.
+  kX86InstEncodingX86Jmp,          //!< X86 encoding - jmp.
+  kX86InstEncodingX86Lea,          //!< X86 encoding - lea.
+  kX86InstEncodingX86Mov,          //!< X86 encoding - mov.
+  kX86InstEncodingX86MovsxMovzx,   //!< X86 encoding - movsx, movzx.
+  kX86InstEncodingX86Movsxd,       //!< X86 encoding - movsxd.
+  kX86InstEncodingX86MovPtr,       //!< X86 encoding - mov with absolute memory operand (x86/x64).
+  kX86InstEncodingX86Push,         //!< X86 encoding - push.
+  kX86InstEncodingX86Pop,          //!< X86 encoding - pop.
+  kX86InstEncodingX86Rep,          //!< X86 encoding - rep|repe|repne lods?, movs?, stos?, cmps?, scas?.
+  kX86InstEncodingX86Ret,          //!< X86 encoding - ret.
+  kX86InstEncodingX86Rot,          //!< X86 encoding - rcl, rcr, rol, ror, sal, sar, shl, shr.
+  kX86InstEncodingX86Set,          //!< X86 encoding - setcc.
+  kX86InstEncodingX86ShldShrd,     //!< X86 encoding - shld, shrd.
+  kX86InstEncodingX86Test,         //!< X86 encoding - test.
+  kX86InstEncodingX86Xadd,         //!< X86 encoding - xadd.
+  kX86InstEncodingX86Xchg,         //!< X86 encoding - xchg.
+  kX86InstEncodingX86Crc,          //!< X86 encoding - crc32.
+  kX86InstEncodingX86Prefetch,     //!< X86 encoding - prefetch.
+  kX86InstEncodingX86Fence,        //!< X86 encoding - lfence, mfence, sfence.
+  kX86InstEncodingFpuOp,           //!< FPU encoding - [OP].
+  kX86InstEncodingFpuArith,        //!< FPU encoding - fadd, fdiv, fdivr, fmul, fsub, fsubr.
+  kX86InstEncodingFpuCom,          //!< FPU encoding - fcom, fcomp.
+  kX86InstEncodingFpuFldFst,       //!< FPU encoding - fld, fst, fstp.
+  kX86InstEncodingFpuM,            //!< FPU encoding - fiadd, ficom, ficomp, fidiv, fidivr, fild, fimul, fist, fistp, fisttp, fisub, fisubr.
+  kX86InstEncodingFpuR,            //!< FPU encoding - fcmov, fcomi, fcomip, ffree, fucom, fucomi, fucomip, fucomp, fxch.
+  kX86InstEncodingFpuRDef,         //!< FPU encoding - faddp, fdivp, fdivrp, fmulp, fsubp, fsubrp.
+  kX86InstEncodingFpuStsw,         //!< FPU encoding - fnstsw, fstsw.
+  kX86InstEncodingSimdRm,          //!< SIMD encoding - [RM].
+  kX86InstEncodingSimdRm_P,        //!< SIMD encoding - [RM] (propagates 66H if the instruction uses XMM register).
+  kX86InstEncodingSimdRm_Q,        //!< SIMD encoding - [RM] (propagates REX.W if GPQ is used).
+  kX86InstEncodingSimdRm_PQ,       //!< SIMD encoding - [RM] (propagates 66H and REX.W).
+  kX86InstEncodingSimdRmRi,        //!< SIMD encoding - [RM|RI].
+  kX86InstEncodingSimdRmRi_P,      //!< SIMD encoding - [RM|RI] (propagates 66H if the instruction uses XMM register).
+  kX86InstEncodingSimdRmi,         //!< SIMD encoding - [RMI].
+  kX86InstEncodingSimdRmi_P,       //!< SIMD encoding - [RMI] (propagates 66H if the instruction uses XMM register).
+  kX86InstEncodingSimdPextrw,      //!< SIMD encoding - pextrw.
+  kX86InstEncodingSimdExtract,     //!< SIMD encoding - pextrb, pextrd, pextrq, extractps.
+  kX86InstEncodingSimdMov,         //!< SIMD encoding - mov - primary opcode means `(X)MM <- (X)MM/Mem`, secondary `(X)MM/Mem <- (X)MM` format.
+  kX86InstEncodingSimdMovNoRexW,   //!< SIMD encoding - movmskpd, movmskps.
+  kX86InstEncodingSimdMovBe,       //!< Used by movbe.
+  kX86InstEncodingSimdMovD,        //!< SIMD encoding - movd.
+  kX86InstEncodingSimdMovQ,        //!< SIMD encoding - movq.
+  kX86InstEncodingSimdExtrq,       //!< SIMD encoding - extrq (SSE4a).
+  kX86InstEncodingSimdInsertq,     //!< SIMD encoding - insertq (SSE4a).
+  kX86InstEncodingSimd3dNow,       //!< SIMD encoding - 3dnow instructions.
+  kX86InstEncodingAvxOp,           //!< AVX encoding - [OP].
+  kX86InstEncodingAvxM,            //!< AVX encoding - [M].
+  kX86InstEncodingAvxMr,           //!< AVX encoding - [MR].
+  kX86InstEncodingAvxMr_OptL,      //!< AVX encoding - [MR] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxMri,          //!< AVX encoding - [MRI].
+  kX86InstEncodingAvxMri_OptL,     //!< AVX encoding - [MRI] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxRm,           //!< AVX encoding - [RM].
+  kX86InstEncodingAvxRm_OptL,      //!< AVX encoding - [RM] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxRmi,          //!< AVX encoding - [RMI].
+  kX86InstEncodingAvxRmi_OptW,     //!< AVX encoding - [RMI] (Propagates AVX.W if GPQ used).
+  kX86InstEncodingAvxRmi_OptL,     //!< AVX encoding - [RMI] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxRvm,          //!< AVX encoding - [RVM].
+  kX86InstEncodingAvxRvm_OptW,     //!< AVX encoding - [RVM] (Propagates AVX.W if GPQ used).
+  kX86InstEncodingAvxRvm_OptL,     //!< AVX encoding - [RVM] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxRvmr,         //!< AVX encoding - [RVMR].
+  kX86InstEncodingAvxRvmr_OptL,    //!< AVX encoding - [RVMR] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxRvmi,         //!< AVX encoding - [RVMI].
+  kX86InstEncodingAvxRvmi_OptL,    //!< AVX encoding - [RVMI] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxRmv,          //!< AVX encoding - [RMV].
+  kX86InstEncodingAvxRmv_OptW,     //!< AVX encoding - [RMV] (Propagates AVX.W if GPQ used).
+  kX86InstEncodingAvxRmvi,         //!< AVX encoding - [RMVI].
+  kX86InstEncodingAvxRmMr,         //!< AVX encoding - [RM|MR].
+  kX86InstEncodingAvxRmMr_OptL,    //!< AVX encoding - [RM|MR] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxRvmRmi,       //!< AVX encoding - [RVM|RMI].
+  kX86InstEncodingAvxRvmRmi_OptL,  //!< AVX encoding - [RVM|RMI] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxRvmMr,        //!< AVX encoding - [RVM|MR].
+  kX86InstEncodingAvxRvmMvr,       //!< AVX encoding - [RVM|MVR].
+  kX86InstEncodingAvxRvmMvr_OptL,  //!< AVX encoding - [RVM|MVR] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxRvmVmi,       //!< AVX encoding - [RVM|VMI].
+  kX86InstEncodingAvxRvmVmi_OptL,  //!< AVX encoding - [RVM|VMI] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxVm,           //!< AVX encoding - [VM].
+  kX86InstEncodingAvxVm_OptW,      //!< AVX encoding - [VM] (Propagates AVX.W if GPQ used).
+  kX86InstEncodingAvxVmi,          //!< AVX encoding - [VMI].
+  kX86InstEncodingAvxVmi_OptL,     //!< AVX encoding - [VMI] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxRvrmRvmr,     //!< AVX encoding - [RVRM|RVMR].
+  kX86InstEncodingAvxRvrmRvmr_OptL,//!< AVX encoding - [RVRM|RVMR] (Propagates AVX.L if YMM used).
+  kX86InstEncodingAvxMovDQ,        //!< AVX encoding - vmovd, vmovq.
+  kX86InstEncodingAvxMovSsSd,      //!< AVX encoding - vmovss, vmovsd.
+  kX86InstEncodingAvxGather,       //!< AVX encoding - gather (VSIB).
+  kX86InstEncodingAvxGatherEx,     //!< AVX encoding - gather (VSIB), differs only in MEM operand.
+  kX86InstEncodingFma4,            //!< FMA4 encoding - [R, R, R/M, R/M].
+  kX86InstEncodingFma4_OptL,       //!< FMA4 encoding - [R, R, R/M, R/M] (Propagates AVX.L if YMM used).
+  kX86InstEncodingXopRm,           //!< XOP encoding - [RM].
+  kX86InstEncodingXopRm_OptL,      //!< XOP encoding - [RM] (Propagates AVX.L if YMM used).
+  kX86InstEncodingXopRvmRmv,       //!< XOP encoding - [RVM | RMV].
+  kX86InstEncodingXopRvmRmi,       //!< XOP encoding - [RVM | RMI].
+  kX86InstEncodingXopRvmr,         //!< XOP encoding - [RVMR].
+  kX86InstEncodingXopRvmr_OptL,    //!< XOP encoding - [RVMR] (Propagates AVX.L if YMM used).
+  kX86InstEncodingXopRvmi,         //!< XOP encoding - [RVMI].
+  kX86InstEncodingXopRvmi_OptL,    //!< XOP encoding - [RVMI] (Propagates AVX.L if YMM used).
+  kX86InstEncodingXopRvrmRvmr,     //!< XOP encoding - [RVRM | RVMR].
+  kX86InstEncodingXopRvrmRvmr_OptL,//!< XOP encoding - [RVRM | RVMR] (Propagates AVX.L if YMM used).
+  kX86InstEncodingXopVm_OptW,      //!< XOP encoding - [VM].
+
+  _kX86InstEncodingCount           //!< Count of X86 instruction encodings.
+};
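+
+// ----------------------------------------------------------------------------
+// Editor's note: a hedged reading aid, not part of the original patch. The
+// bracketed tags above name where each operand lives in the encoded form; for
+// an `RVM` instruction the first operand is ModR/M.reg, the second is
+// VEX.vvvv, and the third is ModR/M.rm. Assuming the `X86Assembler` interface
+// and the `x86` register namespace defined elsewhere in this patch, given an
+// `X86Assembler a`:
+//
+//   a.vaddpd(x86::xmm0, x86::xmm1, x86::xmm2); // RVM: reg, vvvv, rm.
+// ----------------------------------------------------------------------------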
+
+// ============================================================================
+// [asmjit::X86InstOpCodeFlags]
+// ============================================================================
+
+//! \internal
+//!
+//! X86/X64 instruction opcode encoding used by the AsmJit `X86InstInfo` table.
+//!
+//! This schema is AsmJit specific and has been designed to allow encoding of
+//! all X86 instructions available. X86, MMX, and SSE+ instructions always use
+//! `MMMMM` and `PP` fields, which are encoded to corresponding prefixes needed
+//! by X86 or SIMD instructions. AVX+ instructions embed `MMMMM` and `PP` fields
+//! in a VEX prefix.
+//!
+//! The instruction opcode definition uses 1 or 2 bytes as an opcode value. 1
+//! byte is needed by most of the instructions, 2 bytes are only used by legacy
+//! X87-FPU instructions. This means that a second byte is free to be used by
+//! AVX and AVX-512 instructions.
+//!
+//! The fields description:
+//!
+//! - `MMMMM` field is used to encode prefixes needed by the instruction or as
+//!   a part of VEX/EVEX prefix.
+//!
+//! - `PP` field is used to encode prefixes needed by the instruction or as a
+//!   part of VEX/EVEX prefix.
+//!
+//! - `L` field is used exclusively by AVX+ and AVX512+ instruction sets. It
+//!   describes vector size, which is 128-bit for XMM register `L_128`, 256-bit
+//!   for YMM register `L_256` and 512-bit for ZMM register `L_512`. The `L`
+//!   field is omitted if the instruction supports multiple vector lengths;
+//!   however, if the instruction requires a specific `L` value it's specified
+//!   as a part of the opcode.
+//!
+//! - `W` field is the most complicated. It was added by the 64-bit architecture
+//!   to promote the default operation width (instructions that perform a
+//!   32-bit operation by default require the width to be overridden to 64-bit
+//!   explicitly). There is nothing wrong with this; however, some instructions
+//!   introduced an implicit `W` override, for example a `cdqe` instruction is
+//!   basically a `cwde` instruction with an overridden `W` (set to 1). There
+//!   are some others in the base X86 instruction set. More recent instruction
+//!   sets started using the `W` field more often:
+//!
+//!   - AVX instructions started using the `W` field as an extended opcode for
+//!     FMA, GATHER, PERM, and other instructions. It also uses the `W` field
+//!     to override the default operation width in instructions like `vmovq`.
+//!
+//!   - AVX-512 instructions started using the `W` field as an extended opcode
+//!     for all new instructions. This wouldn't have been an issue if the `W`
+//!     field of AVX-512 had matched AVX, but this is not always the case.
+//!
+//! - `O` field is an extended opcode field (3 bits) used by the ModR/M byte.
+ASMJIT_ENUM(X86InstOpCodeFlags) {
+  // `MMMMM` field in AVX/XOP/AVX-512 instruction (5 bits).
+  //
+  // `OpCode` leading bytes in legacy encoding.
+  //
+  // AVX reserves 5 bits for `MMMMM` field, however AVX instructions only use
+  // 2 bits and XOP 4 bits. AVX-512 shrinks `MMMMM` field into `MM` so it's
+  // safe to assume that the `MM` field won't grow in the future as EVEX
+  // doesn't use more than 2 bits. A fifth bit can always be stored elsewhere
+  // if needed.
+  kX86InstOpCode_MM_Shift  = 16,
+  kX86InstOpCode_MM_Mask   = 0x0FU << kX86InstOpCode_MM_Shift,
+  kX86InstOpCode_MM_00     = 0x00U << kX86InstOpCode_MM_Shift,
+  kX86InstOpCode_MM_0F     = 0x01U << kX86InstOpCode_MM_Shift,
+  kX86InstOpCode_MM_0F38   = 0x02U << kX86InstOpCode_MM_Shift,
+  kX86InstOpCode_MM_0F3A   = 0x03U << kX86InstOpCode_MM_Shift,
+  kX86InstOpCode_MM_00011  = 0x03U << kX86InstOpCode_MM_Shift, // XOP.
+  kX86InstOpCode_MM_01000  = 0x08U << kX86InstOpCode_MM_Shift, // XOP.
+  kX86InstOpCode_MM_01001  = 0x09U << kX86InstOpCode_MM_Shift, // XOP.
+  kX86InstOpCode_MM_0F01   = 0x0FU << kX86InstOpCode_MM_Shift, // AsmJit specific, not part of AVX.
+
+  // `PP` field in AVX/XOP/AVX-512 instruction.
+  //
+  // `Mandatory Prefix` in legacy encoding.
+  //
+  // AVX reserves 2 bits for `PP` field, but AsmJit extends the storage by 1
+  // more bit that is used to emit 9B prefix for some X87-FPU instructions.
+  kX86InstOpCode_PP_Shift  = 20,
+  kX86InstOpCode_PP_Mask   = 0x07U << kX86InstOpCode_PP_Shift,
+  kX86InstOpCode_PP_00     = 0x00U << kX86InstOpCode_PP_Shift,
+  kX86InstOpCode_PP_66     = 0x01U << kX86InstOpCode_PP_Shift,
+  kX86InstOpCode_PP_F3     = 0x02U << kX86InstOpCode_PP_Shift,
+  kX86InstOpCode_PP_F2     = 0x03U << kX86InstOpCode_PP_Shift,
+  kX86InstOpCode_PP_9B     = 0x07U << kX86InstOpCode_PP_Shift, // AsmJit specific, not part of AVX.
+
+  // `L` field in AVX/XOP/AVX-512 instruction.
+  //
+  // AVX/XOP can only use the first bit `L.128` or `L.256`. AVX-512 makes it
+  // possible to also use `L.512`.
+
+  // NOTE: If the instruction set manual describes an instruction by using `LIG`
+  // it means that the `L` field is ignored. AsmJit emits `0` in such a case.
+  kX86InstOpCode_L_Shift   = 23,
+  kX86InstOpCode_L_Mask    = 0x03U << kX86InstOpCode_L_Shift,
+  kX86InstOpCode_L_128     = 0x00U << kX86InstOpCode_L_Shift,
+  kX86InstOpCode_L_256     = 0x01U << kX86InstOpCode_L_Shift,
+  kX86InstOpCode_L_512     = 0x02U << kX86InstOpCode_L_Shift,
+
+  // `O` field (ModR/M).
+  kX86InstOpCode_O_Shift   = 25,
+  kX86InstOpCode_O_Mask    = 0x07U << kX86InstOpCode_O_Shift,
+
+  // `W` field used by EVEX instruction encoding.
+  kX86InstOpCode_EW_Shift  = 30,
+  kX86InstOpCode_EW_Mask   = 0x01U << kX86InstOpCode_EW_Shift,
+  kX86InstOpCode_EW        = 0x01U << kX86InstOpCode_EW_Shift,
+
+  // `W` field used by REX/VEX instruction encoding.
+  //
+  // NOTE: If the instruction set manual describes an instruction by using `WIG`
+  // it means that the `W` field is ignored. AsmJit emits `0` in such a case.
+  kX86InstOpCode_W_Shift   = 31,
+  kX86InstOpCode_W_Mask    = 0x01U << kX86InstOpCode_W_Shift,
+  kX86InstOpCode_W         = 0x01U << kX86InstOpCode_W_Shift
+};
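+
+// ----------------------------------------------------------------------------
+// Editor's note: a hedged sketch, not part of the original patch, showing how
+// the `MM`/`PP`/`L` fields above compose into a single opcode descriptor and
+// how a field is extracted again. The chosen combination (66 0F38, L.128) is
+// illustrative only:
+//
+//   uint32_t opcode = 0x58U                   // Primary opcode byte.
+//                   | kX86InstOpCode_MM_0F38  // Leading bytes / VEX.MMMMM.
+//                   | kX86InstOpCode_PP_66    // Mandatory prefix / VEX.PP.
+//                   | kX86InstOpCode_L_128;   // 128-bit vector length.
+//   uint32_t pp = (opcode & kX86InstOpCode_PP_Mask) >> kX86InstOpCode_PP_Shift;
+//   // pp == 0x01, which selects the 66H prefix.
+// ----------------------------------------------------------------------------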
+
+// ============================================================================
+// [asmjit::X86InstFlags]
+// ============================================================================
+
+//! \internal
+//!
+//! X86/X64 instruction flags.
+ASMJIT_ENUM(X86InstFlags) {
+  kX86InstFlagNone        = 0x00000000, //!< No flags.
+
+  kX86InstFlagRO          = 0x00000001, //!< The first operand is read (read-only without `kX86InstFlagWO`).
+  kX86InstFlagWO          = 0x00000002, //!< The first operand is written (write-only without `kX86InstFlagRO`).
+  kX86InstFlagRW          = 0x00000003, //!< The first operand is read-write.
+
+  kX86InstFlagXchg        = 0x00000004, //!< Instruction is an exchange-like instruction (xchg, xadd).
+  kX86InstFlagFlow        = 0x00000008, //!< Control-flow instruction (jmp, jcc, call, ret).
+
+  kX86InstFlagFp          = 0x00000010, //!< Instruction accesses FPU register(s).
+  kX86InstFlagLock        = 0x00000020, //!< Instruction can be prefixed by using the LOCK prefix.
+  kX86InstFlagSpecial     = 0x00000040, //!< Instruction requires special handling (implicit operands), used by \ref Compiler.
+
+  //! Instruction always performs memory access.
+  //!
+  //! This flag is always combined with `kX86InstFlagSpecial` and describes
+  //! that there is an implicit address which is accessed (usually EDI/RDI
+  //! and/or ESI/RSI).
+  kX86InstFlagSpecialMem  = 0x00000080,
+
+  kX86InstFlagMem2        = 0x00000100, //!< Instruction memory operand can refer to 16-bit address (used by FPU).
+  kX86InstFlagMem4        = 0x00000200, //!< Instruction memory operand can refer to 32-bit address (used by FPU).
+  kX86InstFlagMem8        = 0x00000400, //!< Instruction memory operand can refer to 64-bit address (used by FPU).
+  kX86InstFlagMem10       = 0x00000800, //!< Instruction memory operand can refer to 80-bit address (used by FPU).
+
+  kX86InstFlagZeroIfMem   = 0x00001000, //!< Clears the rest of the destination if the source is memory (movss, movsd).
+  kX86InstFlagVolatile    = 0x00002000, //!< Hint for the instruction scheduler not to reorder this instruction.
+
+  kX86InstFlagAvx         = 0x00010000, //!< AVX/AVX2 instruction.
+  kX86InstFlagXop         = 0x00020000, //!< XOP instruction.
+
+  kX86InstFlagAvx512F     = 0x00100000, //!< Supported by AVX-512 F (ZMM).
+  kX86InstFlagAvx512CD    = 0x00200000, //!< Supported by AVX-512 CD (ZMM).
+  kX86InstFlagAvx512PF    = 0x00400000, //!< Supported by AVX-512 PF (ZMM).
+  kX86InstFlagAvx512ER    = 0x00800000, //!< Supported by AVX-512 ER (ZMM).
+  kX86InstFlagAvx512DQ    = 0x01000000, //!< Supported by AVX-512 DQ (ZMM).
+  kX86InstFlagAvx512BW    = 0x02000000, //!< Supported by AVX-512 BW (ZMM).
+  kX86InstFlagAvx512VL    = 0x04000000, //!< Supported by AVX-512 VL (XMM/YMM).
+
+  kX86InstFlagAvx512KMask = 0x08000000, //!< Supports masking {k0..k7}.
+  kX86InstFlagAvx512KZero = 0x10000000, //!< Supports zeroing of elements {k0z..k7z}.
+  kX86InstFlagAvx512BCast = 0x20000000, //!< Supports broadcast {1..N}.
+  kX86InstFlagAvx512Sae   = 0x40000000, //!< Supports suppressing all exceptions {sae}.
+  kX86InstFlagAvx512Rnd   = 0x80000000  //!< Supports static rounding control & SAE {rnd-sae}.
+};
+
+// ============================================================================
+// [asmjit::X86InstOp]
+// ============================================================================
+
+//! \internal
+//!
+//! X86/X64 instruction operand flags.
+ASMJIT_ENUM(X86InstOp) {
+  kX86InstOpGb            = 0x0001, //!< Operand can be 8-bit GPB register.
+  kX86InstOpGw            = 0x0002, //!< Operand can be 16-bit GPW register.
+  kX86InstOpGd            = 0x0004, //!< Operand can be 32-bit GPD register.
+  kX86InstOpGq            = 0x0008, //!< Operand can be 64-bit GPQ register.
+  kX86InstOpFp            = 0x0010, //!< Operand can be FPU register.
+  kX86InstOpMm            = 0x0020, //!< Operand can be 64-bit MMX register.
+  kX86InstOpK             = 0x0040, //!< Operand can be 64-bit K register.
+ + kX86InstOpXmm = 0x0100, //!< Operand can be 128-bit XMM register. + kX86InstOpYmm = 0x0200, //!< Operand can be 256-bit YMM register. + kX86InstOpZmm = 0x0400, //!< Operand can be 512-bit ZMM register. + + kX86InstOpMem = 0x1000, //!< Operand can be memory. + kX86InstOpImm = 0x2000, //!< Operand can be immediate. + kX86InstOpLabel = 0x4000, //!< Operand can be label. + + //! Instruction operand doesn't have to be used. + //! + //! NOTE: If no operand is specified the meaning is clear (the operand at the + //! particular index doesn't exist), however, when one or more operand is + //! specified, it's not clear whether the operand can be omitted or not. When + //! `kX86InstOpNone` is used it means that the operand is not used in some + //! cases. + kX86InstOpNone = 0x8000 +}; + +// ============================================================================ +// [asmjit::X86Cond] +// ============================================================================ + +//! X86/X64 Condition codes. +ASMJIT_ENUM(X86Cond) { + kX86CondA = 0x07, // CF==0 & ZF==0 (unsigned) + kX86CondAE = 0x03, // CF==0 (unsigned) + kX86CondB = 0x02, // CF==1 (unsigned) + kX86CondBE = 0x06, // CF==1 | ZF==1 (unsigned) + kX86CondC = 0x02, // CF==1 + kX86CondE = 0x04, // ZF==1 (signed/unsigned) + kX86CondG = 0x0F, // ZF==0 & SF==OF (signed) + kX86CondGE = 0x0D, // SF==OF (signed) + kX86CondL = 0x0C, // SF!=OF (signed) + kX86CondLE = 0x0E, // ZF==1 | SF!=OF (signed) + kX86CondNA = 0x06, // CF==1 | ZF==1 (unsigned) + kX86CondNAE = 0x02, // CF==1 (unsigned) + kX86CondNB = 0x03, // CF==0 (unsigned) + kX86CondNBE = 0x07, // CF==0 & ZF==0 (unsigned) + kX86CondNC = 0x03, // CF==0 + kX86CondNE = 0x05, // ZF==0 (signed/unsigned) + kX86CondNG = 0x0E, // ZF==1 | SF!=OF (signed) + kX86CondNGE = 0x0C, // SF!=OF (signed) + kX86CondNL = 0x0D, // SF==OF (signed) + kX86CondNLE = 0x0F, // ZF==0 & SF==OF (signed) + kX86CondNO = 0x01, // OF==0 + kX86CondNP = 0x0B, // PF==0 + kX86CondNS = 0x09, // SF==0 + kX86CondNZ = 0x05, // ZF==0 + kX86CondO = 0x00, // OF==1 + kX86CondP = 0x0A, // PF==1 + kX86CondPE = 0x0A, // PF==1 + kX86CondPO = 0x0B, // PF==0 + kX86CondS = 0x08, // SF==1 + kX86CondZ = 0x04, // ZF==1 + + // Simplified condition codes. + kX86CondSign = kX86CondS, //!< Sign (S). + kX86CondNotSign = kX86CondNS, //!< Not Sign (NS). + + kX86CondOverflow = kX86CondO, //!< Signed Overflow (O) + kX86CondNotOverflow = kX86CondNO, //!< Not Signed Overflow (NO) + + kX86CondLess = kX86CondL, //!< Signed `a < b` (L or NGE). + kX86CondLessEqual = kX86CondLE, //!< Signed `a <= b` (LE or NG ). + kX86CondGreater = kX86CondG, //!< Signed `a > b` (G or NLE). + kX86CondGreaterEqual = kX86CondGE, //!< Signed `a >= b` (GE or NL ). + kX86CondBelow = kX86CondB, //!< Unsigned `a < b` (B or NAE). + kX86CondBelowEqual = kX86CondBE, //!< Unsigned `a <= b` (BE or NA ). + kX86CondAbove = kX86CondA, //!< Unsigned `a > b` (A or NBE). + kX86CondAboveEqual = kX86CondAE, //!< Unsigned `a >= b` (AE or NB ). + kX86CondEqual = kX86CondE, //!< Equal `a == b` (E or Z ). + kX86CondNotEqual = kX86CondNE, //!< Not Equal `a != b` (NE or NZ ). + + kX86CondParityEven = kX86CondP, + kX86CondParityOdd = kX86CondPO, + + // Aliases. + kX86CondZero = kX86CondZ, + kX86CondNotZero = kX86CondNZ, + kX86CondNegative = kX86CondS, + kX86CondPositive = kX86CondNS, + + // FPU-only. + kX86CondFpuUnordered = 0x10, + kX86CondFpuNotUnordered = 0x11, + + //! No condition code. 
+  kX86CondNone            = 0x12
+};
+
+// ============================================================================
+// [asmjit::X86EFlags]
+// ============================================================================
+
+//! X86/X64 EFLAGS bits (AsmJit specific).
+//!
+//! Each instruction stored in the AsmJit database contains flags that the
+//! instruction uses (reads) and flags that the instruction modifies (writes).
+//! This is used by instruction reordering, but can be used by third parties
+//! as it's part of the AsmJit API.
+//!
+//! NOTE: Flags defined here don't correspond to the real flags used by the
+//! X86/X64 architecture, defined in Intel's Manual Section
+//! `3.4.3 - EFLAGS Register`.
+//!
+//! NOTE: Flags are designed to fit in an 8-bit integer.
+ASMJIT_ENUM(X86EFlags) {
+  // --------------------------------------------------------------------------
+  // src-gendefs.js relies on the values of these masks; the tool has to be
+  // changed if you plan to modify `X86EFlags`.
+  // --------------------------------------------------------------------------
+
+  kX86EFlagO              = 0x01, //!< Overflow flag (OF).
+  kX86EFlagS              = 0x02, //!< Sign flag (SF).
+  kX86EFlagZ              = 0x04, //!< Zero flag (ZF).
+  kX86EFlagA              = 0x08, //!< Adjust flag (AF).
+  kX86EFlagP              = 0x10, //!< Parity flag (PF).
+  kX86EFlagC              = 0x20, //!< Carry flag (CF).
+  kX86EFlagD              = 0x40, //!< Direction flag (DF).
+  kX86EFlagX              = 0x80  //!< Any other flag that AsmJit doesn't use.
+};
+
+// ============================================================================
+// [asmjit::X86FpSw]
+// ============================================================================
+
+//! X86/X64 FPU status word.
+ASMJIT_ENUM(X86FpSw) {
+  kX86FpSw_Invalid        = 0x0001,
+  kX86FpSw_Denormalized   = 0x0002,
+  kX86FpSw_DivByZero      = 0x0004,
+  kX86FpSw_Overflow       = 0x0008,
+  kX86FpSw_Underflow      = 0x0010,
+  kX86FpSw_Precision      = 0x0020,
+  kX86FpSw_StackFault     = 0x0040,
+  kX86FpSw_Interrupt      = 0x0080,
+  kX86FpSw_C0             = 0x0100,
+  kX86FpSw_C1             = 0x0200,
+  kX86FpSw_C2             = 0x0400,
+  kX86FpSw_Top            = 0x3800,
+  kX86FpSw_C3             = 0x4000,
+  kX86FpSw_Busy           = 0x8000
+};
+
+// ============================================================================
+// [asmjit::X86FpCw]
+// ============================================================================
+
+//! X86/X64 FPU control word.
+ASMJIT_ENUM(X86FpCw) {
+  // Bits 0-5.
+  kX86FpCw_EM_Mask        = 0x003F,
+  kX86FpCw_EM_Invalid     = 0x0001,
+  kX86FpCw_EM_Denormal    = 0x0002,
+  kX86FpCw_EM_DivByZero   = 0x0004,
+  kX86FpCw_EM_Overflow    = 0x0008,
+  kX86FpCw_EM_Underflow   = 0x0010,
+  kX86FpCw_EM_Inexact     = 0x0020,
+
+  // Bits 8-9.
+  kX86FpCw_PC_Mask        = 0x0300,
+  kX86FpCw_PC_Float       = 0x0000,
+  kX86FpCw_PC_Reserved    = 0x0100,
+  kX86FpCw_PC_Double      = 0x0200,
+  kX86FpCw_PC_Extended    = 0x0300,
+
+  // Bits 10-11.
+  kX86FpCw_RC_Mask        = 0x0C00,
+  kX86FpCw_RC_Nearest     = 0x0000,
+  kX86FpCw_RC_Down        = 0x0400,
+  kX86FpCw_RC_Up          = 0x0800,
+  kX86FpCw_RC_Truncate    = 0x0C00,
+
+  // Bit 12.
+  kX86FpCw_IC_Mask        = 0x1000,
+  kX86FpCw_IC_Projective  = 0x0000,
+  kX86FpCw_IC_Affine      = 0x1000
+};
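+
+// ----------------------------------------------------------------------------
+// Editor's note: a hedged sketch, not part of the original patch, showing how
+// the `X86FpCw` bits above compose into a complete control word - here
+// truncation, extended precision, and all exceptions masked:
+//
+//   uint16_t cw = kX86FpCw_RC_Truncate  // Bits 10-11: round toward zero.
+//               | kX86FpCw_PC_Extended  // Bits 8-9: 80-bit precision.
+//               | kX86FpCw_EM_Mask;     // Bits 0-5: mask all exceptions.
+//   // cw == 0x0F3F; such a value would be loaded/stored with fldcw/fnstcw.
+// ----------------------------------------------------------------------------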
+
+// ============================================================================
+// [asmjit::X86Cmp]
+// ============================================================================
+
+//! X86/X64 comparison predicate used by the CMP[PD/PS/SD/SS] family of
+//! instructions.
+ASMJIT_ENUM(X86Cmp) {
+  kX86CmpEQ               = 0x00, //!< Equal (Quiet).
+  kX86CmpLT               = 0x01, //!< Less (Signaling).
+  kX86CmpLE               = 0x02, //!< Less/Equal (Signaling).
+  kX86CmpUNORD            = 0x03, //!< Unordered (Quiet).
+  kX86CmpNEQ              = 0x04, //!< Not Equal (Quiet).
+  kX86CmpNLT              = 0x05, //!< Not Less (Signaling).
+  kX86CmpNLE              = 0x06, //!< Not Less/Equal (Signaling).
+  kX86CmpORD              = 0x07  //!< Ordered (Quiet).
+};
+
+// ============================================================================
+// [asmjit::X86VCmp]
+// ============================================================================
+
+//! X86/X64 comparison predicate used by the VCMP[PD/PS/SD/SS] family of
+//! instructions.
+//!
+//! The first 8 values are compatible with \ref X86Cmp.
+ASMJIT_ENUM(X86VCmp) {
+  kX86VCmpEQ_OQ           = 0x00, //!< Equal (Quiet, Ordered).
+  kX86VCmpLT_OS           = 0x01, //!< Less (Signaling, Ordered).
+  kX86VCmpLE_OS           = 0x02, //!< Less/Equal (Signaling, Ordered).
+  kX86VCmpUNORD_Q         = 0x03, //!< Unordered (Quiet).
+  kX86VCmpNEQ_UQ          = 0x04, //!< Not Equal (Quiet, Unordered).
+  kX86VCmpNLT_US          = 0x05, //!< Not Less (Signaling, Unordered).
+  kX86VCmpNLE_US          = 0x06, //!< Not Less/Equal (Signaling, Unordered).
+  kX86VCmpORD_Q           = 0x07, //!< Ordered (Quiet).
+
+  kX86VCmpEQ_UQ           = 0x08, //!< Equal (Quiet, Unordered).
+  kX86VCmpNGE_US          = 0x09, //!< Not Greater/Equal (Signaling, Unordered).
+  kX86VCmpNGT_US          = 0x0A, //!< Not Greater (Signaling, Unordered).
+  kX86VCmpFALSE_OQ        = 0x0B, //!< False (Quiet, Ordered).
+  kX86VCmpNEQ_OQ          = 0x0C, //!< Not Equal (Quiet, Ordered).
+  kX86VCmpGE_OS           = 0x0D, //!< Greater/Equal (Signaling, Ordered).
+  kX86VCmpGT_OS           = 0x0E, //!< Greater (Signaling, Ordered).
+  kX86VCmpTRUE_UQ         = 0x0F, //!< True (Quiet, Unordered).
+  kX86VCmpEQ_OS           = 0x10, //!< Equal (Signaling, Ordered).
+  kX86VCmpLT_OQ           = 0x11, //!< Less (Quiet, Ordered).
+  kX86VCmpLE_OQ           = 0x12, //!< Less/Equal (Quiet, Ordered).
+  kX86VCmpUNORD_S         = 0x13, //!< Unordered (Signaling).
+  kX86VCmpNEQ_US          = 0x14, //!< Not Equal (Signaling, Unordered).
+  kX86VCmpNLT_UQ          = 0x15, //!< Not Less (Quiet, Unordered).
+  kX86VCmpNLE_UQ          = 0x16, //!< Not Less/Equal (Quiet, Unordered).
+  kX86VCmpORD_S           = 0x17, //!< Ordered (Signaling).
+  kX86VCmpEQ_US           = 0x18, //!< Equal (Signaling, Unordered).
+  kX86VCmpNGE_UQ          = 0x19, //!< Not Greater/Equal (Quiet, Unordered).
+  kX86VCmpNGT_UQ          = 0x1A, //!< Not Greater (Quiet, Unordered).
+  kX86VCmpFALSE_OS        = 0x1B, //!< False (Signaling, Ordered).
+  kX86VCmpNEQ_OS          = 0x1C, //!< Not Equal (Signaling, Ordered).
+  kX86VCmpGE_OQ           = 0x1D, //!< Greater/Equal (Quiet, Ordered).
+  kX86VCmpGT_OQ           = 0x1E, //!< Greater (Quiet, Ordered).
+  kX86VCmpTRUE_US         = 0x1F  //!< True (Signaling, Unordered).
+};
+
+// ============================================================================
+// [asmjit::X86Round]
+// ============================================================================
+
+//! X86/X64 round encoding used by the ROUND[PD/PS/SD/SS] family of
+//! instructions.
+ASMJIT_ENUM(X86Round) {
+  kX86RoundNearest        = 0x00, //!< Round to nearest (even).
+  kX86RoundDown           = 0x01, //!< Round down toward -INF (floor).
+  kX86RoundUp             = 0x02, //!< Round up toward +INF (ceil).
+  kX86RoundTrunc          = 0x03, //!< Round toward zero (truncate).
+  kX86RoundCurrent        = 0x04, //!< Round by the current rounding mode (ignores other RC bits).
+  kX86RoundInexact        = 0x08  //!< Avoid the inexact exception, if set.
+};
+
+// ============================================================================
+// [asmjit::X86Prefetch]
+// ============================================================================
+
+//! X86/X64 prefetch hints.
+ASMJIT_ENUM(X86Prefetch) {
+  kX86PrefetchNTA         = 0, //!< Prefetch by using NT hint.
+  kX86PrefetchT0          = 1, //!< Prefetch to L0 cache.
+  kX86PrefetchT1          = 2, //!< Prefetch to L1 cache.
+  kX86PrefetchT2          = 3  //!< Prefetch to L2 cache.
+};
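+
+// ----------------------------------------------------------------------------
+// Editor's note: a hedged sketch, not part of the original patch. As the doc
+// above states, the first eight `X86VCmp` predicates alias `X86Cmp`, and the
+// `X86Round` values form the 8-bit immediate of the ROUND[PD/PS/SD/SS]
+// family, so they can be combined directly:
+//
+//   ASMJIT_ASSERT(kX86VCmpEQ_OQ == kX86CmpEQ); // Imm 0x00 in both encodings.
+//   // Truncate and avoid the inexact exception in one ROUNDSD immediate:
+//   uint32_t imm = kX86RoundTrunc | kX86RoundInexact; // == 0x0B.
+// ----------------------------------------------------------------------------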
+};
+
+// ============================================================================
+// [asmjit::X86InstExtendedInfo]
+// ============================================================================
+
+//! X86/X64 instruction extended information.
+//!
+//! Extended information has been introduced to minimize data needed for a
+//! single instruction, because two or more instructions can share common
+//! data, for example operand definitions or a secondary opcode, which is
+//! used by only a few instructions.
+struct X86InstExtendedInfo {
+  // --------------------------------------------------------------------------
+  // [Accessors - Instruction Encoding]
+  // --------------------------------------------------------------------------
+
+  //! Get instruction encoding, see \ref kX86InstEncoding.
+  ASMJIT_INLINE uint32_t getEncoding() const noexcept {
+    return _encoding;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - Instruction Flags]
+  // --------------------------------------------------------------------------
+
+  //! Get whether the instruction has a `flag`, see `X86InstFlags`.
+  ASMJIT_INLINE bool hasFlag(uint32_t flag) const noexcept {
+    return (_instFlags & flag) != 0;
+  }
+
+  //! Get all instruction flags, see `X86InstFlags`.
+  ASMJIT_INLINE uint32_t getFlags() const noexcept {
+    return _instFlags;
+  }
+
+  //! Get whether the first operand is read-only.
+  ASMJIT_INLINE bool isRO() const noexcept {
+    return (getFlags() & kX86InstFlagRW) == kX86InstFlagRO;
+  }
+
+  //! Get whether the first operand is write-only.
+  ASMJIT_INLINE bool isWO() const noexcept {
+    return (getFlags() & kX86InstFlagRW) == kX86InstFlagWO;
+  }
+
+  //! Get whether the first operand is read-write.
+  ASMJIT_INLINE bool isRW() const noexcept {
+    return (getFlags() & kX86InstFlagRW) == kX86InstFlagRW;
+  }
+
+  //! Get whether the instruction is a typical Exchange instruction.
+  //!
+  //! Exchange instructions are 'xchg' and 'xadd'.
+  ASMJIT_INLINE bool isXchg() const noexcept {
+    return hasFlag(kX86InstFlagXchg);
+  }
+
+  //! Get whether the instruction is a control-flow instruction.
+  //!
+  //! A control-flow instruction is an instruction that can perform a branch;
+  //! typical examples are `jmp`, `jcc`, `call`, and `ret`.
+  ASMJIT_INLINE bool isFlow() const noexcept {
+    return hasFlag(kX86InstFlagFlow);
+  }
+
+  //! Get whether the instruction accesses Fp register(s).
+  ASMJIT_INLINE bool isFp() const noexcept {
+    return hasFlag(kX86InstFlagFp);
+  }
+
+  //! Get whether the instruction can be prefixed by LOCK prefix.
+  ASMJIT_INLINE bool isLockable() const noexcept {
+    return hasFlag(kX86InstFlagLock);
+  }
+
+  //! Get whether the instruction is of a special type (used by `Compiler`
+  //! to manage additional variables or functionality).
+  ASMJIT_INLINE bool isSpecial() const noexcept {
+    return hasFlag(kX86InstFlagSpecial);
+  }
+
+  //! Get whether the instruction is of a special type and performs a memory
+  //! access.
+  ASMJIT_INLINE bool isSpecialMem() const noexcept {
+    return hasFlag(kX86InstFlagSpecialMem);
+  }
+
+  //! Get whether the move instruction zeroes the rest of the register
+  //! if the source is a memory operand.
+  //!
+  //! Basically a flag needed only to support the `movsd` and `movss`
+  //! instructions.
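+  //!
+  //! For example, `movss xmm0, dword ptr [mem]` zeroes the upper 96 bits of
+  //! `xmm0`, while the register-to-register form `movss xmm0, xmm1` leaves
+  //! them unchanged.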
+  ASMJIT_INLINE bool isZeroIfMem() const noexcept {
+    return hasFlag(kX86InstFlagZeroIfMem);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - EFlags]
+  // --------------------------------------------------------------------------
+
+  //! Get EFLAGS that the instruction reads, see \ref X86EFlags.
+  ASMJIT_INLINE uint32_t getEFlagsIn() const noexcept {
+    return _eflagsIn;
+  }
+
+  //! Get EFLAGS that the instruction writes, see \ref X86EFlags.
+  ASMJIT_INLINE uint32_t getEFlagsOut() const noexcept {
+    return _eflagsOut;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - Write Index/Size]
+  // --------------------------------------------------------------------------
+
+  //! Get the destination index of a WRITE operation.
+  ASMJIT_INLINE uint32_t getWriteIndex() const noexcept {
+    return _writeIndex;
+  }
+
+  //! Get the number of bytes that will be written by a WRITE operation.
+  ASMJIT_INLINE uint32_t getWriteSize() const noexcept {
+    return _writeSize;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - Operand-Flags]
+  // --------------------------------------------------------------------------
+
+  //! Get flags of operand at index `index`.
+  //!
+  //! See \ref X86InstInfo::getOperandFlags() for more details.
+  ASMJIT_INLINE uint16_t getOperandFlags(uint32_t index) const noexcept {
+    ASMJIT_ASSERT(index < ASMJIT_ARRAY_SIZE(_opFlags));
+    return _opFlags[index];
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - OpCode]
+  // --------------------------------------------------------------------------
+
+  //! Get the secondary instruction opcode, see \ref X86InstOpCodeFlags.
+  //!
+  //! See \ref X86InstInfo::getSecondaryOpCode() for more details.
+  ASMJIT_INLINE uint32_t getSecondaryOpCode() const noexcept {
+    return _secondaryOpCode;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! Instruction encoding.
+  uint8_t _encoding;
+
+  //! Destination byte index of a WRITE operation, default 0.
+  uint8_t _writeIndex;
+
+  //! Count of bytes affected by a write operation, needed by analysis for all
+  //! instructions that do not read the overwritten register. Only used with
+  //! the `kX86InstFlagWO` flag. If `_writeSize` is zero it is automatically
+  //! deduced from the size of the destination register.
+  //!
+  //! In general most SSE write-only instructions should use 16 bytes, as this
+  //! is the size of an XMM register. This means that the low 16 bytes of the
+  //! register are changed and the rest remains unchanged. However, AVX
+  //! instructions should use the size of the ZMM register, as every AVX
+  //! instruction clears the rest of the register (AVX/AVX2 instructions zero
+  //! the high part of ZMM if available).
+  uint8_t _writeSize;
+
+  //! EFlags read by the instruction.
+  uint8_t _eflagsIn;
+  //! EFlags written by the instruction.
+  uint8_t _eflagsOut;
+
+  //! \internal
+  uint8_t _reserved;
+
+  //! Operands' flags, up to 5 operands.
+  uint16_t _opFlags[5];
+
+  //! Instruction flags.
+  uint32_t _instFlags;
+
+  //! Secondary opcode.
+  uint32_t _secondaryOpCode;
+};
+
+// ============================================================================
+// [asmjit::X86InstInfo]
+// ============================================================================
+
+//! X86/X64 instruction information.
+struct X86InstInfo {
+  // --------------------------------------------------------------------------
+  // [Accessors - Extended-Info]
+  // --------------------------------------------------------------------------
+
+  //! Get `X86InstExtendedInfo` for this instruction.
+  ASMJIT_INLINE const X86InstExtendedInfo& getExtendedInfo() const noexcept {
+    return _x86InstExtendedInfo[_extendedIndex];
+  }
+
+  //! Get index to the `_x86InstExtendedInfo` table.
+  ASMJIT_INLINE uint32_t _getExtendedIndex() const noexcept {
+    return _extendedIndex;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - Instruction Encoding]
+  // --------------------------------------------------------------------------
+
+  //! Get instruction encoding, see \ref X86InstEncoding.
+  ASMJIT_INLINE uint32_t getEncoding() const noexcept {
+    return getExtendedInfo().getEncoding();
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - Instruction Flags]
+  // --------------------------------------------------------------------------
+
+  //! Get whether the instruction has flag `flag`, see `X86InstFlags`.
+  ASMJIT_INLINE bool hasFlag(uint32_t flag) const noexcept {
+    return (getFlags() & flag) != 0;
+  }
+
+  //! Get instruction flags, see `X86InstFlags`.
+  ASMJIT_INLINE uint32_t getFlags() const noexcept {
+    return getExtendedInfo().getFlags();
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - EFlags]
+  // --------------------------------------------------------------------------
+
+  //! Get EFLAGS that the instruction reads, see \ref X86EFlags.
+  ASMJIT_INLINE uint32_t getEFlagsIn() const noexcept {
+    return getExtendedInfo().getEFlagsIn();
+  }
+
+  //! Get EFLAGS that the instruction writes, see \ref X86EFlags.
+  ASMJIT_INLINE uint32_t getEFlagsOut() const noexcept {
+    return getExtendedInfo().getEFlagsOut();
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - Write Index/Size]
+  // --------------------------------------------------------------------------
+
+  //! Get the destination index of a WRITE operation.
+  ASMJIT_INLINE uint32_t getWriteIndex() const noexcept {
+    return getExtendedInfo().getWriteIndex();
+  }
+
+  //! Get the number of bytes that will be written by a WRITE operation.
+  ASMJIT_INLINE uint32_t getWriteSize() const noexcept {
+    return getExtendedInfo().getWriteSize();
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - Operand-Flags]
+  // --------------------------------------------------------------------------
+
+  //! Get flags of operand at index `index`.
+  ASMJIT_INLINE uint32_t getOperandFlags(uint32_t index) const noexcept {
+    return getExtendedInfo().getOperandFlags(index);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - OpCode]
+  // --------------------------------------------------------------------------
+
+  //! Get the primary instruction opcode, see \ref X86InstOpCodeFlags.
+  ASMJIT_INLINE uint32_t getPrimaryOpCode() const noexcept {
+    return _primaryOpCode;
+  }
+
+  //! Get the secondary instruction opcode, see \ref X86InstOpCodeFlags.
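+  //!
+  //! Note that the secondary opcode is not stored in `X86InstInfo` itself;
+  //! it lives in the shared `X86InstExtendedInfo` record, which keeps the
+  //! per-instruction table compact.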
+  ASMJIT_INLINE uint32_t getSecondaryOpCode() const noexcept {
+    return getExtendedInfo().getSecondaryOpCode();
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! \internal
+  uint16_t _reserved;
+  //! Extended information index in the `_x86InstExtendedInfo[]` array.
+  uint16_t _extendedIndex;
+
+  //! Primary opcode; the secondary opcode is stored in the
+  //! `X86InstExtendedInfo` table.
+  uint32_t _primaryOpCode;
+};
+
+// ============================================================================
+// [asmjit::X86Util]
+// ============================================================================
+
+struct X86Util {
+  // --------------------------------------------------------------------------
+  // [Instruction Id <-> Name]
+  // --------------------------------------------------------------------------
+
+#if !defined(ASMJIT_DISABLE_TEXT)
+  //! Get an instruction ID from a given instruction `name`.
+  //!
+  //! If there is an exact match the instruction id is returned, otherwise
+  //! `kInstIdNone` (zero) is returned.
+  //!
+  //! The given `name` doesn't have to be null-terminated if `len` is provided.
+  ASMJIT_API static uint32_t getInstIdByName(const char* name, size_t len = kInvalidIndex) noexcept;
+
+  //! Get an instruction name from a given instruction `id`.
+  ASMJIT_API static const char* getInstNameById(uint32_t id) noexcept;
+#endif // !ASMJIT_DISABLE_TEXT
+
+  // --------------------------------------------------------------------------
+  // [Instruction Info]
+  // --------------------------------------------------------------------------
+
+  //! Get instruction information based on `instId`.
+  //!
+  //! NOTE: `instId` has to be a valid instruction ID; it can't be greater
+  //! than or equal to `_kX86InstIdCount`. This asserts in debug mode.
+  static ASMJIT_INLINE const X86InstInfo& getInstInfo(uint32_t instId) noexcept {
+    ASMJIT_ASSERT(instId < _kX86InstIdCount);
+    return _x86InstInfo[instId];
+  }
+
+  // --------------------------------------------------------------------------
+  // [Condition Codes]
+  // --------------------------------------------------------------------------
+
+  //! Get the reversed condition code; corresponds to transposing the
+  //! operands of a comparison.
+  static ASMJIT_INLINE uint32_t reverseCond(uint32_t cond) noexcept {
+    ASMJIT_ASSERT(cond < ASMJIT_ARRAY_SIZE(_x86ReverseCond));
+    return _x86ReverseCond[cond];
+  }
+
+  //! Get the equivalent of a negated condition code.
+  static ASMJIT_INLINE uint32_t negateCond(uint32_t cond) noexcept {
+    ASMJIT_ASSERT(cond < ASMJIT_ARRAY_SIZE(_x86ReverseCond));
+    return cond ^ static_cast<uint32_t>(cond < kX86CondNone);
+  }
+
+  //! Translate condition code `cc` to `cmovcc` instruction code.
+  //! \sa \ref X86InstId, \ref _kX86InstIdCmovcc.
+  static ASMJIT_INLINE uint32_t condToCmovcc(uint32_t cond) noexcept {
+    ASMJIT_ASSERT(static_cast<uint32_t>(cond) < ASMJIT_ARRAY_SIZE(_x86CondToCmovcc));
+    return _x86CondToCmovcc[cond];
+  }
+
+  //! Translate condition code `cc` to `jcc` instruction code.
+  //! \sa \ref X86InstId, \ref _kX86InstIdJcc.
+  static ASMJIT_INLINE uint32_t condToJcc(uint32_t cond) noexcept {
+    ASMJIT_ASSERT(static_cast<uint32_t>(cond) < ASMJIT_ARRAY_SIZE(_x86CondToJcc));
+    return _x86CondToJcc[cond];
+  }
+
+  //! Translate condition code `cc` to `setcc` instruction code.
+  //! \sa \ref X86InstId, \ref _kX86InstIdSetcc.
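+  //!
+  //! A usage sketch (illustrative only; assumes the condition codes defined
+  //! earlier in this header, e.g. `kX86CondE`):
+  //!
+  //! ~~~
+  //! // Instruction id of `sete`.
+  //! uint32_t sete = X86Util::condToSetcc(kX86CondE);
+  //! // Instruction id of `setne`, via the negated condition.
+  //! uint32_t setne = X86Util::condToSetcc(X86Util::negateCond(kX86CondE));
+  //! ~~~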
+  static ASMJIT_INLINE uint32_t condToSetcc(uint32_t cond) noexcept {
+    ASMJIT_ASSERT(static_cast<uint32_t>(cond) < ASMJIT_ARRAY_SIZE(_x86CondToSetcc));
+    return _x86CondToSetcc[cond];
+  }
+
+  // --------------------------------------------------------------------------
+  // [Shuffle (SIMD)]
+  // --------------------------------------------------------------------------
+
+  //! Pack a shuffle constant to be used with multimedia instructions (2 values).
+  //!
+  //! \param a Position of the first component [0, 1], inclusive.
+  //! \param b Position of the second component [0, 1], inclusive.
+  //!
+  //! Shuffle constants can be used to encode an immediate for these instructions:
+  //! - `shufpd`
+  static ASMJIT_INLINE int shuffle(uint32_t a, uint32_t b) noexcept {
+    ASMJIT_ASSERT(a <= 0x1 && b <= 0x1);
+    uint32_t result = (a << 1) | b;
+    return static_cast<int>(result);
+  }
+
+  //! Pack a shuffle constant to be used with multimedia instructions (4 values).
+  //!
+  //! \param a Position of the first component [0, 3], inclusive.
+  //! \param b Position of the second component [0, 3], inclusive.
+  //! \param c Position of the third component [0, 3], inclusive.
+  //! \param d Position of the fourth component [0, 3], inclusive.
+  //!
+  //! Shuffle constants can be used to encode an immediate for these instructions:
+  //! - `pshufw()`
+  //! - `pshufd()`
+  //! - `pshuflw()`
+  //! - `pshufhw()`
+  //! - `shufps()`
+  static ASMJIT_INLINE int shuffle(uint32_t a, uint32_t b, uint32_t c, uint32_t d) noexcept {
+    ASMJIT_ASSERT(a <= 0x3 && b <= 0x3 && c <= 0x3 && d <= 0x3);
+    uint32_t result = (a << 6) | (b << 4) | (c << 2) | d;
+    return static_cast<int>(result);
+  }
+};
+
+//! \}
+
+} // asmjit namespace
+
+#undef _OP_ID
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_X86_X86INST_H
diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86operand.cpp b/DynamicHooks/thirdparty/AsmJit/x86/x86operand.cpp
new file mode 100644
index 0000000..81bdf95
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/x86/x86operand.cpp
@@ -0,0 +1,85 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Guard]
+#include "../build.h"
+#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)
+
+// [Dependencies]
+#include "../x86/x86operand.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+namespace x86 {
+
+// ============================================================================
+// [asmjit::X86Mem - abs[]]
+// ============================================================================
+
+X86Mem ptr_abs(Ptr p, int32_t disp, uint32_t size) noexcept {
+  X86Mem m(NoInit);
+
+  m._init_packed_op_sz_b0_b1_id(Operand::kTypeMem, size, kMemTypeAbsolute, 0, kInvalidValue);
+  m._vmem.index = kInvalidValue;
+  m._vmem.displacement = static_cast<int32_t>((intptr_t)(p + disp));
+
+  return m;
+}
+
+X86Mem ptr_abs(Ptr p, const X86Reg& index, uint32_t shift, int32_t disp, uint32_t size) noexcept {
+  X86Mem m(NoInit);
+  uint32_t flags = shift << kX86MemShiftIndex;
+
+  if (index.isGp())
+    flags |= X86Mem::_getGpdFlags(index);
+  else if (index.isXmm())
+    flags |= kX86MemVSibXmm << kX86MemVSibIndex;
+  else if (index.isYmm())
+    flags |= kX86MemVSibYmm << kX86MemVSibIndex;
+
+  m._init_packed_op_sz_b0_b1_id(Operand::kTypeMem, size, kMemTypeAbsolute, flags, kInvalidValue);
+  m._vmem.index = index.getRegIndex();
+  m._vmem.displacement = static_cast<int32_t>((intptr_t)(p + disp));
+
+  return m;
+}
+
+#if !defined(ASMJIT_DISABLE_COMPILER)
+X86Mem ptr_abs(Ptr p, const X86Var& index, uint32_t shift, int32_t disp, uint32_t size) noexcept {
+  X86Mem m(NoInit);
+  uint32_t flags = shift << kX86MemShiftIndex;
+
+  const Var& index_ = reinterpret_cast<const Var&>(index);
+  uint32_t indexRegType = index_.getRegType();
+
+  if (indexRegType <= kX86RegTypeGpq)
+    flags |= X86Mem::_getGpdFlags(reinterpret_cast<const Var&>(index));
+  else if (indexRegType == kX86RegTypeXmm)
+    flags |= kX86MemVSibXmm << kX86MemVSibIndex;
+  else if (indexRegType == kX86RegTypeYmm)
+    flags |= kX86MemVSibYmm << kX86MemVSibIndex;
+
+  m._init_packed_op_sz_b0_b1_id(Operand::kTypeMem, size, kMemTypeAbsolute, flags, kInvalidValue);
+  m._vmem.index = index_.getId();
+  m._vmem.displacement = static_cast<int32_t>((intptr_t)(p + disp));
+
+  return m;
+}
+#endif // !ASMJIT_DISABLE_COMPILER
+
+} // x86 namespace
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64
diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86operand.h b/DynamicHooks/thirdparty/AsmJit/x86/x86operand.h
new file mode 100644
index 0000000..3379204
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/x86/x86operand.h
@@ -0,0 +1,2592 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_X86_X86OPERAND_H
+#define _ASMJIT_X86_X86OPERAND_H
+
+// [Dependencies]
+#include "../base/assembler.h"
+#include "../base/compiler.h"
+#include "../base/operand.h"
+#include "../base/utils.h"
+#include "../base/vectypes.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+//! \internal
+//!
+//! Internal macro to get an operand ID by casting it to `Operand`. Basically
+//! allows getting the id of an operand that has just been 'typedef'ed.
+#define ASMJIT_OP_ID(_Op_) reinterpret_cast<const Operand&>(_Op_).getId()
+
+namespace asmjit {
+
+// ============================================================================
+// [Forward Declarations]
+// ============================================================================
+
+class X86Reg;
+class X86GpReg;
+class X86FpReg;
+class X86MmReg;
+class X86KReg;
+class X86XmmReg;
+class X86YmmReg;
+class X86ZmmReg;
+
+class X86SegReg;
+class X86RipReg;
+
+#if !defined(ASMJIT_DISABLE_COMPILER)
+class X86Var;
+class X86GpVar;
+class X86MmVar;
+class X86KVar;
+class X86XmmVar;
+class X86YmmVar;
+class X86ZmmVar;
+#endif // !ASMJIT_DISABLE_COMPILER
+
+//! \addtogroup asmjit_x86
+//! \{
+
+// ============================================================================
+// [asmjit::X86RegClass]
+// ============================================================================
+
+//! X86/X64 register class.
+ASMJIT_ENUM(X86RegClass) {
+  // --------------------------------------------------------------------------
+  // [Regs & Vars]
+  // --------------------------------------------------------------------------
+
+  //! X86/X64 Gp register class (compatible with universal \ref kRegClassGp).
+  kX86RegClassGp = kRegClassGp,
+  //! X86/X64 Mm register class.
+  kX86RegClassMm = 1,
+  //! X86/X64 K register class.
+  kX86RegClassK = 2,
+  //! X86/X64 XMM/YMM/ZMM register class.
+  kX86RegClassXyz = 3,
+
+  //! \internal
+  //!
+  //! Last register class that is managed by `X86Compiler`, used by asserts.
+  _kX86RegClassManagedCount = 4,
+
+  // --------------------------------------------------------------------------
+  // [Regs Only]
+  // --------------------------------------------------------------------------
+
+  //! X86/X64 Fp register class.
+  kX86RegClassFp = 4,
+
+  //! Count of X86/X64 register classes.
+  kX86RegClassCount = 5
+};
+
+// ============================================================================
+// [asmjit::X86RegType]
+// ============================================================================
+
+//! X86/X64 register type.
+ASMJIT_ENUM(X86RegType) {
+  //! Low GPB register (AL, BL, CL, DL, ...).
+  kX86RegTypeGpbLo = 0x01,
+  //! High GPB register (AH, BH, CH, DH only).
+  kX86RegTypeGpbHi = 0x02,
+
+  //! \internal
+  //!
+  //! High GPB register patched to a native index (4-7).
+  _kX86RegTypePatchedGpbHi = kX86RegTypeGpbLo | kX86RegTypeGpbHi,
+
+  //! GPW register.
+  kX86RegTypeGpw = 0x10,
+  //! GPD register.
+  kX86RegTypeGpd = 0x20,
+  //! GPQ register (X64).
+  kX86RegTypeGpq = 0x30,
+
+  //! FPU register.
+  kX86RegTypeFp = 0x40,
+  //! MMX register (MMX+).
+  kX86RegTypeMm = 0x50,
+
+  //! K register (AVX512+).
+  kX86RegTypeK = 0x60,
+
+  //! XMM register (SSE+).
+  kX86RegTypeXmm = 0x70,
+  //! YMM register (AVX+).
+  kX86RegTypeYmm = 0x80,
+  //! ZMM register (AVX512+).
+  kX86RegTypeZmm = 0x90,
+
+  //! Instruction pointer (RIP).
+  kX86RegTypeRip = 0xE0,
+  //! Segment register.
+  kX86RegTypeSeg = 0xF0
+};
+
+// ============================================================================
+// [asmjit::X86RegIndex]
+// ============================================================================
+
+//! X86/X64 register indexes.
+//!
+//! NOTE: Register indexes have been reduced to only support general purpose
+//! registers. There is no need to have enumerations with a number suffix that
+//! expands to exactly the same value as the suffix itself.
+ASMJIT_ENUM(X86RegIndex) {
+  //! Index of Al/Ah/Ax/Eax/Rax registers.
+  kX86RegIndexAx = 0,
+  //! Index of Cl/Ch/Cx/Ecx/Rcx registers.
+  kX86RegIndexCx = 1,
+  //! Index of Dl/Dh/Dx/Edx/Rdx registers.
+  kX86RegIndexDx = 2,
+  //! Index of Bl/Bh/Bx/Ebx/Rbx registers.
+  kX86RegIndexBx = 3,
+  //! Index of Spl/Sp/Esp/Rsp registers.
+  kX86RegIndexSp = 4,
+  //! Index of Bpl/Bp/Ebp/Rbp registers.
+  kX86RegIndexBp = 5,
+  //! Index of Sil/Si/Esi/Rsi registers.
+  kX86RegIndexSi = 6,
+  //! Index of Dil/Di/Edi/Rdi registers.
+  kX86RegIndexDi = 7,
+  //! Index of R8b/R8w/R8d/R8 registers (64-bit only).
+  kX86RegIndexR8 = 8,
+  //! Index of R9b/R9w/R9d/R9 registers (64-bit only).
+  kX86RegIndexR9 = 9,
+  //! Index of R10b/R10w/R10d/R10 registers (64-bit only).
+  kX86RegIndexR10 = 10,
+  //! Index of R11b/R11w/R11d/R11 registers (64-bit only).
+  kX86RegIndexR11 = 11,
+  //! Index of R12b/R12w/R12d/R12 registers (64-bit only).
+  kX86RegIndexR12 = 12,
+  //! Index of R13b/R13w/R13d/R13 registers (64-bit only).
+  kX86RegIndexR13 = 13,
+  //! Index of R14b/R14w/R14d/R14 registers (64-bit only).
+  kX86RegIndexR14 = 14,
+  //! Index of R15b/R15w/R15d/R15 registers (64-bit only).
+  kX86RegIndexR15 = 15
+};
+
+// ============================================================================
+// [asmjit::X86Seg]
+// ============================================================================
+
+//! X86/X64 segment codes.
+ASMJIT_ENUM(X86Seg) {
+  //! No/Default segment.
+  kX86SegDefault = 0,
+  //! Es segment.
+  kX86SegEs = 1,
+  //! Cs segment.
+  kX86SegCs = 2,
+  //! Ss segment.
+  kX86SegSs = 3,
+  //! Ds segment.
+  kX86SegDs = 4,
+  //! Fs segment.
+  kX86SegFs = 5,
+  //! Gs segment.
+  kX86SegGs = 6,
+
+  //! Count of X86 segment registers supported by AsmJit.
+  //!
+  //! NOTE: X86 architecture has 6 segment registers - ES, CS, SS, DS, FS, GS.
+  //! X64 architecture lowers them down to just FS and GS. AsmJit supports 7
+  //! segment registers - all addressable in both X86 and X64 modes and one
+  //! extra called `kX86SegDefault`, which is AsmJit specific and means that
+  //! no segment register is specified, so the segment prefix will not be
+  //! emitted.
+  kX86SegCount = 7
+};
+
+// ============================================================================
+// [asmjit::X86MemVSib]
+// ============================================================================
+
+//! X86/X64 index register legacy and AVX2 (VSIB) support.
+ASMJIT_ENUM(X86MemVSib) {
+  //! Memory operand uses GPD/GPQ index (or no index register).
+  kX86MemVSibGpz = 0,
+  //! Memory operand uses XMM index (or no index register).
+  kX86MemVSibXmm = 1,
+  //! Memory operand uses YMM index (or no index register).
+  kX86MemVSibYmm = 2,
+  //! Memory operand uses ZMM index (or no index register).
+  kX86MemVSibZmm = 3
+};
+
+// ============================================================================
+// [asmjit::X86MemFlags]
+// ============================================================================
+
+//! \internal
+//!
+//! X86/X64 specific memory flags.
+ASMJIT_ENUM(X86MemFlags) {
+  kX86MemSegBits = 0x7,
+  kX86MemSegIndex = 0,
+  kX86MemSegMask = kX86MemSegBits << kX86MemSegIndex,
+
+  kX86MemGpdBits = 0x1,
+  kX86MemGpdIndex = 3,
+  kX86MemGpdMask = kX86MemGpdBits << kX86MemGpdIndex,
+
+  kX86MemVSibBits = 0x3,
+  kX86MemVSibIndex = 4,
+  kX86MemVSibMask = kX86MemVSibBits << kX86MemVSibIndex,
+
+  kX86MemShiftBits = 0x3,
+  kX86MemShiftIndex = 6,
+  kX86MemShiftMask = kX86MemShiftBits << kX86MemShiftIndex
+};
+
+// ============================================================================
+// [asmjit::X86VarType]
+// ============================================================================
+
+//! X86/X64 variable type.
+ASMJIT_ENUM(X86VarType) {
+  //! Variable is SP-FP (FPU).
+  kX86VarTypeFp32 = kVarTypeFp32,
+  //! Variable is DP-FP (FPU).
+  kX86VarTypeFp64 = kVarTypeFp64,
+
+  //! Variable is MMX (MMX).
+  kX86VarTypeMm = 12,
+
+  //! Variable is K (AVX512+).
+  kX86VarTypeK,
+
+  //! Variable is XMM (SSE+).
+  kX86VarTypeXmm,
+  //! Variable is a scalar XMM SP-FP number.
+  kX86VarTypeXmmSs,
+  //! Variable is a packed XMM SP-FP number (4 floats).
+  kX86VarTypeXmmPs,
+  //! Variable is a scalar XMM DP-FP number.
+  kX86VarTypeXmmSd,
+  //! Variable is a packed XMM DP-FP number (2 doubles).
+  kX86VarTypeXmmPd,
+
+  //! Variable is YMM (AVX+).
+  kX86VarTypeYmm,
+  //! Variable is a packed YMM SP-FP number (8 floats).
+  kX86VarTypeYmmPs,
+  //! Variable is a packed YMM DP-FP number (4 doubles).
+  kX86VarTypeYmmPd,
+
+  //! Variable is ZMM (AVX512+).
+  kX86VarTypeZmm,
+  //! Variable is a packed ZMM SP-FP number (16 floats).
+  kX86VarTypeZmmPs,
+  //! Variable is a packed ZMM DP-FP number (8 doubles).
+  kX86VarTypeZmmPd,
+
+  //! Count of variable types.
+  kX86VarTypeCount,
+
+  //! \internal
+  //! \{
+  _kX86VarTypeMmStart = kX86VarTypeMm,
+  _kX86VarTypeMmEnd = kX86VarTypeMm,
+
+  _kX86VarTypeXmmStart = kX86VarTypeXmm,
+  _kX86VarTypeXmmEnd = kX86VarTypeXmmPd,
+
+  _kX86VarTypeYmmStart = kX86VarTypeYmm,
+  _kX86VarTypeYmmEnd = kX86VarTypeYmmPd,
+
+  _kX86VarTypeZmmStart = kX86VarTypeZmm,
+  _kX86VarTypeZmmEnd = kX86VarTypeZmmPd
+  //! \}
+};
+
+// ============================================================================
+// [asmjit::X86RegCount]
+// ============================================================================
+
+//! \internal
+//!
+//! X86/X64 registers count.
+//!
+//! Since the number of registers has changed across CPU generations, the
+//! `X86RegCount` class is used by `X86Assembler` and `X86Compiler` to provide
+//! a way to get the number of available registers dynamically. 32-bit mode
+//! always offers only 8 registers of all classes, whereas 64-bit mode offers
+//! 16 Gp registers and 16 XMM/YMM/ZMM registers. The AVX512 instruction set
+//! doubles the number of SIMD registers (XMM/YMM/ZMM) to 32; this mode has to
+//! be explicitly enabled to take effect as it changes some assumptions.
+//!
+//! `X86RegCount` is also used extensively by `X86Compiler`'s register
+//! allocator and data structures. Fp registers were omitted as they are never
+//! mapped to variables and thus don't need to be managed.
+//!
+//! NOTE: At the moment `X86RegCount` can fit into 32 bits, having 8 bits for
+//! each register class except `fp`. This can change in the future after a
+//! new instruction set, which adds more registers, is introduced.
+struct X86RegCount {
+  // --------------------------------------------------------------------------
+  // [Zero]
+  // --------------------------------------------------------------------------
+
+  //! Reset all counters to zero.
+  ASMJIT_INLINE void reset() noexcept { _packed = 0; }
+
+  // --------------------------------------------------------------------------
+  // [Get]
+  // --------------------------------------------------------------------------
+
+  //! Get register count by a register class `rc`.
+  ASMJIT_INLINE uint32_t get(uint32_t rc) const noexcept {
+    ASMJIT_ASSERT(rc < _kX86RegClassManagedCount);
+
+    uint32_t shift = Utils::byteShiftOfDWordStruct(rc);
+    return (_packed >> shift) & static_cast<uint32_t>(0xFF);
+  }
+
+  //! Get Gp count.
+  ASMJIT_INLINE uint32_t getGp() const noexcept { return get(kX86RegClassGp); }
+  //! Get Mm count.
+  ASMJIT_INLINE uint32_t getMm() const noexcept { return get(kX86RegClassMm); }
+  //! Get K count.
+  ASMJIT_INLINE uint32_t getK() const noexcept { return get(kX86RegClassK); }
+  //! Get XMM/YMM/ZMM count.
+  ASMJIT_INLINE uint32_t getXyz() const noexcept { return get(kX86RegClassXyz); }
+
+  // --------------------------------------------------------------------------
+  // [Set]
+  // --------------------------------------------------------------------------
+
+  //! Set register count by a register class `rc`.
+  ASMJIT_INLINE void set(uint32_t rc, uint32_t n) noexcept {
+    ASMJIT_ASSERT(rc < _kX86RegClassManagedCount);
+    ASMJIT_ASSERT(n <= 0xFF);
+
+    uint32_t shift = Utils::byteShiftOfDWordStruct(rc);
+    _packed = (_packed & ~static_cast<uint32_t>(0xFF << shift)) + (n << shift);
+  }
+
+  //! Set Gp count.
+  ASMJIT_INLINE void setGp(uint32_t n) noexcept { set(kX86RegClassGp, n); }
+  //! Set Mm count.
+  ASMJIT_INLINE void setMm(uint32_t n) noexcept { set(kX86RegClassMm, n); }
+  //! Set K count.
+  ASMJIT_INLINE void setK(uint32_t n) noexcept { set(kX86RegClassK, n); }
+  //! Set XMM/YMM/ZMM count.
+  ASMJIT_INLINE void setXyz(uint32_t n) noexcept { set(kX86RegClassXyz, n); }
+
+  // --------------------------------------------------------------------------
+  // [Add]
+  // --------------------------------------------------------------------------
+
+  //! Add register count by a register class `rc`.
+  ASMJIT_INLINE void add(uint32_t rc, uint32_t n = 1) noexcept {
+    ASMJIT_ASSERT(rc < _kX86RegClassManagedCount);
+    ASMJIT_ASSERT(0xFF - static_cast<uint32_t>(_regs[rc]) >= n);
+
+    uint32_t shift = Utils::byteShiftOfDWordStruct(rc);
+    _packed += n << shift;
+  }
+
+  //! Add Gp count.
+  ASMJIT_INLINE void addGp(uint32_t n) noexcept { add(kX86RegClassGp, n); }
+  //! Add Mm count.
+  ASMJIT_INLINE void addMm(uint32_t n) noexcept { add(kX86RegClassMm, n); }
+  //! Add K count.
+  ASMJIT_INLINE void addK(uint32_t n) noexcept { add(kX86RegClassK, n); }
+  //! Add XMM/YMM/ZMM count.
+  ASMJIT_INLINE void addXyz(uint32_t n) noexcept { add(kX86RegClassXyz, n); }
+
+  // --------------------------------------------------------------------------
+  // [Misc]
+  // --------------------------------------------------------------------------
+
+  //! Build register indexes based on the given `count` of registers.
+  ASMJIT_INLINE void indexFromRegCount(const X86RegCount& count) noexcept {
+    uint32_t x = static_cast<uint32_t>(count._regs[0]);
+    uint32_t y = static_cast<uint32_t>(count._regs[1]) + x;
+    uint32_t z = static_cast<uint32_t>(count._regs[2]) + y;
+
+    ASMJIT_ASSERT(y <= 0xFF);
+    ASMJIT_ASSERT(z <= 0xFF);
+    _packed = Utils::pack32_4x8(0, x, y, z);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  union {
+    struct {
+      //! Count of GP registers.
+      uint8_t _gp;
+      //! Count of MMX registers.
+      uint8_t _mm;
+      //! Count of K registers.
+      uint8_t _k;
+      //! Count of XMM/YMM/ZMM registers.
+      uint8_t _xyz;
+    };
+
+    uint8_t _regs[4];
+    uint32_t _packed;
+  };
+};
+
+// ============================================================================
+// [asmjit::X86RegMask]
+// ============================================================================
+
+//! \internal
+//!
+//! X86/X64 registers mask.
+struct X86RegMask {
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  //! Reset all register masks to zero.
+  ASMJIT_INLINE void reset() noexcept {
+    _packed.reset();
+  }
+
+  // --------------------------------------------------------------------------
+  // [IsEmpty / Has]
+  // --------------------------------------------------------------------------
+
+  //! Get whether all register masks are zero (empty).
+  ASMJIT_INLINE bool isEmpty() const noexcept {
+    return _packed.isZero();
+  }
+
+  ASMJIT_INLINE bool has(uint32_t rc, uint32_t mask = 0xFFFFFFFF) const noexcept {
+    ASMJIT_ASSERT(rc < _kX86RegClassManagedCount);
+
+    switch (rc) {
+      case kX86RegClassGp : return (static_cast<uint32_t>(_gp ) & mask) != 0;
+      case kX86RegClassMm : return (static_cast<uint32_t>(_mm ) & mask) != 0;
+      case kX86RegClassK  : return (static_cast<uint32_t>(_k  ) & mask) != 0;
+      case kX86RegClassXyz: return (static_cast<uint32_t>(_xyz) & mask) != 0;
+    }
+
+    return false;
+  }
+
+  ASMJIT_INLINE bool hasGp(uint32_t mask = 0xFFFFFFFF) const noexcept { return has(kX86RegClassGp, mask); }
+  ASMJIT_INLINE bool hasMm(uint32_t mask = 0xFFFFFFFF) const noexcept { return has(kX86RegClassMm, mask); }
+  ASMJIT_INLINE bool hasK(uint32_t mask = 0xFFFFFFFF) const noexcept { return has(kX86RegClassK, mask); }
+  ASMJIT_INLINE bool hasXyz(uint32_t mask = 0xFFFFFFFF) const noexcept { return has(kX86RegClassXyz, mask); }
+
+  // --------------------------------------------------------------------------
+  // [Get]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE uint32_t get(uint32_t rc) const noexcept {
+    ASMJIT_ASSERT(rc < _kX86RegClassManagedCount);
+
+    switch (rc) {
+      case kX86RegClassGp : return _gp;
+      case kX86RegClassMm : return _mm;
+      case kX86RegClassK  : return _k;
+      case kX86RegClassXyz: return _xyz;
+    }
+
+    return 0;
+  }
+
+  ASMJIT_INLINE uint32_t getGp() const noexcept { return get(kX86RegClassGp); }
+  ASMJIT_INLINE uint32_t getMm() const noexcept { return get(kX86RegClassMm); }
+  ASMJIT_INLINE uint32_t getK() const noexcept { return get(kX86RegClassK); }
+  ASMJIT_INLINE uint32_t getXyz() const noexcept { return get(kX86RegClassXyz); }
+
+  // --------------------------------------------------------------------------
+  // [Zero]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void zero(uint32_t rc) noexcept {
+    ASMJIT_ASSERT(rc < _kX86RegClassManagedCount);
+
+    switch (rc) {
+      case kX86RegClassGp : _gp = 0; break;
+      case kX86RegClassMm : _mm = 0; break;
+      case kX86RegClassK  : _k = 0; break;
+      case kX86RegClassXyz: _xyz = 0; break;
+    }
+  }
+
+  ASMJIT_INLINE void zeroGp() noexcept { zero(kX86RegClassGp); }
+  ASMJIT_INLINE void zeroMm() noexcept { zero(kX86RegClassMm); }
+  ASMJIT_INLINE void zeroK() noexcept { zero(kX86RegClassK); }
+  ASMJIT_INLINE void zeroXyz() noexcept { zero(kX86RegClassXyz); }
+
+  // --------------------------------------------------------------------------
+  // [Set]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void set(const X86RegMask& other) noexcept {
+    _packed = other._packed;
+  }
+
+  ASMJIT_INLINE void set(uint32_t rc, uint32_t mask) noexcept {
+    ASMJIT_ASSERT(rc < _kX86RegClassManagedCount);
+
+    switch (rc) {
+      case kX86RegClassGp : _gp = static_cast<uint16_t>(mask); break;
+      case kX86RegClassMm : _mm = static_cast<uint8_t>(mask); break;
+      case kX86RegClassK  : _k = static_cast<uint8_t>(mask); break;
+      case kX86RegClassXyz: _xyz = static_cast<uint32_t>(mask); break;
+    }
+  }
+
+  ASMJIT_INLINE void setGp(uint32_t mask) noexcept { return set(kX86RegClassGp, mask); }
+  ASMJIT_INLINE void setMm(uint32_t mask) noexcept { return set(kX86RegClassMm, mask); }
+  ASMJIT_INLINE void setK(uint32_t mask) noexcept { return set(kX86RegClassK, mask); }
+  ASMJIT_INLINE void setXyz(uint32_t mask) noexcept { return set(kX86RegClassXyz, mask); }
+
+  // --------------------------------------------------------------------------
+  // [And]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void and_(const X86RegMask& other) noexcept {
+    _packed.and_(other._packed);
+  }
+
+  ASMJIT_INLINE void and_(uint32_t rc, uint32_t mask) noexcept {
+    ASMJIT_ASSERT(rc < _kX86RegClassManagedCount);
+
+    switch (rc) {
+      case kX86RegClassGp : _gp &= static_cast<uint16_t>(mask); break;
+      case kX86RegClassMm : _mm &= static_cast<uint8_t>(mask); break;
+      case kX86RegClassK  : _k &= static_cast<uint8_t>(mask); break;
+      case kX86RegClassXyz: _xyz &= static_cast<uint32_t>(mask); break;
+    }
+  }
+
+  ASMJIT_INLINE void andGp(uint32_t mask) noexcept { and_(kX86RegClassGp, mask); }
+  ASMJIT_INLINE void andMm(uint32_t mask) noexcept { and_(kX86RegClassMm, mask); }
+  ASMJIT_INLINE void andK(uint32_t mask) noexcept { and_(kX86RegClassK, mask); }
+  ASMJIT_INLINE void andXyz(uint32_t mask) noexcept { and_(kX86RegClassXyz, mask); }
+
+  // --------------------------------------------------------------------------
+  // [AndNot]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void andNot(const X86RegMask& other) noexcept {
+    _packed.andNot(other._packed);
+  }
+
+  ASMJIT_INLINE void andNot(uint32_t rc, uint32_t mask) noexcept {
+    ASMJIT_ASSERT(rc < _kX86RegClassManagedCount);
+
+    switch (rc) {
+      case kX86RegClassGp : _gp &= ~static_cast<uint16_t>(mask); break;
+      case kX86RegClassMm : _mm &= ~static_cast<uint8_t>(mask); break;
+      case kX86RegClassK  : _k &= ~static_cast<uint8_t>(mask); break;
+      case kX86RegClassXyz: _xyz &= ~static_cast<uint32_t>(mask); break;
+    }
+  }
+
+  ASMJIT_INLINE void andNotGp(uint32_t mask) noexcept { andNot(kX86RegClassGp, mask); }
+  ASMJIT_INLINE void andNotMm(uint32_t mask) noexcept { andNot(kX86RegClassMm, mask); }
+  ASMJIT_INLINE void andNotK(uint32_t mask) noexcept { andNot(kX86RegClassK, mask); }
+  ASMJIT_INLINE void andNotXyz(uint32_t mask) noexcept { andNot(kX86RegClassXyz, mask); }
+
+  // --------------------------------------------------------------------------
+  // [Or]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void or_(const X86RegMask& other) noexcept {
+    _packed.or_(other._packed);
+  }
+
+  ASMJIT_INLINE void or_(uint32_t rc, uint32_t mask) noexcept {
+    ASMJIT_ASSERT(rc < _kX86RegClassManagedCount);
+
+    switch (rc) {
+      case kX86RegClassGp : _gp |= static_cast<uint16_t>(mask); break;
+      case kX86RegClassMm : _mm |= static_cast<uint8_t>(mask); break;
+      case kX86RegClassK  : _k |= static_cast<uint8_t>(mask); break;
+      case kX86RegClassXyz: _xyz |= static_cast<uint32_t>(mask); break;
+    }
+  }
+
+  ASMJIT_INLINE void orGp(uint32_t mask) noexcept { return or_(kX86RegClassGp, mask); }
+  ASMJIT_INLINE void orMm(uint32_t mask) noexcept { return or_(kX86RegClassMm, mask); }
+  ASMJIT_INLINE void orK(uint32_t mask) noexcept { return or_(kX86RegClassK, mask); }
+  ASMJIT_INLINE void orXyz(uint32_t mask) noexcept { return or_(kX86RegClassXyz, mask); }
+
+  // --------------------------------------------------------------------------
+  // [Xor]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void xor_(const X86RegMask& other) noexcept {
+    _packed.xor_(other._packed);
+  }
+
+  ASMJIT_INLINE void xor_(uint32_t rc, uint32_t mask) noexcept {
+    ASMJIT_ASSERT(rc < _kX86RegClassManagedCount);
+
+    switch (rc) {
+      case kX86RegClassGp : _gp ^= static_cast<uint16_t>(mask); break;
+      case kX86RegClassMm : _mm ^= static_cast<uint8_t>(mask); break;
+      case kX86RegClassK  : _k ^= static_cast<uint8_t>(mask); break;
+      case kX86RegClassXyz: _xyz ^= static_cast<uint32_t>(mask); break;
+    }
+  }
+
+  ASMJIT_INLINE void xorGp(uint32_t mask) noexcept { xor_(kX86RegClassGp, mask); }
+  ASMJIT_INLINE void xorMm(uint32_t mask) noexcept { xor_(kX86RegClassMm, mask); }
+  ASMJIT_INLINE void xorK(uint32_t mask) noexcept { xor_(kX86RegClassK, mask); }
+  ASMJIT_INLINE void xorXyz(uint32_t mask) noexcept { xor_(kX86RegClassXyz, mask); }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  union {
+    struct {
+      //! GP registers mask (16 bits).
+      uint16_t _gp;
+      //! MMX registers mask (8 bits).
+      uint8_t _mm;
+      //! K registers mask (8 bits).
+      uint8_t _k;
+      //! XMM/YMM/ZMM registers mask (32 bits).
+      uint32_t _xyz;
+    };
+
+    //! Packed masks.
+    UInt64 _packed;
+  };
+};
+
+// ============================================================================
+// [asmjit::Reg]
+// ============================================================================
+
+// This is only defined by `x86operand_regs.cpp` when exporting registers.
+#if defined(ASMJIT_EXPORTS_X86_REGS)
+
+// Remap all classes to POD structs so they can be statically initialized
+// without calling a constructor. Compiler will store these in .DATA section.
+//
+// Kept in union to prevent LTO warnings.
+class X86RipReg { public: union { Operand::VRegOp _vreg; }; };
+class X86SegReg { public: union { Operand::VRegOp _vreg; }; };
+class X86GpReg { public: union { Operand::VRegOp _vreg; }; };
+class X86FpReg { public: union { Operand::VRegOp _vreg; }; };
+class X86KReg { public: union { Operand::VRegOp _vreg; }; };
+class X86MmReg { public: union { Operand::VRegOp _vreg; }; };
+class X86XmmReg { public: union { Operand::VRegOp _vreg; }; };
+class X86YmmReg { public: union { Operand::VRegOp _vreg; }; };
+class X86ZmmReg { public: union { Operand::VRegOp _vreg; }; };
+
+#else
+
+// ============================================================================
+// [asmjit::X86Reg]
+// ============================================================================
+
+//! X86/X64 register base class.
+class X86Reg : public Reg {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a dummy X86 register.
+  ASMJIT_INLINE X86Reg() noexcept : Reg() {}
+  //! Create a reference to `other` X86 register.
+  ASMJIT_INLINE X86Reg(const X86Reg& other) noexcept : Reg(other) {}
+  //! Create a reference to `other` X86 register and change the index to `index`.
+  ASMJIT_INLINE X86Reg(const X86Reg& other, uint32_t index) noexcept : Reg(other, index) {}
+  //! Create a custom X86 register.
+  ASMJIT_INLINE X86Reg(uint32_t type, uint32_t index, uint32_t size) noexcept : Reg(type, index, size) {}
+  //! Create non-initialized X86 register.
+  explicit ASMJIT_INLINE X86Reg(const _NoInit&) noexcept : Reg(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [X86Reg Specific]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_REG_OP(X86Reg)
+
+  //! Get whether the register is a GP register (any size).
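+  //!
+  //! For example (illustrative, using the predefined registers from the
+  //! `x86` namespace): `eax.isGp()` and `rax.isGp()` are both true, while
+  //! `xmm0.isGp()` is false.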
+  ASMJIT_INLINE bool isGp() const noexcept { return _vreg.type <= kX86RegTypeGpq; }
+  //! Get whether the register is a GPB register (8-bit).
+  ASMJIT_INLINE bool isGpb() const noexcept { return _vreg.type <= _kX86RegTypePatchedGpbHi; }
+  //! Get whether the register is a low GPB register (8-bit).
+  ASMJIT_INLINE bool isGpbLo() const noexcept { return _vreg.type == kX86RegTypeGpbLo; }
+  //! Get whether the register is a high GPB register (8-bit).
+  ASMJIT_INLINE bool isGpbHi() const noexcept { return _vreg.type == kX86RegTypeGpbHi; }
+  //! Get whether the register is a GPW register (16-bit).
+  ASMJIT_INLINE bool isGpw() const noexcept { return _vreg.type == kX86RegTypeGpw; }
+  //! Get whether the register is a GPD register (32-bit).
+  ASMJIT_INLINE bool isGpd() const noexcept { return _vreg.type == kX86RegTypeGpd; }
+  //! Get whether the register is a GPQ register (64-bit).
+  ASMJIT_INLINE bool isGpq() const noexcept { return _vreg.type == kX86RegTypeGpq; }
+
+  //! Get whether the register is an FPU register (80-bit).
+  ASMJIT_INLINE bool isFp() const noexcept { return _vreg.type == kX86RegTypeFp; }
+  //! Get whether the register is an MMX register (64-bit).
+  ASMJIT_INLINE bool isMm() const noexcept { return _vreg.type == kX86RegTypeMm; }
+
+  //! Get whether the register is a K register (64-bit).
+  ASMJIT_INLINE bool isK() const noexcept { return _vreg.type == kX86RegTypeK; }
+
+  //! Get whether the register is an XMM register (128-bit).
+  ASMJIT_INLINE bool isXmm() const noexcept { return _vreg.type == kX86RegTypeXmm; }
+  //! Get whether the register is a YMM register (256-bit).
+  ASMJIT_INLINE bool isYmm() const noexcept { return _vreg.type == kX86RegTypeYmm; }
+  //! Get whether the register is a ZMM register (512-bit).
+  ASMJIT_INLINE bool isZmm() const noexcept { return _vreg.type == kX86RegTypeZmm; }
+
+  //! Get whether the register is RIP.
+  ASMJIT_INLINE bool isRip() const noexcept { return _vreg.type == kX86RegTypeRip; }
+  //! Get whether the register is a segment register.
+  ASMJIT_INLINE bool isSeg() const noexcept { return _vreg.type == kX86RegTypeSeg; }
+
+  // --------------------------------------------------------------------------
+  // [Statics]
+  // --------------------------------------------------------------------------
+
+  //! Get whether the `op` operand is a low or high GPB register.
+  static ASMJIT_INLINE bool isGpbReg(const Operand& op) noexcept {
+    const uint32_t mask = Utils::pack32_2x8_1x16(
+      0xFF, 0xFF, ~(_kX86RegTypePatchedGpbHi << 8) & 0xFF00);
+
+    return (op._packed[0].u32[0] & mask) == Utils::pack32_2x8_1x16(kTypeReg, 1, 0x0000);
+  }
+};
+
+// ============================================================================
+// [asmjit::X86RipReg]
+// ============================================================================
+
+//! X86/X64 RIP register.
+class X86RipReg : public X86Reg {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a RIP register.
+  ASMJIT_INLINE X86RipReg() noexcept : X86Reg(kX86RegTypeRip, 0, 0) {}
+  //! Create a reference to `other` RIP register.
+  ASMJIT_INLINE X86RipReg(const X86RipReg& other) noexcept : X86Reg(other) {}
+  //! Create non-initialized RIP register.
+  explicit ASMJIT_INLINE X86RipReg(const _NoInit&) noexcept : X86Reg(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [X86RipReg Specific]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_REG_OP(X86RipReg)
+};
+
+// ============================================================================
+// [asmjit::X86SegReg]
+// ============================================================================
+
+//! X86/X64 segment register.
+class X86SegReg : public X86Reg {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a dummy segment register.
+  ASMJIT_INLINE X86SegReg() noexcept : X86Reg() {}
+  //! Create a reference to `other` segment register.
+  ASMJIT_INLINE X86SegReg(const X86SegReg& other) noexcept : X86Reg(other) {}
+  //! Create a reference to `other` segment register and change the index to `index`.
+  ASMJIT_INLINE X86SegReg(const X86SegReg& other, uint32_t index) noexcept : X86Reg(other, index) {}
+  //! Create a custom segment register.
+  ASMJIT_INLINE X86SegReg(uint32_t type, uint32_t index, uint32_t size) noexcept : X86Reg(type, index, size) {}
+  //! Create non-initialized segment register.
+  explicit ASMJIT_INLINE X86SegReg(const _NoInit&) noexcept : X86Reg(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [X86SegReg Specific]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_REG_OP(X86SegReg)
+};
+
+// ============================================================================
+// [asmjit::X86GpReg]
+// ============================================================================
+
+//! X86/X64 general purpose register (GPB, GPW, GPD, GPQ).
+class X86GpReg : public X86Reg {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a dummy Gp register.
+  ASMJIT_INLINE X86GpReg() noexcept : X86Reg() {}
+  //! Create a reference to `other` Gp register.
+  ASMJIT_INLINE X86GpReg(const X86GpReg& other) noexcept : X86Reg(other) {}
+  //! Create a reference to `other` Gp register and change the index to `index`.
+  ASMJIT_INLINE X86GpReg(const X86GpReg& other, uint32_t index) noexcept : X86Reg(other, index) {}
+  //! Create a custom Gp register.
+  ASMJIT_INLINE X86GpReg(uint32_t type, uint32_t index, uint32_t size) noexcept : X86Reg(type, index, size) {}
+  //! Create non-initialized Gp register.
+  explicit ASMJIT_INLINE X86GpReg(const _NoInit&) noexcept : X86Reg(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [X86GpReg Specific]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_REG_OP(X86GpReg)
+
+  // --------------------------------------------------------------------------
+  // [X86GpReg Cast]
+  // --------------------------------------------------------------------------
+
+  //! Cast this register to the same register type/size as `other`.
+  //!
+  //! This function has been designed to help with maintaining code that runs
+  //! in both 32-bit and 64-bit modes. If you have registers of mixed types,
+  //! use `X86GpReg::as()` to cast one type to another.
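+  //!
+  //! A minimal sketch (illustrative; assumes the predefined `edx` and `rax`
+  //! registers from the `x86` namespace):
+  //!
+  //! ~~~
+  //! X86GpReg idx = x86::edx;
+  //! X86GpReg ref = x86::rax;
+  //!
+  //! // Widen `idx` to the type/size of `ref`; the index (2) is kept, so
+  //! // the result is `rdx`.
+  //! X86GpReg widened = idx.as(ref);
+  //! ~~~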
+  ASMJIT_INLINE X86GpReg as(const X86GpReg& other) const noexcept {
+    return X86GpReg(other.getRegType(), getRegIndex(), other.getSize());
+  }
+
+  //! Cast this register to 8-bit (LO) part.
+  ASMJIT_INLINE X86GpReg r8() const noexcept { return X86GpReg(kX86RegTypeGpbLo, getRegIndex(), 1); }
+  //! Cast this register to 8-bit (LO) part.
+  ASMJIT_INLINE X86GpReg r8Lo() const noexcept { return X86GpReg(kX86RegTypeGpbLo, getRegIndex(), 1); }
+  //! Cast this register to 8-bit (HI) part.
+  ASMJIT_INLINE X86GpReg r8Hi() const noexcept { return X86GpReg(kX86RegTypeGpbHi, getRegIndex(), 1); }
+
+  //! Cast this register to 16-bit.
+  ASMJIT_INLINE X86GpReg r16() const noexcept { return X86GpReg(kX86RegTypeGpw, getRegIndex(), 2); }
+  //! Cast this register to 32-bit.
+  ASMJIT_INLINE X86GpReg r32() const noexcept { return X86GpReg(kX86RegTypeGpd, getRegIndex(), 4); }
+  //! Cast this register to 64-bit.
+  ASMJIT_INLINE X86GpReg r64() const noexcept { return X86GpReg(kX86RegTypeGpq, getRegIndex(), 8); }
+};
+
+// ============================================================================
+// [asmjit::X86FpReg]
+// ============================================================================
+
+//! X86/X64 80-bit Fp register.
+class X86FpReg : public X86Reg {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a dummy Fp register.
+  ASMJIT_INLINE X86FpReg() noexcept : X86Reg() {}
+  //! Create a reference to `other` Fp register.
+  ASMJIT_INLINE X86FpReg(const X86FpReg& other) noexcept : X86Reg(other) {}
+  //! Create a reference to `other` Fp register and change the index to `index`.
+  ASMJIT_INLINE X86FpReg(const X86FpReg& other, uint32_t index) noexcept : X86Reg(other, index) {}
+  //! Create a custom Fp register.
+  ASMJIT_INLINE X86FpReg(uint32_t type, uint32_t index, uint32_t size) noexcept : X86Reg(type, index, size) {}
+  //! Create non-initialized Fp register.
+  explicit ASMJIT_INLINE X86FpReg(const _NoInit&) noexcept : X86Reg(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [X86FpReg Specific]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_REG_OP(X86FpReg)
+};
+
+// ============================================================================
+// [asmjit::X86MmReg]
+// ============================================================================
+
+//! X86/X64 64-bit Mm register (MMX+).
+//!
+//! Structure of the MMX register and its memory mapping:
+//!
+//! ~~~
+//!       Memory Bytes
+//! +--+--+--+--+--+--+--+--+
+//! |00|01|02|03|04|05|06|07|
+//! +--+--+--+--+--+--+--+--+
+//!
+//!       MMX Register
+//! +-----------------------+
+//! |         QWORD         |
+//! +-----------+-----------+
+//! | HI-DWORD  | LO-DWORD  |
+//! +-----------+-----------+
+//! | W3  | W2  | W1  | W0  |
+//! +--+--+--+--+--+--+--+--+
+//! |07|06|05|04|03|02|01|00|
+//! +--+--+--+--+--+--+--+--+
+//! ~~~
+//!
+//! Move instruction semantics:
+//!
+//! - `movd` - writes 4-bytes in low DWORD and clears the high DWORD.
+//! - `movq` - writes 8-bytes in QWORD.
+class X86MmReg : public X86Reg {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a dummy Mm register.
+  ASMJIT_INLINE X86MmReg() noexcept : X86Reg() {}
+  //! Create a reference to `other` Mm register.
Create a reference to `other` Mm register. + ASMJIT_INLINE X86MmReg(const X86MmReg& other) noexcept : X86Reg(other) {} + //! Create a reference to `other` Mm register and change the index to `index`. + ASMJIT_INLINE X86MmReg(const X86MmReg& other, uint32_t index) noexcept : X86Reg(other, index) {} + //! Create a custom Mm register. + ASMJIT_INLINE X86MmReg(uint32_t type, uint32_t index, uint32_t size) noexcept : X86Reg(type, index, size) {} + //! Create non-initialized Mm register. + explicit ASMJIT_INLINE X86MmReg(const _NoInit&) noexcept : X86Reg(NoInit) {} + + // -------------------------------------------------------------------------- + // [X86MmReg Specific] + // -------------------------------------------------------------------------- + + ASMJIT_REG_OP(X86MmReg) +}; + +// ============================================================================ +// [asmjit::X86KReg] +// ============================================================================ + +//! X86/X64 64-bit K register (AVX512+). +class X86KReg : public X86Reg { +public: + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! Create a dummy K register. + ASMJIT_INLINE X86KReg() noexcept : X86Reg() {} + //! Create a reference to `other` K register. + ASMJIT_INLINE X86KReg(const X86KReg& other) noexcept : X86Reg(other) {} + //! Create a reference to `other` K register and change the index to `index`. + ASMJIT_INLINE X86KReg(const X86KReg& other, uint32_t index) noexcept : X86Reg(other, index) {} + //! Create a custom K register. + ASMJIT_INLINE X86KReg(uint32_t type, uint32_t index, uint32_t size) noexcept : X86Reg(type, index, size) {} + //! Create non-initialized K register. + explicit ASMJIT_INLINE X86KReg(const _NoInit&) noexcept : X86Reg(NoInit) {} + + // -------------------------------------------------------------------------- + // [X86KReg Specific] + // -------------------------------------------------------------------------- + + ASMJIT_REG_OP(X86KReg) +}; + +// ============================================================================ +// [asmjit::X86XmmReg] +// ============================================================================ + +//! X86/X64 128-bit XMM register (SSE+). +//! +//! Structure of XMM register and it's memory mapping: +//! +//! ~~~ +//! Memory Bytes +//! +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +//! |00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15| +//! +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +//! +//! XMM Register +//! +-----------------------------------------------+ +//! | DQWORD | +//! +-----------------------+-----------------------+ +//! | HI-QWORD/PD | LO-QWORD/SD | +//! +-----------+-----------+-----------+-----------+ +//! | D3/PS | D2/PS | D1/PS | D0/SS | +//! +-----------+-----------+-----------+-----------+ +//! | W7 | W6 | W5 | W4 | W3 | W2 | W1 | W0 | +//! +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +//! |15|14|13|12|11|10|09|08|07|06|05|04|03|02|01|00| +//! +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ +//! ~~~ +//! +//! Move instruction semantics: +//! +//! - `movd` - writes 4-bytes in `D0` and clears the rest. +//! - `movq` - writes 8-bytes in low QWORD and clears the rest. +//! - `movq2dq` - writes 8 bytes in low QWORD and clears the rest. +//! +//! - `movss` - writes 4-bytes in `D0` +//! (the rest is zeroed only if the source operand is a memory location). +//! 
+//! - `movsd` - writes 8-bytes in low QWORD
+//!   (the rest is zeroed only if the source operand is a memory location).
+//!
+//! - `movaps`,
+//!   `movups`,
+//!   `movapd`,
+//!   `movupd`,
+//!   `movdqu`,
+//!   `movdqa`,
+//!   `lddqu` - writes 16-bytes in DQWORD.
+//!
+//! - `movlps`,
+//!   `movlpd`,
+//!   `movhlps` - writes 8-bytes in low QWORD and keeps the rest untouched.
+//!
+//! - `movhps`,
+//!   `movhpd`,
+//!   `movlhps` - writes 8-bytes in high QWORD and keeps the rest untouched.
+//!
+//! - `movddup`,
+//!   `movsldup`,
+//!   `movshdup` - writes 16 bytes in DQWORD.
+class X86XmmReg : public X86Reg {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a dummy XMM register.
+  ASMJIT_INLINE X86XmmReg() noexcept : X86Reg() {}
+  //! Create a reference to `other` XMM register.
+  ASMJIT_INLINE X86XmmReg(const X86XmmReg& other) noexcept : X86Reg(other) {}
+  //! Create a reference to `other` XMM register and change the index to `index`.
+  ASMJIT_INLINE X86XmmReg(const X86XmmReg& other, uint32_t index) noexcept : X86Reg(other, index) {}
+  //! Create a custom XMM register.
+  ASMJIT_INLINE X86XmmReg(uint32_t type, uint32_t index, uint32_t size) noexcept : X86Reg(type, index, size) {}
+  //! Create non-initialized XMM register.
+  explicit ASMJIT_INLINE X86XmmReg(const _NoInit&) noexcept : X86Reg(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [X86XmmReg Specific]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_REG_OP(X86XmmReg)
+
+  // --------------------------------------------------------------------------
+  // [X86XmmReg Cast]
+  // --------------------------------------------------------------------------
+
+  //! Cast this register to XMM (clone).
+  ASMJIT_INLINE X86XmmReg xmm() const noexcept { return X86XmmReg(kX86RegTypeXmm, getRegIndex(), 16); }
+  //! Cast this register to YMM.
+  ASMJIT_INLINE X86YmmReg ymm() const noexcept;
+  //! Cast this register to ZMM.
+  ASMJIT_INLINE X86ZmmReg zmm() const noexcept;
+};
+
+// ============================================================================
+// [asmjit::X86YmmReg]
+// ============================================================================
+
+//! X86/X64 256-bit YMM register (AVX+).
+//!
+//! Structure of YMM register and its memory mapping:
+//!
+//! ~~~
+//!                                          Memory Bytes
+//! +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+//! |00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|
+//! +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+//!
+//!                                          YMM Register
+//! +-----------------------------------------------+-----------------------------------------------+
+//! |                   HI-DQWORD                   |                   LO-DQWORD                   |
+//! +-----------------------+-----------------------+-----------------------+-----------------------+
+//! |         Q3/PD         |         Q2/PD         |         Q1/PD         |         Q0/SD         |
+//! +-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
+//! |   D7/PS   |   D6/PS   |   D5/PS   |   D4/PS   |   D3/PS   |   D2/PS   |   D1/PS   |   D0/SS   |
+//! +-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
+//! | W15 | W14 | W13 | W12 | W11 | W10 |  W9 |  W8 |  W7 |  W6 |  W5 |  W4 |  W3 |  W2 |  W1 |  W0 |
+//! +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+//! |31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|09|08|07|06|05|04|03|02|01|00|
+//! +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+//! ~~~
+class X86YmmReg : public X86Reg {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a dummy YMM register.
+  ASMJIT_INLINE X86YmmReg() noexcept : X86Reg() {}
+  //! Create a reference to `other` YMM register.
+  ASMJIT_INLINE X86YmmReg(const X86YmmReg& other) noexcept : X86Reg(other) {}
+  //! Create a reference to `other` YMM register and change the index to `index`.
+  ASMJIT_INLINE X86YmmReg(const X86YmmReg& other, uint32_t index) noexcept : X86Reg(other, index) {}
+  //! Create a custom YMM register.
+  ASMJIT_INLINE X86YmmReg(uint32_t type, uint32_t index, uint32_t size) noexcept : X86Reg(type, index, size) {}
+  //! Create non-initialized YMM register.
+  explicit ASMJIT_INLINE X86YmmReg(const _NoInit&) noexcept : X86Reg(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [X86YmmReg Specific]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_REG_OP(X86YmmReg)
+
+  // --------------------------------------------------------------------------
+  // [X86YmmReg Cast]
+  // --------------------------------------------------------------------------
+
+  //! Cast this register to XMM.
+  ASMJIT_INLINE X86XmmReg xmm() const noexcept { return X86XmmReg(kX86RegTypeXmm, getRegIndex(), 16); }
+  //! Cast this register to YMM (clone).
+  ASMJIT_INLINE X86YmmReg ymm() const noexcept { return X86YmmReg(kX86RegTypeYmm, getRegIndex(), 32); }
+  //! Cast this register to ZMM.
+  ASMJIT_INLINE X86ZmmReg zmm() const noexcept;
+};
+
+ASMJIT_INLINE X86YmmReg X86XmmReg::ymm() const noexcept { return X86YmmReg(kX86RegTypeYmm, getRegIndex(), 32); }
+
+// ============================================================================
+// [asmjit::X86ZmmReg]
+// ============================================================================
+
+//! X86/X64 512-bit ZMM register (AVX512+).
+class X86ZmmReg : public X86Reg {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a dummy ZMM register.
+  ASMJIT_INLINE X86ZmmReg() noexcept : X86Reg() {}
+  //! Create a reference to `other` ZMM register.
+  ASMJIT_INLINE X86ZmmReg(const X86ZmmReg& other) noexcept : X86Reg(other) {}
+  //! Create a reference to `other` ZMM register and change the index to `index`.
+  ASMJIT_INLINE X86ZmmReg(const X86ZmmReg& other, uint32_t index) noexcept : X86Reg(other, index) {}
+  //! Create a custom ZMM register.
+  ASMJIT_INLINE X86ZmmReg(uint32_t type, uint32_t index, uint32_t size) noexcept : X86Reg(type, index, size) {}
+  //! Create non-initialized ZMM register.
+ explicit ASMJIT_INLINE X86ZmmReg(const _NoInit&) noexcept : X86Reg(NoInit) {} + + // -------------------------------------------------------------------------- + // [X86ZmmReg Specific] + // -------------------------------------------------------------------------- + + ASMJIT_REG_OP(X86ZmmReg) + + // -------------------------------------------------------------------------- + // [X86ZmmReg Cast] + // -------------------------------------------------------------------------- + + //! Cast this register to XMM. + ASMJIT_INLINE X86XmmReg xmm() const noexcept { return X86XmmReg(kX86RegTypeXmm, getRegIndex(), 16); } + //! Cast this register to YMM. + ASMJIT_INLINE X86YmmReg ymm() const noexcept { return X86YmmReg(kX86RegTypeYmm, getRegIndex(), 32); } + //! Cast this register to ZMM (clone). + ASMJIT_INLINE X86ZmmReg zmm() const noexcept { return X86ZmmReg(kX86RegTypeZmm, getRegIndex(), 64); } +}; + +ASMJIT_INLINE X86ZmmReg X86XmmReg::zmm() const noexcept { return X86ZmmReg(kX86RegTypeZmm, getRegIndex(), 64); } +ASMJIT_INLINE X86ZmmReg X86YmmReg::zmm() const noexcept { return X86ZmmReg(kX86RegTypeZmm, getRegIndex(), 64); } + +// ============================================================================ +// [asmjit::X86Mem] +// ============================================================================ + +//! X86 memory operand. +class X86Mem : public BaseMem { +public: + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE X86Mem() noexcept : BaseMem(NoInit) { + reset(); + } + + ASMJIT_INLINE X86Mem(const Label& label, int32_t disp, uint32_t size = 0) noexcept : BaseMem(NoInit) { + _init_packed_op_sz_b0_b1_id(kTypeMem, size, kMemTypeLabel, 0, label._base.id); + _init_packed_d2_d3(kInvalidValue, disp); + } + + ASMJIT_INLINE X86Mem(const Label& label, const X86GpReg& index, uint32_t shift, int32_t disp, uint32_t size = 0) noexcept : BaseMem(NoInit) { + ASMJIT_ASSERT(shift <= 3); + + _init_packed_op_sz_b0_b1_id(kTypeMem, size, kMemTypeLabel, + (kX86MemVSibGpz << kX86MemVSibIndex) + + (shift << kX86MemShiftIndex), + label.getId()); + _vmem.index = index.getRegIndex(); + _vmem.displacement = disp; + } + + ASMJIT_INLINE X86Mem(const X86RipReg& rip, int32_t disp, uint32_t size = 0) noexcept : BaseMem(NoInit) { + ASMJIT_UNUSED(rip); + _init_packed_op_sz_b0_b1_id(kTypeMem, size, kMemTypeRip, 0, kInvalidValue); + _init_packed_d2_d3(kInvalidValue, disp); + } + + ASMJIT_INLINE X86Mem(const X86GpReg& base, int32_t disp, uint32_t size = 0) noexcept : BaseMem(NoInit) { + _init_packed_op_sz_b0_b1_id(kTypeMem, size, kMemTypeBaseIndex, + _getGpdFlags(base) + + (kX86MemVSibGpz << kX86MemVSibIndex), + base.getRegIndex()); + _init_packed_d2_d3(kInvalidValue, disp); + } + + ASMJIT_INLINE X86Mem(const X86GpReg& base, const X86GpReg& index, uint32_t shift, int32_t disp, uint32_t size = 0) noexcept : BaseMem(NoInit) { + ASMJIT_ASSERT(shift <= 3); + + _init_packed_op_sz_b0_b1_id(kTypeMem, size, kMemTypeBaseIndex, + _getGpdFlags(base) + (shift << kX86MemShiftIndex), + base.getRegIndex()); + _vmem.index = index.getRegIndex(); + _vmem.displacement = disp; + } + + ASMJIT_INLINE X86Mem(const X86GpReg& base, const X86XmmReg& index, uint32_t shift, int32_t disp, uint32_t size = 0) noexcept : BaseMem(NoInit) { + ASMJIT_ASSERT(shift <= 3); + + _init_packed_op_sz_b0_b1_id(kTypeMem, size, kMemTypeBaseIndex, + _getGpdFlags(base) + + (kX86MemVSibXmm << kX86MemVSibIndex) + + 
      (shift << kX86MemShiftIndex),
+      base.getRegIndex());
+    _vmem.index = index.getRegIndex();
+    _vmem.displacement = disp;
+  }
+
+  ASMJIT_INLINE X86Mem(const X86GpReg& base, const X86YmmReg& index, uint32_t shift, int32_t disp, uint32_t size = 0) noexcept : BaseMem(NoInit) {
+    ASMJIT_ASSERT(shift <= 3);
+
+    _init_packed_op_sz_b0_b1_id(kTypeMem, size, kMemTypeBaseIndex,
+      _getGpdFlags(base) +
+      (kX86MemVSibYmm << kX86MemVSibIndex) +
+      (shift << kX86MemShiftIndex),
+      base.getRegIndex());
+    _vmem.index = index.getRegIndex();
+    _vmem.displacement = disp;
+  }
+
+#if !defined(ASMJIT_DISABLE_COMPILER)
+  ASMJIT_INLINE X86Mem(const Label& label, const X86GpVar& index, uint32_t shift, int32_t disp, uint32_t size = 0) noexcept : BaseMem(NoInit) {
+    ASMJIT_ASSERT(shift <= 3);
+
+    _init_packed_op_sz_b0_b1_id(kTypeMem, size, kMemTypeLabel,
+      (kX86MemVSibGpz << kX86MemVSibIndex) +
+      (shift << kX86MemShiftIndex),
+      label.getId());
+    _vmem.index = ASMJIT_OP_ID(index);
+    _vmem.displacement = disp;
+  }
+
+  ASMJIT_INLINE X86Mem(const X86GpVar& base, int32_t disp, uint32_t size = 0) noexcept : BaseMem(NoInit) {
+    _init_packed_op_sz_b0_b1_id(kTypeMem, size, kMemTypeBaseIndex,
+      _getGpdFlags(reinterpret_cast<const Operand&>(base)) +
+      (kX86MemVSibGpz << kX86MemVSibIndex),
+      ASMJIT_OP_ID(base));
+    _init_packed_d2_d3(kInvalidValue, disp);
+  }
+
+  ASMJIT_INLINE X86Mem(const X86GpVar& base, const X86GpVar& index, uint32_t shift, int32_t disp, uint32_t size = 0) noexcept : BaseMem(NoInit) {
+    ASMJIT_ASSERT(shift <= 3);
+
+    _init_packed_op_sz_b0_b1_id(kTypeMem, size, kMemTypeBaseIndex,
+      _getGpdFlags(reinterpret_cast<const Operand&>(base)) +
+      (shift << kX86MemShiftIndex),
+      ASMJIT_OP_ID(base));
+    _vmem.index = ASMJIT_OP_ID(index);
+    _vmem.displacement = disp;
+  }
+
+  ASMJIT_INLINE X86Mem(const X86GpVar& base, const X86XmmVar& index, uint32_t shift, int32_t disp, uint32_t size = 0) noexcept : BaseMem(NoInit) {
+    ASMJIT_ASSERT(shift <= 3);
+
+    _init_packed_op_sz_b0_b1_id(kTypeMem, size, kMemTypeBaseIndex,
+      _getGpdFlags(reinterpret_cast<const Operand&>(base)) +
+      (kX86MemVSibXmm << kX86MemVSibIndex) +
+      (shift << kX86MemShiftIndex),
+      ASMJIT_OP_ID(base));
+    _vmem.index = ASMJIT_OP_ID(index);
+    _vmem.displacement = disp;
+  }
+
+  ASMJIT_INLINE X86Mem(const X86GpVar& base, const X86YmmVar& index, uint32_t shift, int32_t disp, uint32_t size = 0) noexcept : BaseMem(NoInit) {
+    ASMJIT_ASSERT(shift <= 3);
+
+    _init_packed_op_sz_b0_b1_id(kTypeMem, size, kMemTypeBaseIndex,
+      _getGpdFlags(reinterpret_cast<const Operand&>(base)) +
+      (kX86MemVSibYmm << kX86MemVSibIndex) +
+      (shift << kX86MemShiftIndex),
+      ASMJIT_OP_ID(base));
+    _vmem.index = ASMJIT_OP_ID(index);
+    _vmem.displacement = disp;
+  }
+
+  ASMJIT_INLINE X86Mem(const _Init&, uint32_t memType, const X86Var& base, int32_t disp, uint32_t size) noexcept : BaseMem(NoInit) {
+    _init_packed_op_sz_b0_b1_id(kTypeMem, size, memType, 0, ASMJIT_OP_ID(base));
+    _vmem.index = kInvalidValue;
+    _vmem.displacement = disp;
+  }
+
+  ASMJIT_INLINE X86Mem(const _Init&, uint32_t memType, const X86Var& base, const X86GpVar& index, uint32_t shift, int32_t disp, uint32_t size) noexcept : BaseMem(NoInit) {
+    ASMJIT_ASSERT(shift <= 3);
+
+    _init_packed_op_sz_b0_b1_id(kTypeMem, size, memType, shift << kX86MemShiftIndex, ASMJIT_OP_ID(base));
+    _vmem.index = ASMJIT_OP_ID(index);
+    _vmem.displacement = disp;
+  }
+#endif // !ASMJIT_DISABLE_COMPILER
+
+  ASMJIT_INLINE X86Mem(const X86Mem& other) noexcept : BaseMem(other) {}
+  explicit ASMJIT_INLINE X86Mem(const _NoInit&) noexcept : BaseMem(NoInit) {}
+
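+  // A minimal construction sketch for this operand type (illustrative only;
+  // it assumes the `x86` register aliases defined near the end of this
+  // header):
+  //
+  //   X86Mem a(x86::eax, 8, 4);              // dword [eax + 8].
+  //   X86Mem b(x86::eax, x86::ecx, 2, 0, 4); // dword [eax + ecx * 4].
+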
+  // --------------------------------------------------------------------------
+  // [X86Mem Specific]
+  // --------------------------------------------------------------------------
+
+  //! Clone X86Mem operand.
+  ASMJIT_INLINE X86Mem clone() const noexcept {
+    return X86Mem(*this);
+  }
+
+  //! Reset X86Mem operand.
+  ASMJIT_INLINE void reset() noexcept {
+    _init_packed_op_sz_b0_b1_id(kTypeMem, 0, kMemTypeBaseIndex, 0, kInvalidValue);
+    _init_packed_d2_d3(kInvalidValue, 0);
+  }
+
+  //! \internal
+  ASMJIT_INLINE void _init(uint32_t memType, uint32_t base, int32_t disp, uint32_t size) noexcept {
+    _init_packed_op_sz_b0_b1_id(kTypeMem, size, memType, 0, base);
+    _vmem.index = kInvalidValue;
+    _vmem.displacement = disp;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Segment]
+  // --------------------------------------------------------------------------
+
+  //! Get whether the memory operand has segment override prefix.
+  ASMJIT_INLINE bool hasSegment() const noexcept {
+    return (_vmem.flags & kX86MemSegMask) != (kX86SegDefault << kX86MemSegIndex);
+  }
+
+  //! Get memory operand segment, see `X86Seg`.
+  ASMJIT_INLINE uint32_t getSegment() const noexcept {
+    return (static_cast<uint32_t>(_vmem.flags) >> kX86MemSegIndex) & kX86MemSegBits;
+  }
+
+  //! Set memory operand segment, see `X86Seg`.
+  ASMJIT_INLINE X86Mem& setSegment(uint32_t segIndex) noexcept {
+    _vmem.flags = static_cast<uint8_t>(
+      (static_cast<uint32_t>(_vmem.flags) & kX86MemSegMask) + (segIndex << kX86MemSegIndex));
+    return *this;
+  }
+
+  //! Set memory operand segment, see `X86Seg`.
+  ASMJIT_INLINE X86Mem& setSegment(const X86SegReg& seg) noexcept {
+    return setSegment(seg.getRegIndex());
+  }
+
+  // --------------------------------------------------------------------------
+  // [Gpd]
+  // --------------------------------------------------------------------------
+
+  //! Get whether the memory operand has 32-bit GP base.
+  ASMJIT_INLINE bool hasGpdBase() const noexcept {
+    return (_packed[0].u32[0] & Utils::pack32_4x8(0x00, 0x00, 0x00, kX86MemGpdMask)) != 0;
+  }
+
+  //! Set whether the memory operand has 32-bit GP base.
+  ASMJIT_INLINE X86Mem& setGpdBase() noexcept {
+    _packed[0].u32[0] |= Utils::pack32_4x8(0x00, 0x00, 0x00, kX86MemGpdMask);
+    return *this;
+  }
+
+  //! Set whether the memory operand has 32-bit GP base to `b`.
+  ASMJIT_INLINE X86Mem& setGpdBase(uint32_t b) noexcept {
+    _packed[0].u32[0] &= ~Utils::pack32_4x8(0x00, 0x00, 0x00, kX86MemGpdMask);
+    _packed[0].u32[0] |= Utils::pack32_4x8(0x00, 0x00, 0x00, b << kX86MemGpdIndex);
+    return *this;
+  }
+
+  // --------------------------------------------------------------------------
+  // [VSib]
+  // --------------------------------------------------------------------------
+
+  //! Get V-SIB type.
+  ASMJIT_INLINE uint32_t getVSib() const noexcept {
+    return (static_cast<uint32_t>(_vmem.flags) >> kX86MemVSibIndex) & kX86MemVSibBits;
+  }
+
+  //! Set V-SIB type.
+  ASMJIT_INLINE X86Mem& _setVSib(uint32_t vsib) noexcept {
+    _packed[0].u32[0] &= ~Utils::pack32_4x8(0x00, 0x00, 0x00, kX86MemVSibMask);
+    _packed[0].u32[0] |= Utils::pack32_4x8(0x00, 0x00, 0x00, vsib << kX86MemVSibIndex);
+    return *this;
+  }
+
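+  // A short sketch of the segment override setters above (illustrative only;
+  // `x86::fs` is one of the segment register aliases defined near the end of
+  // this header):
+  //
+  //   X86Mem m(x86::eax, 0, 4); // dword [eax].
+  //   m.setSegment(x86::fs);    // dword [fs:eax].
+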
+  // --------------------------------------------------------------------------
+  // [Size]
+  // --------------------------------------------------------------------------
+
+  //! Set memory operand size.
+  ASMJIT_INLINE X86Mem& setSize(uint32_t size) noexcept {
+    _vmem.size = static_cast<uint8_t>(size);
+    return *this;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Base]
+  // --------------------------------------------------------------------------
+
+  //! Get whether the memory operand has a base register.
+  ASMJIT_INLINE bool hasBase() const noexcept {
+    return _vmem.base != kInvalidValue;
+  }
+
+  //! Get memory operand base register code, variable id, or `kInvalidValue`.
+  ASMJIT_INLINE uint32_t getBase() const noexcept {
+    return _vmem.base;
+  }
+
+  //! Set memory operand base register code, variable id, or `kInvalidValue`.
+  ASMJIT_INLINE X86Mem& setBase(uint32_t base) noexcept {
+    _vmem.base = base;
+    return *this;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Index]
+  // --------------------------------------------------------------------------
+
+  //! Get whether the memory operand has an index.
+  ASMJIT_INLINE bool hasIndex() const noexcept {
+    return _vmem.index != kInvalidValue;
+  }
+
+  //! Get memory operand index register code, variable id, or `kInvalidValue`.
+  ASMJIT_INLINE uint32_t getIndex() const noexcept {
+    return _vmem.index;
+  }
+
+  //! Set memory operand index register code, variable id, or `kInvalidValue`.
+  ASMJIT_INLINE X86Mem& setIndex(uint32_t index) noexcept {
+    _vmem.index = index;
+    return *this;
+  }
+
+  //! Set memory index.
+  ASMJIT_INLINE X86Mem& setIndex(const X86GpReg& index) noexcept {
+    _vmem.index = index.getRegIndex();
+    return _setVSib(kX86MemVSibGpz);
+  }
+
+  //! Set memory index.
+  ASMJIT_INLINE X86Mem& setIndex(const X86GpReg& index, uint32_t shift) noexcept {
+    _vmem.index = index.getRegIndex();
+    return _setVSib(kX86MemVSibGpz).setShift(shift);
+  }
+
+  //! Set memory index.
+  ASMJIT_INLINE X86Mem& setIndex(const X86XmmReg& index) noexcept {
+    _vmem.index = index.getRegIndex();
+    return _setVSib(kX86MemVSibXmm);
+  }
+
+  //! Set memory index.
+  ASMJIT_INLINE X86Mem& setIndex(const X86XmmReg& index, uint32_t shift) noexcept {
+    _vmem.index = index.getRegIndex();
+    return _setVSib(kX86MemVSibXmm).setShift(shift);
+  }
+
+  //! Set memory index.
+  ASMJIT_INLINE X86Mem& setIndex(const X86YmmReg& index) noexcept {
+    _vmem.index = index.getRegIndex();
+    return _setVSib(kX86MemVSibYmm);
+  }
+
+  //! Set memory index.
+  ASMJIT_INLINE X86Mem& setIndex(const X86YmmReg& index, uint32_t shift) noexcept {
+    _vmem.index = index.getRegIndex();
+    return _setVSib(kX86MemVSibYmm).setShift(shift);
+  }
+
+#if !defined(ASMJIT_DISABLE_COMPILER)
+  //! Set memory index.
+  ASMJIT_INLINE X86Mem& setIndex(const X86GpVar& index) noexcept {
+    _vmem.index = ASMJIT_OP_ID(index);
+    return _setVSib(kX86MemVSibGpz);
+  }
+
+  //! Set memory index.
+  ASMJIT_INLINE X86Mem& setIndex(const X86GpVar& index, uint32_t shift) noexcept {
+    _vmem.index = ASMJIT_OP_ID(index);
+    return _setVSib(kX86MemVSibGpz).setShift(shift);
+  }
+
+  //! Set memory index.
+  ASMJIT_INLINE X86Mem& setIndex(const X86XmmVar& index) noexcept {
+    _vmem.index = ASMJIT_OP_ID(index);
+    return _setVSib(kX86MemVSibXmm);
+  }
+
+  //! Set memory index.
+  ASMJIT_INLINE X86Mem& setIndex(const X86XmmVar& index, uint32_t shift) noexcept {
+    _vmem.index = ASMJIT_OP_ID(index);
+    return _setVSib(kX86MemVSibXmm).setShift(shift);
+  }
+
+  //! Set memory index.
+  ASMJIT_INLINE X86Mem& setIndex(const X86YmmVar& index) noexcept {
+    _vmem.index = ASMJIT_OP_ID(index);
+    return _setVSib(kX86MemVSibYmm);
+  }
+
+  //! Set memory index.
+  ASMJIT_INLINE X86Mem& setIndex(const X86YmmVar& index, uint32_t shift) noexcept {
+    _vmem.index = ASMJIT_OP_ID(index);
+    return _setVSib(kX86MemVSibYmm).setShift(shift);
+  }
+#endif // !ASMJIT_DISABLE_COMPILER
+
+  //! Reset memory index.
+  ASMJIT_INLINE X86Mem& resetIndex() noexcept {
+    _vmem.index = kInvalidValue;
+    return _setVSib(kX86MemVSibGpz);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Misc]
+  // --------------------------------------------------------------------------
+
+  //! Get whether the memory operand has a base or an index register.
+  ASMJIT_INLINE bool hasBaseOrIndex() const noexcept {
+    return _vmem.base != kInvalidValue || _vmem.index != kInvalidValue;
+  }
+
+  //! Get whether the memory operand has both base and index registers.
+  ASMJIT_INLINE bool hasBaseAndIndex() const noexcept {
+    return _vmem.base != kInvalidValue && _vmem.index != kInvalidValue;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Shift]
+  // --------------------------------------------------------------------------
+
+  //! Get whether the memory operand uses a non-zero index shift.
+  ASMJIT_INLINE bool hasShift() const noexcept {
+    return (_vmem.flags & kX86MemShiftMask) != 0;
+  }
+
+  //! Get memory operand index shift (0, 1, 2 or 3).
+  ASMJIT_INLINE uint32_t getShift() const noexcept {
+    return _vmem.flags >> kX86MemShiftIndex;
+  }
+
+  //! Set memory operand index shift (0, 1, 2 or 3).
+  ASMJIT_INLINE X86Mem& setShift(uint32_t shift) noexcept {
+    _packed[0].u32[0] &= ~Utils::pack32_4x8(0x00, 0x00, 0x00, kX86MemShiftMask);
+    _packed[0].u32[0] |= Utils::pack32_4x8(0x00, 0x00, 0x00, shift << kX86MemShiftIndex);
+    return *this;
+  }
+
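+  // A short sketch of how the index shift maps to an address scale of 1, 2,
+  // 4 or 8 (illustrative only; register aliases as above):
+  //
+  //   X86Mem m(x86::ebx, x86::esi, 0, 0, 4); // dword [ebx + esi * 1].
+  //   m.setShift(3);                         // dword [ebx + esi * 8].
+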
+  // --------------------------------------------------------------------------
+  // [Displacement]
+  // --------------------------------------------------------------------------
+
+  //! Get memory operand relative displacement.
+  ASMJIT_INLINE int32_t getDisplacement() const noexcept {
+    return _vmem.displacement;
+  }
+
+  //! Set memory operand relative displacement.
+  ASMJIT_INLINE X86Mem& setDisplacement(int32_t disp) noexcept {
+    _vmem.displacement = disp;
+    return *this;
+  }
+
+  //! Reset memory operand relative displacement.
+  ASMJIT_INLINE X86Mem& resetDisplacement() noexcept {
+    _vmem.displacement = 0;
+    return *this;
+  }
+
+  //! Adjust memory operand relative displacement by `disp`.
+  ASMJIT_INLINE X86Mem& adjust(int32_t disp) noexcept {
+    _vmem.displacement += disp;
+    return *this;
+  }
+
+  //! Get new memory operand adjusted by `disp`.
+  ASMJIT_INLINE X86Mem adjusted(int32_t disp) const noexcept {
+    X86Mem result(*this);
+    result.adjust(disp);
+    return result;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86Mem& operator=(const X86Mem& other) noexcept {
+    _copy(other);
+    return *this;
+  }
+
+  ASMJIT_INLINE bool operator==(const X86Mem& other) const noexcept {
+    return (_packed[0] == other._packed[0]) && (_packed[1] == other._packed[1]);
+  }
+
+  ASMJIT_INLINE bool operator!=(const X86Mem& other) const noexcept {
+    return !(*this == other);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Statics]
+  // --------------------------------------------------------------------------
+
+  static ASMJIT_INLINE uint32_t _getGpdFlags(const Operand& base) noexcept {
+    return (base._vreg.size & 0x4) << (kX86MemGpdIndex - 2);
+  }
+};
+
+// ============================================================================
+// [asmjit::X86Var]
+// ============================================================================
+
+#if !defined(ASMJIT_DISABLE_COMPILER)
+//! Base class for all X86 variables.
+class X86Var : public Var {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new uninitialized `X86Var` instance.
+  ASMJIT_INLINE X86Var() noexcept : Var(NoInit) { reset(); }
+  //! Create a clone of `other`.
+  ASMJIT_INLINE X86Var(const X86Var& other) noexcept : Var(other) {}
+  //! Create a new uninitialized `X86Var` instance (internal).
+  explicit ASMJIT_INLINE X86Var(const _NoInit&) noexcept : Var(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [X86Var Specific]
+  // --------------------------------------------------------------------------
+
+  //! Clone X86Var operand.
+  ASMJIT_INLINE X86Var clone() const noexcept { return X86Var(*this); }
+
+  // --------------------------------------------------------------------------
+  // [Type]
+  // --------------------------------------------------------------------------
+
+  //! Get register type.
+  ASMJIT_INLINE uint32_t getRegType() const noexcept { return _vreg.type; }
+  //! Get variable type.
+  ASMJIT_INLINE uint32_t getVarType() const noexcept { return _vreg.vType; }
+
+  //! Get whether the variable is a general purpose register.
+  ASMJIT_INLINE bool isGp() const noexcept { return _vreg.type <= kX86RegTypeGpq; }
+  //! Get whether the variable is a GPB (8-bit) register.
+  ASMJIT_INLINE bool isGpb() const noexcept { return _vreg.type <= kX86RegTypeGpbHi; }
+  //! Get whether the variable is a low GPB (8-bit) register.
+  ASMJIT_INLINE bool isGpbLo() const noexcept { return _vreg.type == kX86RegTypeGpbLo; }
+  //! Get whether the variable is a high GPB (8-bit) register.
+  ASMJIT_INLINE bool isGpbHi() const noexcept { return _vreg.type == kX86RegTypeGpbHi; }
+  //! Get whether the variable is a GPW (16-bit) register.
+  ASMJIT_INLINE bool isGpw() const noexcept { return _vreg.type == kX86RegTypeGpw; }
+  //! Get whether the variable is a GPD (32-bit) register.
+  ASMJIT_INLINE bool isGpd() const noexcept { return _vreg.type == kX86RegTypeGpd; }
+  //! Get whether the variable is a GPQ (64-bit) register.
+  ASMJIT_INLINE bool isGpq() const noexcept { return _vreg.type == kX86RegTypeGpq; }
+
+  //! Get whether the variable is an MMX (64-bit) register.
+  ASMJIT_INLINE bool isMm() const noexcept { return _vreg.type == kX86RegTypeMm; }
+  //! Get whether the variable is a K (64-bit) register.
+  ASMJIT_INLINE bool isK() const noexcept { return _vreg.type == kX86RegTypeK; }
+
+  //! Get whether the variable is an XMM (128-bit) register.
+  ASMJIT_INLINE bool isXmm() const noexcept { return _vreg.type == kX86RegTypeXmm; }
+  //! Get whether the variable is a YMM (256-bit) register.
+  ASMJIT_INLINE bool isYmm() const noexcept { return _vreg.type == kX86RegTypeYmm; }
+  //! Get whether the variable is a ZMM (512-bit) register.
+  ASMJIT_INLINE bool isZmm() const noexcept { return _vreg.type == kX86RegTypeZmm; }
+
+  // --------------------------------------------------------------------------
+  // [Memory Cast]
+  // --------------------------------------------------------------------------
+
+  //! Cast this variable to a memory operand.
+  //!
+  //! NOTE: The size of the operand depends on the native variable type; use
+  //! the sized variants below if you need a specific width.
+  ASMJIT_INLINE X86Mem m(int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, disp, getSize());
+  }
+
+  //! \overload
+  ASMJIT_INLINE X86Mem m(const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, index, shift, disp, getSize());
+  }
+
+  //! Cast this variable to 8-bit memory operand.
+  ASMJIT_INLINE X86Mem m8(int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, disp, 1);
+  }
+
+  //! \overload
+  ASMJIT_INLINE X86Mem m8(const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, index, shift, disp, 1);
+  }
+
+  //! Cast this variable to 16-bit memory operand.
+  ASMJIT_INLINE X86Mem m16(int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, disp, 2);
+  }
+
+  //! \overload
+  ASMJIT_INLINE X86Mem m16(const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, index, shift, disp, 2);
+  }
+
+  //! Cast this variable to 32-bit memory operand.
+  ASMJIT_INLINE X86Mem m32(int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, disp, 4);
+  }
+
+  //! \overload
+  ASMJIT_INLINE X86Mem m32(const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, index, shift, disp, 4);
+  }
+
+  //! Cast this variable to 64-bit memory operand.
+  ASMJIT_INLINE X86Mem m64(int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, disp, 8);
+  }
+
+  //! \overload
+  ASMJIT_INLINE X86Mem m64(const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, index, shift, disp, 8);
+  }
+
+  //! Cast this variable to 80-bit memory operand (long double).
+  ASMJIT_INLINE X86Mem m80(int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, disp, 10);
+  }
+
+  //! \overload
+  ASMJIT_INLINE X86Mem m80(const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, index, shift, disp, 10);
+  }
+
+  //! Cast this variable to 128-bit memory operand.
+  ASMJIT_INLINE X86Mem m128(int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, disp, 16);
+  }
+
+  //! \overload
+  ASMJIT_INLINE X86Mem m128(const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, index, shift, disp, 16);
+  }
+
+  //! Cast this variable to 256-bit memory operand.
+  ASMJIT_INLINE X86Mem m256(int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, disp, 32);
+  }
+
+  //! \overload
+  ASMJIT_INLINE X86Mem m256(const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, index, shift, disp, 32);
+  }
+
+  //! Cast this variable to 512-bit memory operand.
+  ASMJIT_INLINE X86Mem m512(int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, disp, 64);
+  }
+
+  //! \overload
+  ASMJIT_INLINE X86Mem m512(const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0) const noexcept {
+    return X86Mem(Init, kMemTypeStackIndex, *this, index, shift, disp, 64);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86Var& operator=(const X86Var& other) noexcept {
+    _copy(other);
+    return *this;
+  }
+
+  ASMJIT_INLINE bool operator==(const X86Var& other) const noexcept {
+    return _packed[0] == other._packed[0];
+  }
+
+  ASMJIT_INLINE bool operator!=(const X86Var& other) const noexcept {
+    return _packed[0] != other._packed[0];
+  }
+
+  // --------------------------------------------------------------------------
+  // [Private]
+  // --------------------------------------------------------------------------
+
+protected:
+  ASMJIT_INLINE X86Var(const X86Var& other, uint32_t reg, uint32_t size) noexcept : Var(NoInit) {
+    _init_packed_op_sz_w0_id(kTypeVar, size, (reg << 8) + other._vreg.index, other._base.id);
+    _vreg.vType = other._vreg.vType;
+  }
+};
+#endif // !ASMJIT_DISABLE_COMPILER
+
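+// A minimal sketch of the `X86Var` memory casts above (illustrative only;
+// `v` stands for a hypothetical variable created through an `X86Compiler`,
+// which is declared elsewhere):
+//
+//   X86Mem home = v.m();  // The variable's home slot, using its native size.
+//   X86Mem lo8 = v.m8();  // The same slot accessed as a single byte.
+//   X86Mem dw = v.m32(4); // Dword access at displacement 4.
+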
+// ============================================================================
+// [asmjit::X86GpVar]
+// ============================================================================
+
+#if !defined(ASMJIT_DISABLE_COMPILER)
+//! Gp variable.
+class X86GpVar : public X86Var {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+protected:
+  ASMJIT_INLINE X86GpVar(const X86GpVar& other, uint32_t reg, uint32_t size) noexcept
+    : X86Var(other, reg, size) {}
+
+public:
+  //! Create a new uninitialized `X86GpVar` instance.
+  ASMJIT_INLINE X86GpVar() noexcept : X86Var() {}
+  //! Create a clone of `other`.
+  ASMJIT_INLINE X86GpVar(const X86GpVar& other) noexcept : X86Var(other) {}
+  //! Create a new uninitialized `X86GpVar` instance (internal).
+  explicit ASMJIT_INLINE X86GpVar(const _NoInit&) noexcept : X86Var(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [X86GpVar Specific]
+  // --------------------------------------------------------------------------
+
+  //! Clone X86GpVar operand.
+  ASMJIT_INLINE X86GpVar clone() const noexcept { return X86GpVar(*this); }
+  //! Reset X86GpVar operand.
+  ASMJIT_INLINE void reset() noexcept { X86Var::reset(); }
+
+  // --------------------------------------------------------------------------
+  // [X86GpVar Cast]
+  // --------------------------------------------------------------------------
+
+  //! Cast this variable to the same register type/size as `other`.
+  //!
+  //! This function has been designed to help with maintaining code that runs
+  //! in both 32-bit and 64-bit modes. If you have variables of mixed types,
+  //! use `X86GpVar::as()` to cast one type to another.
+  ASMJIT_INLINE X86GpVar as(const X86GpVar& other) const noexcept {
+    return X86GpVar(*this, other.getRegType(), other.getSize());
+  }
+
+  //! Cast this variable to 8-bit (LO) part of variable.
+  ASMJIT_INLINE X86GpVar r8() const noexcept { return X86GpVar(*this, kX86RegTypeGpbLo, 1); }
+  //! Cast this variable to 8-bit (LO) part of variable.
+  ASMJIT_INLINE X86GpVar r8Lo() const noexcept { return X86GpVar(*this, kX86RegTypeGpbLo, 1); }
+  //! Cast this variable to 8-bit (HI) part of variable.
+  ASMJIT_INLINE X86GpVar r8Hi() const noexcept { return X86GpVar(*this, kX86RegTypeGpbHi, 1); }
+
+  //! Cast this variable to 16-bit part of variable.
+  ASMJIT_INLINE X86GpVar r16() const noexcept { return X86GpVar(*this, kX86RegTypeGpw, 2); }
+  //! Cast this variable to 32-bit part of variable.
+  ASMJIT_INLINE X86GpVar r32() const noexcept { return X86GpVar(*this, kX86RegTypeGpd, 4); }
+  //! Cast this variable to 64-bit part of variable.
+  ASMJIT_INLINE X86GpVar r64() const noexcept { return X86GpVar(*this, kX86RegTypeGpq, 8); }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86GpVar& operator=(const X86GpVar& other) noexcept { _copy(other); return *this; }
+
+  ASMJIT_INLINE bool operator==(const X86GpVar& other) const noexcept { return X86Var::operator==(other); }
+  ASMJIT_INLINE bool operator!=(const X86GpVar& other) const noexcept { return X86Var::operator!=(other); }
+};
+#endif // !ASMJIT_DISABLE_COMPILER
+
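+// A minimal sketch of the `X86GpVar` width casts above, which help keep code
+// portable between 32-bit and 64-bit mode (illustrative only; `dst` and `src`
+// stand for hypothetical variables created through an `X86Compiler`):
+//
+//   dst.r32();   // View `dst` as its 32-bit part.
+//   dst.as(src); // View `dst` with `src`'s register type and size.
+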
+// ============================================================================
+// [asmjit::X86MmVar]
+// ============================================================================
+
+#if !defined(ASMJIT_DISABLE_COMPILER)
+//! Mm variable.
+class X86MmVar : public X86Var {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! Create a new uninitialized `X86MmVar` instance.
+  ASMJIT_INLINE X86MmVar() noexcept : X86Var() {}
+  //! Create a clone of `other`.
+  ASMJIT_INLINE X86MmVar(const X86MmVar& other) noexcept : X86Var(other) {}
+
+  //! Create a new uninitialized `X86MmVar` instance (internal).
+  explicit ASMJIT_INLINE X86MmVar(const _NoInit&) noexcept : X86Var(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [X86MmVar Specific]
+  // --------------------------------------------------------------------------
+
+  //! Clone X86MmVar operand.
+  ASMJIT_INLINE X86MmVar clone() const noexcept { return X86MmVar(*this); }
+  //! Reset X86MmVar operand.
+  ASMJIT_INLINE void reset() noexcept { X86Var::reset(); }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86MmVar& operator=(const X86MmVar& other) noexcept { _copy(other); return *this; }
+
+  ASMJIT_INLINE bool operator==(const X86MmVar& other) const noexcept { return X86Var::operator==(other); }
+  ASMJIT_INLINE bool operator!=(const X86MmVar& other) const noexcept { return X86Var::operator!=(other); }
+};
+#endif // !ASMJIT_DISABLE_COMPILER
+
+// ============================================================================
+// [asmjit::X86XmmVar]
+// ============================================================================
+
+#if !defined(ASMJIT_DISABLE_COMPILER)
+//! XMM variable.
+class X86XmmVar : public X86Var {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+protected:
+  ASMJIT_INLINE X86XmmVar(const X86Var& other, uint32_t reg, uint32_t size) noexcept
+    : X86Var(other, reg, size) {}
+
+  friend class X86YmmVar;
+  friend class X86ZmmVar;
+
+public:
+  //! Create a new uninitialized `X86XmmVar` instance.
+  ASMJIT_INLINE X86XmmVar() noexcept : X86Var() {}
+  //! Create a clone of `other`.
+  ASMJIT_INLINE X86XmmVar(const X86XmmVar& other) noexcept : X86Var(other) {}
+  //! Create a new uninitialized `X86XmmVar` instance (internal).
+  explicit ASMJIT_INLINE X86XmmVar(const _NoInit&) noexcept : X86Var(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [X86XmmVar Specific]
+  // --------------------------------------------------------------------------
+
+  //! Clone X86XmmVar operand.
+  ASMJIT_INLINE X86XmmVar clone() const noexcept { return X86XmmVar(*this); }
+  //! Reset X86XmmVar operand.
+  ASMJIT_INLINE void reset() noexcept { X86Var::reset(); }
+
+  // --------------------------------------------------------------------------
+  // [X86XmmVar Cast]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86XmmVar xmm() const noexcept { return X86XmmVar(*this); }
+  ASMJIT_INLINE X86YmmVar ymm() const noexcept;
+  ASMJIT_INLINE X86ZmmVar zmm() const noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86XmmVar& operator=(const X86XmmVar& other) noexcept { _copy(other); return *this; }
+
+  ASMJIT_INLINE bool operator==(const X86XmmVar& other) const noexcept { return X86Var::operator==(other); }
+  ASMJIT_INLINE bool operator!=(const X86XmmVar& other) const noexcept { return X86Var::operator!=(other); }
+};
+#endif // !ASMJIT_DISABLE_COMPILER
+
+// ============================================================================
+// [asmjit::X86YmmVar]
+// ============================================================================
+
+#if !defined(ASMJIT_DISABLE_COMPILER)
+//! YMM variable.
+class X86YmmVar : public X86Var {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+protected:
+  ASMJIT_INLINE X86YmmVar(const X86Var& other, uint32_t reg, uint32_t size) noexcept
+    : X86Var(other, reg, size) {}
+
+  friend class X86XmmVar;
+  friend class X86ZmmVar;
+
+public:
+  //! Create a new uninitialized `X86YmmVar` instance.
+  ASMJIT_INLINE X86YmmVar() noexcept : X86Var() {}
+  //! Create a clone of `other`.
+  ASMJIT_INLINE X86YmmVar(const X86YmmVar& other) noexcept : X86Var(other) {}
+  //! Create a new uninitialized `X86YmmVar` instance (internal).
+  explicit ASMJIT_INLINE X86YmmVar(const _NoInit&) noexcept : X86Var(NoInit) {}
+
+  // --------------------------------------------------------------------------
+  // [X86YmmVar Specific]
+  // --------------------------------------------------------------------------
+
+  //! Clone X86YmmVar operand.
+  ASMJIT_INLINE X86YmmVar clone() const noexcept { return X86YmmVar(*this); }
+  //! Reset X86YmmVar operand.
+  ASMJIT_INLINE void reset() noexcept { X86Var::reset(); }
+
+  // --------------------------------------------------------------------------
+  // [X86YmmVar Cast]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86XmmVar xmm() const noexcept { return X86XmmVar(*this, kX86RegTypeXmm, 16); }
+  ASMJIT_INLINE X86YmmVar ymm() const noexcept { return X86YmmVar(*this); }
+  ASMJIT_INLINE X86ZmmVar zmm() const noexcept;
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86YmmVar& operator=(const X86YmmVar& other) noexcept { _copy(other); return *this; }
+
+  ASMJIT_INLINE bool operator==(const X86YmmVar& other) const noexcept { return X86Var::operator==(other); }
+  ASMJIT_INLINE bool operator!=(const X86YmmVar& other) const noexcept { return X86Var::operator!=(other); }
+};
+
+ASMJIT_INLINE X86YmmVar X86XmmVar::ymm() const noexcept { return X86YmmVar(*this, kX86RegTypeYmm, 32); }
+#endif // !ASMJIT_DISABLE_COMPILER
+
+// ============================================================================
+// [asmjit::X86ZmmVar]
+// ============================================================================
+
+#if !defined(ASMJIT_DISABLE_COMPILER)
+//! ZMM variable.
+class X86ZmmVar : public X86Var {
+public:
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+protected:
+  ASMJIT_INLINE X86ZmmVar(const X86Var& other, uint32_t reg, uint32_t size) noexcept
+    : X86Var(other, reg, size) {}
+
+  friend class X86XmmVar;
+  friend class X86YmmVar;
+
+public:
+  //! Create a new uninitialized `X86ZmmVar` instance.
+  ASMJIT_INLINE X86ZmmVar() noexcept : X86Var() {}
+  //! Create a clone of `other`.
+  ASMJIT_INLINE X86ZmmVar(const X86ZmmVar& other) noexcept : X86Var(other) {}
+  //! Create a new uninitialized `X86ZmmVar` instance (internal).
+  explicit ASMJIT_INLINE X86ZmmVar(const _NoInit&) noexcept : X86Var(NoInit) {}
+
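+  // The casts below reinterpret the same variable at another vector width,
+  // mirroring how XMM and YMM registers alias the low bits of a ZMM register.
+  // A minimal sketch (illustrative only; `z` stands for a hypothetical
+  // variable created through an `X86Compiler`):
+  //
+  //   X86YmmVar y = z.ymm(); // Low 256 bits of `z`.
+  //   X86XmmVar x = z.xmm(); // Low 128 bits of `z`.
+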
+  // --------------------------------------------------------------------------
+  // [X86ZmmVar Specific]
+  // --------------------------------------------------------------------------
+
+  //! Clone X86ZmmVar operand.
+  ASMJIT_INLINE X86ZmmVar clone() const noexcept { return X86ZmmVar(*this); }
+  //! Reset X86ZmmVar operand.
+  ASMJIT_INLINE void reset() noexcept { X86Var::reset(); }
+
+  // --------------------------------------------------------------------------
+  // [X86ZmmVar Cast]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86XmmVar xmm() const noexcept { return X86XmmVar(*this, kX86RegTypeXmm, 16); }
+  ASMJIT_INLINE X86YmmVar ymm() const noexcept { return X86YmmVar(*this, kX86RegTypeYmm, 32); }
+  ASMJIT_INLINE X86ZmmVar zmm() const noexcept { return X86ZmmVar(*this); }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE X86ZmmVar& operator=(const X86ZmmVar& other) noexcept { _copy(other); return *this; }
+
+  ASMJIT_INLINE bool operator==(const X86ZmmVar& other) const noexcept { return X86Var::operator==(other); }
+  ASMJIT_INLINE bool operator!=(const X86ZmmVar& other) const noexcept { return X86Var::operator!=(other); }
+};
+
+ASMJIT_INLINE X86ZmmVar X86XmmVar::zmm() const noexcept { return X86ZmmVar(*this, kX86RegTypeZmm, 64); }
+ASMJIT_INLINE X86ZmmVar X86YmmVar::zmm() const noexcept { return X86ZmmVar(*this, kX86RegTypeZmm, 64); }
+#endif // !ASMJIT_DISABLE_COMPILER
+#endif
+
+// ============================================================================
+// [asmjit::X86RegData]
+// ============================================================================
+
+struct X86RegData {
+  X86GpReg gpd[16];
+  X86GpReg gpq[16];
+
+  X86GpReg gpbLo[16];
+  X86GpReg gpbHi[4];
+  X86GpReg gpw[16];
+
+  X86XmmReg xmm[32];
+  X86YmmReg ymm[32];
+  X86ZmmReg zmm[32];
+
+  X86KReg k[8];
+  X86FpReg fp[8];
+  X86MmReg mm[8];
+
+  X86SegReg seg[7];
+
+  X86GpReg noGp;
+  X86RipReg rip;
+};
+ASMJIT_VARAPI const X86RegData x86RegData;
+
+// ============================================================================
+// [asmjit::x86]
+// ============================================================================
+
+// The macro is only defined by `x86operand_regs.cpp` when exporting registers.
+#if !defined(ASMJIT_EXPORTS_X86_REGS)
+
+namespace x86 {
+
+// ============================================================================
+// [asmjit::x86 - Reg]
+// ============================================================================
+
+#define ASMJIT_DEF_REG(_Type_, _Name_, _Field_) \
+  static const _Type_& _Name_ = x86RegData._Field_;
+
+ASMJIT_DEF_REG(X86GpReg , eax  , gpd[0])  //!< 32-bit GPD register.
+ASMJIT_DEF_REG(X86GpReg , ecx  , gpd[1])  //!< 32-bit GPD register.
+ASMJIT_DEF_REG(X86GpReg , edx  , gpd[2])  //!< 32-bit GPD register.
+ASMJIT_DEF_REG(X86GpReg , ebx  , gpd[3])  //!< 32-bit GPD register.
+ASMJIT_DEF_REG(X86GpReg , esp  , gpd[4])  //!< 32-bit GPD register.
+ASMJIT_DEF_REG(X86GpReg , ebp  , gpd[5])  //!< 32-bit GPD register.
+ASMJIT_DEF_REG(X86GpReg , esi  , gpd[6])  //!< 32-bit GPD register.
+ASMJIT_DEF_REG(X86GpReg , edi  , gpd[7])  //!< 32-bit GPD register.
+ASMJIT_DEF_REG(X86GpReg , r8d  , gpd[8])  //!< 32-bit GPD register (X64).
+ASMJIT_DEF_REG(X86GpReg , r9d  , gpd[9])  //!< 32-bit GPD register (X64).
+ASMJIT_DEF_REG(X86GpReg , r10d , gpd[10]) //!< 32-bit GPD register (X64).
+ASMJIT_DEF_REG(X86GpReg , r11d , gpd[11]) //!< 32-bit GPD register (X64).
+ASMJIT_DEF_REG(X86GpReg , r12d , gpd[12]) //!< 32-bit GPD register (X64).
+ASMJIT_DEF_REG(X86GpReg , r13d , gpd[13]) //!< 32-bit GPD register (X64).
+ASMJIT_DEF_REG(X86GpReg , r14d , gpd[14]) //!< 32-bit GPD register (X64). +ASMJIT_DEF_REG(X86GpReg , r15d , gpd[15]) //!< 32-bit GPD register (X64). + +ASMJIT_DEF_REG(X86GpReg , rax , gpq[0]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , rcx , gpq[1]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , rdx , gpq[2]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , rbx , gpq[3]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , rsp , gpq[4]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , rbp , gpq[5]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , rsi , gpq[6]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , rdi , gpq[7]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , r8 , gpq[8]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , r9 , gpq[9]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , r10 , gpq[10]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , r11 , gpq[11]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , r12 , gpq[12]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , r13 , gpq[13]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , r14 , gpq[14]) //!< 64-bit GPQ register (X64). +ASMJIT_DEF_REG(X86GpReg , r15 , gpq[15]) //!< 64-bit GPQ register (X64). + +ASMJIT_DEF_REG(X86GpReg , al , gpbLo[0]) //!< 8-bit low GPB register. +ASMJIT_DEF_REG(X86GpReg , cl , gpbLo[1]) //!< 8-bit low GPB register. +ASMJIT_DEF_REG(X86GpReg , dl , gpbLo[2]) //!< 8-bit low GPB register. +ASMJIT_DEF_REG(X86GpReg , bl , gpbLo[3]) //!< 8-bit low GPB register. +ASMJIT_DEF_REG(X86GpReg , spl , gpbLo[4]) //!< 8-bit low GPB register (X64). +ASMJIT_DEF_REG(X86GpReg , bpl , gpbLo[5]) //!< 8-bit low GPB register (X64). +ASMJIT_DEF_REG(X86GpReg , sil , gpbLo[6]) //!< 8-bit low GPB register (X64). +ASMJIT_DEF_REG(X86GpReg , dil , gpbLo[7]) //!< 8-bit low GPB register (X64). +ASMJIT_DEF_REG(X86GpReg , r8b , gpbLo[8]) //!< 8-bit low GPB register (X64). +ASMJIT_DEF_REG(X86GpReg , r9b , gpbLo[9]) //!< 8-bit low GPB register (X64). +ASMJIT_DEF_REG(X86GpReg , r10b , gpbLo[10])//!< 8-bit low GPB register (X64). +ASMJIT_DEF_REG(X86GpReg , r11b , gpbLo[11])//!< 8-bit low GPB register (X64). +ASMJIT_DEF_REG(X86GpReg , r12b , gpbLo[12])//!< 8-bit low GPB register (X64). +ASMJIT_DEF_REG(X86GpReg , r13b , gpbLo[13])//!< 8-bit low GPB register (X64). +ASMJIT_DEF_REG(X86GpReg , r14b , gpbLo[14])//!< 8-bit low GPB register (X64). +ASMJIT_DEF_REG(X86GpReg , r15b , gpbLo[15])//!< 8-bit low GPB register (X64). + +ASMJIT_DEF_REG(X86GpReg , ah , gpbHi[0]) //!< 8-bit high GPB register. +ASMJIT_DEF_REG(X86GpReg , ch , gpbHi[1]) //!< 8-bit high GPB register. +ASMJIT_DEF_REG(X86GpReg , dh , gpbHi[2]) //!< 8-bit high GPB register. +ASMJIT_DEF_REG(X86GpReg , bh , gpbHi[3]) //!< 8-bit high GPB register. + +ASMJIT_DEF_REG(X86GpReg , ax , gpw[0]) //!< 16-bit GPW register. +ASMJIT_DEF_REG(X86GpReg , cx , gpw[1]) //!< 16-bit GPW register. +ASMJIT_DEF_REG(X86GpReg , dx , gpw[2]) //!< 16-bit GPW register. +ASMJIT_DEF_REG(X86GpReg , bx , gpw[3]) //!< 16-bit GPW register. +ASMJIT_DEF_REG(X86GpReg , sp , gpw[4]) //!< 16-bit GPW register. +ASMJIT_DEF_REG(X86GpReg , bp , gpw[5]) //!< 16-bit GPW register. +ASMJIT_DEF_REG(X86GpReg , si , gpw[6]) //!< 16-bit GPW register. +ASMJIT_DEF_REG(X86GpReg , di , gpw[7]) //!< 16-bit GPW register. +ASMJIT_DEF_REG(X86GpReg , r8w , gpw[8]) //!< 16-bit GPW register (X64). +ASMJIT_DEF_REG(X86GpReg , r9w , gpw[9]) //!< 16-bit GPW register (X64). 
+ASMJIT_DEF_REG(X86GpReg , r10w , gpw[10]) //!< 16-bit GPW register (X64). +ASMJIT_DEF_REG(X86GpReg , r11w , gpw[11]) //!< 16-bit GPW register (X64). +ASMJIT_DEF_REG(X86GpReg , r12w , gpw[12]) //!< 16-bit GPW register (X64). +ASMJIT_DEF_REG(X86GpReg , r13w , gpw[13]) //!< 16-bit GPW register (X64). +ASMJIT_DEF_REG(X86GpReg , r14w , gpw[14]) //!< 16-bit GPW register (X64). +ASMJIT_DEF_REG(X86GpReg , r15w , gpw[15]) //!< 16-bit GPW register (X64). + +ASMJIT_DEF_REG(X86XmmReg, xmm0 , xmm[0]) //!< 128-bit XMM register. +ASMJIT_DEF_REG(X86XmmReg, xmm1 , xmm[1]) //!< 128-bit XMM register. +ASMJIT_DEF_REG(X86XmmReg, xmm2 , xmm[2]) //!< 128-bit XMM register. +ASMJIT_DEF_REG(X86XmmReg, xmm3 , xmm[3]) //!< 128-bit XMM register. +ASMJIT_DEF_REG(X86XmmReg, xmm4 , xmm[4]) //!< 128-bit XMM register. +ASMJIT_DEF_REG(X86XmmReg, xmm5 , xmm[5]) //!< 128-bit XMM register. +ASMJIT_DEF_REG(X86XmmReg, xmm6 , xmm[6]) //!< 128-bit XMM register. +ASMJIT_DEF_REG(X86XmmReg, xmm7 , xmm[7]) //!< 128-bit XMM register. +ASMJIT_DEF_REG(X86XmmReg, xmm8 , xmm[8]) //!< 128-bit XMM register (X64). +ASMJIT_DEF_REG(X86XmmReg, xmm9 , xmm[9]) //!< 128-bit XMM register (X64). +ASMJIT_DEF_REG(X86XmmReg, xmm10, xmm[10]) //!< 128-bit XMM register (X64). +ASMJIT_DEF_REG(X86XmmReg, xmm11, xmm[11]) //!< 128-bit XMM register (X64). +ASMJIT_DEF_REG(X86XmmReg, xmm12, xmm[12]) //!< 128-bit XMM register (X64). +ASMJIT_DEF_REG(X86XmmReg, xmm13, xmm[13]) //!< 128-bit XMM register (X64). +ASMJIT_DEF_REG(X86XmmReg, xmm14, xmm[14]) //!< 128-bit XMM register (X64). +ASMJIT_DEF_REG(X86XmmReg, xmm15, xmm[15]) //!< 128-bit XMM register (X64). +ASMJIT_DEF_REG(X86XmmReg, xmm16, xmm[16]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm17, xmm[17]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm18, xmm[18]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm19, xmm[19]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm20, xmm[20]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm21, xmm[21]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm22, xmm[22]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm23, xmm[23]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm24, xmm[24]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm25, xmm[25]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm26, xmm[26]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm27, xmm[27]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm28, xmm[28]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm29, xmm[29]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm30, xmm[30]) //!< 128-bit XMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86XmmReg, xmm31, xmm[31]) //!< 128-bit XMM register (X64 & AVX512VL+). + +ASMJIT_DEF_REG(X86YmmReg, ymm0 , ymm[0]) //!< 256-bit YMM register. +ASMJIT_DEF_REG(X86YmmReg, ymm1 , ymm[1]) //!< 256-bit YMM register. +ASMJIT_DEF_REG(X86YmmReg, ymm2 , ymm[2]) //!< 256-bit YMM register. +ASMJIT_DEF_REG(X86YmmReg, ymm3 , ymm[3]) //!< 256-bit YMM register. +ASMJIT_DEF_REG(X86YmmReg, ymm4 , ymm[4]) //!< 256-bit YMM register. +ASMJIT_DEF_REG(X86YmmReg, ymm5 , ymm[5]) //!< 256-bit YMM register. +ASMJIT_DEF_REG(X86YmmReg, ymm6 , ymm[6]) //!< 256-bit YMM register. 
+ASMJIT_DEF_REG(X86YmmReg, ymm7 , ymm[7]) //!< 256-bit YMM register. +ASMJIT_DEF_REG(X86YmmReg, ymm8 , ymm[8]) //!< 256-bit YMM register (X64). +ASMJIT_DEF_REG(X86YmmReg, ymm9 , ymm[9]) //!< 256-bit YMM register (X64). +ASMJIT_DEF_REG(X86YmmReg, ymm10, ymm[10]) //!< 256-bit YMM register (X64). +ASMJIT_DEF_REG(X86YmmReg, ymm11, ymm[11]) //!< 256-bit YMM register (X64). +ASMJIT_DEF_REG(X86YmmReg, ymm12, ymm[12]) //!< 256-bit YMM register (X64). +ASMJIT_DEF_REG(X86YmmReg, ymm13, ymm[13]) //!< 256-bit YMM register (X64). +ASMJIT_DEF_REG(X86YmmReg, ymm14, ymm[14]) //!< 256-bit YMM register (X64). +ASMJIT_DEF_REG(X86YmmReg, ymm15, ymm[15]) //!< 256-bit YMM register (X64). +ASMJIT_DEF_REG(X86YmmReg, ymm16, ymm[16]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm17, ymm[17]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm18, ymm[18]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm19, ymm[19]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm20, ymm[20]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm21, ymm[21]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm22, ymm[22]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm23, ymm[23]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm24, ymm[24]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm25, ymm[25]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm26, ymm[26]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm27, ymm[27]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm28, ymm[28]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm29, ymm[29]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm30, ymm[30]) //!< 256-bit YMM register (X64 & AVX512VL+). +ASMJIT_DEF_REG(X86YmmReg, ymm31, ymm[31]) //!< 256-bit YMM register (X64 & AVX512VL+). + +ASMJIT_DEF_REG(X86ZmmReg, zmm0 , zmm[0]) //!< 512-bit ZMM register. +ASMJIT_DEF_REG(X86ZmmReg, zmm1 , zmm[1]) //!< 512-bit ZMM register. +ASMJIT_DEF_REG(X86ZmmReg, zmm2 , zmm[2]) //!< 512-bit ZMM register. +ASMJIT_DEF_REG(X86ZmmReg, zmm3 , zmm[3]) //!< 512-bit ZMM register. +ASMJIT_DEF_REG(X86ZmmReg, zmm4 , zmm[4]) //!< 512-bit ZMM register. +ASMJIT_DEF_REG(X86ZmmReg, zmm5 , zmm[5]) //!< 512-bit ZMM register. +ASMJIT_DEF_REG(X86ZmmReg, zmm6 , zmm[6]) //!< 512-bit ZMM register. +ASMJIT_DEF_REG(X86ZmmReg, zmm7 , zmm[7]) //!< 512-bit ZMM register. +ASMJIT_DEF_REG(X86ZmmReg, zmm8 , zmm[8]) //!< 512-bit ZMM register (X64). +ASMJIT_DEF_REG(X86ZmmReg, zmm9 , zmm[9]) //!< 512-bit ZMM register (X64). +ASMJIT_DEF_REG(X86ZmmReg, zmm10, zmm[10]) //!< 512-bit ZMM register (X64). +ASMJIT_DEF_REG(X86ZmmReg, zmm11, zmm[11]) //!< 512-bit ZMM register (X64). +ASMJIT_DEF_REG(X86ZmmReg, zmm12, zmm[12]) //!< 512-bit ZMM register (X64). +ASMJIT_DEF_REG(X86ZmmReg, zmm13, zmm[13]) //!< 512-bit ZMM register (X64). +ASMJIT_DEF_REG(X86ZmmReg, zmm14, zmm[14]) //!< 512-bit ZMM register (X64). +ASMJIT_DEF_REG(X86ZmmReg, zmm15, zmm[15]) //!< 512-bit ZMM register (X64). +ASMJIT_DEF_REG(X86ZmmReg, zmm16, zmm[16]) //!< 512-bit ZMM register (X64 & AVX512+). +ASMJIT_DEF_REG(X86ZmmReg, zmm17, zmm[17]) //!< 512-bit ZMM register (X64 & AVX512+). +ASMJIT_DEF_REG(X86ZmmReg, zmm18, zmm[18]) //!< 512-bit ZMM register (X64 & AVX512+). 
+ASMJIT_DEF_REG(X86ZmmReg, zmm19, zmm[19]) //!< 512-bit ZMM register (X64 & AVX512+).
+ASMJIT_DEF_REG(X86ZmmReg, zmm20, zmm[20]) //!< 512-bit ZMM register (X64 & AVX512+).
+ASMJIT_DEF_REG(X86ZmmReg, zmm21, zmm[21]) //!< 512-bit ZMM register (X64 & AVX512+).
+ASMJIT_DEF_REG(X86ZmmReg, zmm22, zmm[22]) //!< 512-bit ZMM register (X64 & AVX512+).
+ASMJIT_DEF_REG(X86ZmmReg, zmm23, zmm[23]) //!< 512-bit ZMM register (X64 & AVX512+).
+ASMJIT_DEF_REG(X86ZmmReg, zmm24, zmm[24]) //!< 512-bit ZMM register (X64 & AVX512+).
+ASMJIT_DEF_REG(X86ZmmReg, zmm25, zmm[25]) //!< 512-bit ZMM register (X64 & AVX512+).
+ASMJIT_DEF_REG(X86ZmmReg, zmm26, zmm[26]) //!< 512-bit ZMM register (X64 & AVX512+).
+ASMJIT_DEF_REG(X86ZmmReg, zmm27, zmm[27]) //!< 512-bit ZMM register (X64 & AVX512+).
+ASMJIT_DEF_REG(X86ZmmReg, zmm28, zmm[28]) //!< 512-bit ZMM register (X64 & AVX512+).
+ASMJIT_DEF_REG(X86ZmmReg, zmm29, zmm[29]) //!< 512-bit ZMM register (X64 & AVX512+).
+ASMJIT_DEF_REG(X86ZmmReg, zmm30, zmm[30]) //!< 512-bit ZMM register (X64 & AVX512+).
+ASMJIT_DEF_REG(X86ZmmReg, zmm31, zmm[31]) //!< 512-bit ZMM register (X64 & AVX512+).
+
+ASMJIT_DEF_REG(X86KReg , k0 , k[0]) //!< 64-bit K register.
+ASMJIT_DEF_REG(X86KReg , k1 , k[1]) //!< 64-bit K register.
+ASMJIT_DEF_REG(X86KReg , k2 , k[2]) //!< 64-bit K register.
+ASMJIT_DEF_REG(X86KReg , k3 , k[3]) //!< 64-bit K register.
+ASMJIT_DEF_REG(X86KReg , k4 , k[4]) //!< 64-bit K register.
+ASMJIT_DEF_REG(X86KReg , k5 , k[5]) //!< 64-bit K register.
+ASMJIT_DEF_REG(X86KReg , k6 , k[6]) //!< 64-bit K register.
+ASMJIT_DEF_REG(X86KReg , k7 , k[7]) //!< 64-bit K register.
+
+ASMJIT_DEF_REG(X86FpReg , fp0 , fp[0]) //!< 80-bit FPU register.
+ASMJIT_DEF_REG(X86FpReg , fp1 , fp[1]) //!< 80-bit FPU register.
+ASMJIT_DEF_REG(X86FpReg , fp2 , fp[2]) //!< 80-bit FPU register.
+ASMJIT_DEF_REG(X86FpReg , fp3 , fp[3]) //!< 80-bit FPU register.
+ASMJIT_DEF_REG(X86FpReg , fp4 , fp[4]) //!< 80-bit FPU register.
+ASMJIT_DEF_REG(X86FpReg , fp5 , fp[5]) //!< 80-bit FPU register.
+ASMJIT_DEF_REG(X86FpReg , fp6 , fp[6]) //!< 80-bit FPU register.
+ASMJIT_DEF_REG(X86FpReg , fp7 , fp[7]) //!< 80-bit FPU register.
+
+ASMJIT_DEF_REG(X86MmReg , mm0 , mm[0]) //!< 64-bit MMX register.
+ASMJIT_DEF_REG(X86MmReg , mm1 , mm[1]) //!< 64-bit MMX register.
+ASMJIT_DEF_REG(X86MmReg , mm2 , mm[2]) //!< 64-bit MMX register.
+ASMJIT_DEF_REG(X86MmReg , mm3 , mm[3]) //!< 64-bit MMX register.
+ASMJIT_DEF_REG(X86MmReg , mm4 , mm[4]) //!< 64-bit MMX register.
+ASMJIT_DEF_REG(X86MmReg , mm5 , mm[5]) //!< 64-bit MMX register.
+ASMJIT_DEF_REG(X86MmReg , mm6 , mm[6]) //!< 64-bit MMX register.
+ASMJIT_DEF_REG(X86MmReg , mm7 , mm[7]) //!< 64-bit MMX register.
+
+ASMJIT_DEF_REG(X86SegReg, es , seg[1]) //!< Es segment register.
+ASMJIT_DEF_REG(X86SegReg, cs , seg[2]) //!< Cs segment register.
+ASMJIT_DEF_REG(X86SegReg, ss , seg[3]) //!< Ss segment register.
+ASMJIT_DEF_REG(X86SegReg, ds , seg[4]) //!< Ds segment register.
+ASMJIT_DEF_REG(X86SegReg, fs , seg[5]) //!< Fs segment register.
+ASMJIT_DEF_REG(X86SegReg, gs , seg[6]) //!< Gs segment register.
+
+ASMJIT_DEF_REG(X86GpReg , noGpReg, noGp) //!< No GP register (for `X86Mem` operand).
+ASMJIT_DEF_REG(X86RipReg, rip, rip) //!< RIP register.
+
+#undef ASMJIT_DEF_REG
+
+//! Create 8-bit low GPB register operand.
+static ASMJIT_INLINE X86GpReg gpb_lo(uint32_t index) noexcept { return X86GpReg(kX86RegTypeGpbLo, index, 1); }
+//! Create 8-bit high GPB register operand.
+static ASMJIT_INLINE X86GpReg gpb_hi(uint32_t index) noexcept { return X86GpReg(kX86RegTypeGpbHi, index, 1); }
+//! Create 16-bit GPW register operand.
+static ASMJIT_INLINE X86GpReg gpw(uint32_t index) noexcept { return X86GpReg(kX86RegTypeGpw, index, 2); }
+//! Create 32-bit GPD register operand.
+static ASMJIT_INLINE X86GpReg gpd(uint32_t index) noexcept { return X86GpReg(kX86RegTypeGpd, index, 4); }
+//! Create 64-bit GPQ register operand (X64).
+static ASMJIT_INLINE X86GpReg gpq(uint32_t index) noexcept { return X86GpReg(kX86RegTypeGpq, index, 8); }
+//! Create 80-bit Fp register operand.
+static ASMJIT_INLINE X86FpReg fp(uint32_t index) noexcept { return X86FpReg(kX86RegTypeFp, index, 10); }
+//! Create 64-bit Mm register operand.
+static ASMJIT_INLINE X86MmReg mm(uint32_t index) noexcept { return X86MmReg(kX86RegTypeMm, index, 8); }
+//! Create 64-bit K register operand.
+static ASMJIT_INLINE X86KReg k(uint32_t index) noexcept { return X86KReg(kX86RegTypeK, index, 8); }
+//! Create 128-bit XMM register operand.
+static ASMJIT_INLINE X86XmmReg xmm(uint32_t index) noexcept { return X86XmmReg(kX86RegTypeXmm, index, 16); }
+//! Create 256-bit YMM register operand.
+static ASMJIT_INLINE X86YmmReg ymm(uint32_t index) noexcept { return X86YmmReg(kX86RegTypeYmm, index, 32); }
+//! Create 512-bit ZMM register operand.
+static ASMJIT_INLINE X86ZmmReg zmm(uint32_t index) noexcept { return X86ZmmReg(kX86RegTypeZmm, index, 64); }
+
+// ============================================================================
+// [asmjit::x86 - Ptr (Reg)]
+// ============================================================================
+
+//! Create `[base.reg + disp]` memory operand with no/custom size information.
+static ASMJIT_INLINE X86Mem ptr(const X86GpReg& base, int32_t disp = 0, uint32_t size = 0) noexcept {
+  return X86Mem(base, disp, size);
+}
+//! Create `[base.reg + (index.reg << shift) + disp]` memory operand with no/custom size information.
+static ASMJIT_INLINE X86Mem ptr(const X86GpReg& base, const X86GpReg& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) noexcept {
+  return X86Mem(base, index, shift, disp, size);
+}
+//! Create `[base.reg + (xmm.reg << shift) + disp]` memory operand with no/custom size information.
+static ASMJIT_INLINE X86Mem ptr(const X86GpReg& base, const X86XmmReg& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) noexcept {
+  return X86Mem(base, index, shift, disp, size);
+}
+//! Create `[base.reg + (ymm.reg << shift) + disp]` memory operand with no/custom size information.
+static ASMJIT_INLINE X86Mem ptr(const X86GpReg& base, const X86YmmReg& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) noexcept {
+  return X86Mem(base, index, shift, disp, size);
+}
+
+//! Create `[label + disp]` memory operand with no/custom size information.
+static ASMJIT_INLINE X86Mem ptr(const Label& label, int32_t disp = 0, uint32_t size = 0) noexcept {
+  return X86Mem(label, disp, size);
+}
+//! Create `[label + (index.reg << shift) + disp]` memory operand with no/custom size information.
+static ASMJIT_INLINE X86Mem ptr(const Label& label, const X86GpReg& index, uint32_t shift, int32_t disp = 0, uint32_t size = 0) noexcept {
+  return X86Mem(label, index, shift, disp, size);
+}
+
+//! Create `[RIP + disp]` memory operand with no/custom size information.
+static ASMJIT_INLINE X86Mem ptr(const X86RipReg& rip_, int32_t disp = 0, uint32_t size = 0) noexcept {
+  return X86Mem(rip_, disp, size);
+}
+
+//! Create `[p + disp]` absolute memory operand with no/custom size information.
+ASMJIT_API X86Mem ptr_abs(Ptr p, int32_t disp = 0, uint32_t size = 0) noexcept;
+//! Create `[p + (index.reg << shift) + disp]` absolute memory operand with no/custom size information.
+ASMJIT_API X86Mem ptr_abs(Ptr p, const X86Reg& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) noexcept;
+
+//! \internal
+#define ASMJIT_EXPAND_PTR_REG(prefix, size) \
+  /*! Create `[base.reg + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr(const X86GpReg& base, int32_t disp = 0) noexcept { \
+    return X86Mem(base, disp, size); \
+  } \
+  /*! Create `[base.reg + (index.reg << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr(const X86GpReg& base, const X86GpReg& index, uint32_t shift = 0, int32_t disp = 0) noexcept { \
+    return ptr(base, index, shift, disp, size); \
+  } \
+  /*! Create `[base.reg + (xmm.reg << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr(const X86GpReg& base, const X86XmmReg& index, uint32_t shift = 0, int32_t disp = 0) noexcept { \
+    return ptr(base, index, shift, disp, size); \
+  } \
+  /*! Create `[base.reg + (ymm.reg << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr(const X86GpReg& base, const X86YmmReg& index, uint32_t shift = 0, int32_t disp = 0) noexcept { \
+    return ptr(base, index, shift, disp, size); \
+  } \
+  /*! Create `[label + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr(const Label& label, int32_t disp = 0) noexcept { \
+    return ptr(label, disp, size); \
+  } \
+  /*! Create `[label + (index.reg << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr(const Label& label, const X86GpReg& index, uint32_t shift, int32_t disp = 0) noexcept { \
+    return ptr(label, index, shift, disp, size); \
+  } \
+  /*! Create `[RIP + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr(const X86RipReg& rip_, int32_t disp = 0) noexcept { \
+    return ptr(rip_, disp, size); \
+  } \
+  /*! Create `[p + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr##_abs(Ptr p, int32_t disp = 0) noexcept { \
+    return ptr_abs(p, disp, size); \
+  } \
+  /*! Create `[p + (index.reg << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr##_abs(Ptr p, const X86GpReg& index, uint32_t shift = 0, int32_t disp = 0) noexcept { \
+    return ptr_abs(p, index, shift, disp, size); \
+  } \
+  /*! Create `[p + (xmm.reg << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr##_abs(Ptr p, const X86XmmReg& index, uint32_t shift = 0, int32_t disp = 0) noexcept { \
+    return ptr_abs(p, index, shift, disp, size); \
+  } \
+  /*! Create `[p + (ymm.reg << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr##_abs(Ptr p, const X86YmmReg& index, uint32_t shift = 0, int32_t disp = 0) noexcept { \
+    return ptr_abs(p, index, shift, disp, size); \
+  }
+
+ASMJIT_EXPAND_PTR_REG(byte, 1)
+ASMJIT_EXPAND_PTR_REG(word, 2)
+ASMJIT_EXPAND_PTR_REG(dword, 4)
+ASMJIT_EXPAND_PTR_REG(qword, 8)
+ASMJIT_EXPAND_PTR_REG(tword, 10)
+ASMJIT_EXPAND_PTR_REG(oword, 16)
+ASMJIT_EXPAND_PTR_REG(yword, 32)
+ASMJIT_EXPAND_PTR_REG(zword, 64)
+#undef ASMJIT_EXPAND_PTR_REG
+
+// ============================================================================
+// [asmjit::x86 - Ptr (Var)]
+// ============================================================================
+
+#if !defined(ASMJIT_DISABLE_COMPILER)
+//! Create `[base.var + disp]` memory operand with no/custom size information.
+static ASMJIT_INLINE X86Mem ptr(const X86GpVar& base, int32_t disp = 0, uint32_t size = 0) noexcept {
+  return X86Mem(base, disp, size);
+}
+//! Create `[base.var + (index.var << shift) + disp]` memory operand with no/custom size information.
+static ASMJIT_INLINE X86Mem ptr(const X86GpVar& base, const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) noexcept {
+  return X86Mem(base, index, shift, disp, size);
+}
+//! Create `[base.var + (xmm.var << shift) + disp]` memory operand with no/custom size information.
+static ASMJIT_INLINE X86Mem ptr(const X86GpVar& base, const X86XmmVar& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) noexcept {
+  return X86Mem(base, index, shift, disp, size);
+}
+//! Create `[base.var + (ymm.var << shift) + disp]` memory operand with no/custom size information.
+static ASMJIT_INLINE X86Mem ptr(const X86GpVar& base, const X86YmmVar& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) noexcept {
+  return X86Mem(base, index, shift, disp, size);
+}
+//! Create `[label + (index.var << shift) + disp]` memory operand with no/custom size information.
+static ASMJIT_INLINE X86Mem ptr(const Label& label, const X86GpVar& index, uint32_t shift, int32_t disp = 0, uint32_t size = 0) noexcept {
+  return X86Mem(label, index, shift, disp, size);
+}
+
+//! Create `[p + (index.var << shift) + disp]` absolute memory operand with no/custom size information.
+ASMJIT_API X86Mem ptr_abs(Ptr p, const X86Var& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) noexcept;
+
+//! \internal
+#define ASMJIT_EXPAND_PTR_VAR(prefix, size) \
+  /*! Create `[base.var + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr(const X86GpVar& base, int32_t disp = 0) noexcept { \
+    return X86Mem(base, disp, size); \
+  } \
+  /*! Create `[base.var + (index.var << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr(const X86GpVar& base, const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0) noexcept { \
+    return ptr(base, index, shift, disp, size); \
+  } \
+  /*! Create `[base.var + (xmm.var << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr(const X86GpVar& base, const X86XmmVar& index, uint32_t shift = 0, int32_t disp = 0) noexcept { \
+    return ptr(base, index, shift, disp, size); \
+  } \
+  /*! Create `[base.var + (ymm.var << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr(const X86GpVar& base, const X86YmmVar& index, uint32_t shift = 0, int32_t disp = 0) noexcept { \
+    return ptr(base, index, shift, disp, size); \
+  } \
+  /*! Create `[label + (index.var << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr(const Label& label, const X86GpVar& index, uint32_t shift, int32_t disp = 0) noexcept { \
+    return ptr(label, index, shift, disp, size); \
+  } \
+  /*! Create `[p + (index.var << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr##_abs(Ptr p, const X86GpVar& index, uint32_t shift = 0, int32_t disp = 0) noexcept { \
+    return ptr_abs(p, reinterpret_cast<const X86Var&>(index), shift, disp, size); \
+  } \
+  /*! Create `[p + (xmm.var << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr##_abs(Ptr p, const X86XmmVar& index, uint32_t shift = 0, int32_t disp = 0) noexcept { \
+    return ptr_abs(p, reinterpret_cast<const X86Var&>(index), shift, disp, size); \
+  } \
+  /*! Create `[p + (ymm.var << shift) + disp]` memory operand. */ \
+  static ASMJIT_INLINE X86Mem prefix##_ptr##_abs(Ptr p, const X86YmmVar& index, uint32_t shift = 0, int32_t disp = 0) noexcept { \
+    return ptr_abs(p, reinterpret_cast<const X86Var&>(index), shift, disp, size); \
+  }
+
+ASMJIT_EXPAND_PTR_VAR(byte, 1)
+ASMJIT_EXPAND_PTR_VAR(word, 2)
+ASMJIT_EXPAND_PTR_VAR(dword, 4)
+ASMJIT_EXPAND_PTR_VAR(qword, 8)
+ASMJIT_EXPAND_PTR_VAR(tword, 10)
+ASMJIT_EXPAND_PTR_VAR(oword, 16)
+ASMJIT_EXPAND_PTR_VAR(yword, 32)
+ASMJIT_EXPAND_PTR_VAR(zword, 64)
+#undef ASMJIT_EXPAND_PTR_VAR
+
+#endif // !ASMJIT_DISABLE_COMPILER
+
+} // x86 namespace
+
+#endif // !ASMJIT_EXPORTS_X86_REGS
+
+//! \}
+
+} // asmjit namespace
+
+// [Cleanup]
+#undef ASMJIT_OP_ID
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_X86_X86OPERAND_H
diff --git a/DynamicHooks/thirdparty/AsmJit/x86/x86operand_regs.cpp b/DynamicHooks/thirdparty/AsmJit/x86/x86operand_regs.cpp
new file mode 100644
index 0000000..77fc054
--- /dev/null
+++ b/DynamicHooks/thirdparty/AsmJit/x86/x86operand_regs.cpp
@@ -0,0 +1,84 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+#define ASMJIT_EXPORTS_X86_REGS
+
+// [Guard]
+#include "../build.h"
+#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)
+
+// [Dependencies]
+#include "../x86/x86operand.h"
+
+// [Api-Begin]
+#include "../apibegin.h"
+
+namespace asmjit {
+
+#define REG(type, index, size) {{{ \
+  Operand::kTypeReg, size, { ((type) << 8) + index }, kInvalidValue, {{ kInvalidVar, 0 }} \
+}}}
+
+#define REG_LIST_04(type, start, size) \
+  REG(type, start + 0, size), \
+  REG(type, start + 1, size), \
+  REG(type, start + 2, size), \
+  REG(type, start + 3, size)
+
+#define REG_LIST_08(type, start, size) \
+  REG_LIST_04(type, start + 0, size), \
+  REG_LIST_04(type, start + 4, size)
+
+#define REG_LIST_16(type, start, size) \
+  REG_LIST_08(type, start + 0, size), \
+  REG_LIST_08(type, start + 8, size)
+
+#define REG_LIST_32(type, start, size) \
+  REG_LIST_16(type, start + 0, size), \
+  REG_LIST_16(type, start + 16, size)
+
+const X86RegData x86RegData = {
+  { REG_LIST_16(kX86RegTypeGpd , 0, 4) },
+  { REG_LIST_16(kX86RegTypeGpq , 0, 8) },
+  { REG_LIST_16(kX86RegTypeGpbLo, 0, 1) },
+  { REG_LIST_04(kX86RegTypeGpbHi, 0, 1) },
+  { REG_LIST_16(kX86RegTypeGpw , 0, 2) },
+  { REG_LIST_32(kX86RegTypeXmm , 0, 16) },
+  { REG_LIST_32(kX86RegTypeYmm , 0, 32) },
+  { REG_LIST_32(kX86RegTypeZmm , 0, 64) },
+  { REG_LIST_08(kX86RegTypeK , 0, 8) },
+  { REG_LIST_08(kX86RegTypeFp , 0, 10) },
+  { REG_LIST_08(kX86RegTypeMm , 0, 8) },
+
+  {
+    REG(kX86RegTypeSeg, 0, 2), // Default.
+    REG(kX86RegTypeSeg, 1, 2), // ES.
+    REG(kX86RegTypeSeg, 2, 2), // CS.
+    REG(kX86RegTypeSeg, 3, 2), // SS.
+    REG(kX86RegTypeSeg, 4, 2), // DS.
+    REG(kX86RegTypeSeg, 5, 2), // FS.
+    REG(kX86RegTypeSeg, 6, 2)  // GS.
+  },
+
+  REG(kInvalidReg, kInvalidReg, 0), // NoGp.
+  REG(kX86RegTypeRip, 0, 0), // RIP.
+};
+
+#undef REG_LIST_32
+#undef REG_LIST_16
+#undef REG_LIST_08
+#undef REG_LIST_04
+#undef REG
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../apiend.h"
+
+// [Guard]
+#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64
diff --git a/DynamicHooks/utilities.cpp b/DynamicHooks/utilities.cpp
new file mode 100644
index 0000000..9d2c21c
--- /dev/null
+++ b/DynamicHooks/utilities.cpp
@@ -0,0 +1,70 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+// ============================================================================
+// >> INCLUDES
+// ============================================================================
+#ifdef _WIN32
+	#include <windows.h>
+#endif
+
+#ifdef __linux__
+	#include <sys/mman.h>
+	#include <unistd.h>
+	#define PAGE_SIZE 4096
+	#define ALIGN(ar) ((long)ar & ~(PAGE_SIZE-1))
+	#define PAGE_EXECUTE_READWRITE PROT_READ|PROT_WRITE|PROT_EXEC
+#endif
+
+#include "asm.h"
+
+
+// ============================================================================
+// >> SetMemPatchable
+// ============================================================================
+void SetMemPatchable(void* pAddr, size_t size)
+{
+#if defined __linux__
+	mprotect((void *) ALIGN(pAddr), sysconf(_SC_PAGESIZE), PAGE_EXECUTE_READWRITE);
+#elif defined _WIN32
+	DWORD old_prot;
+	VirtualProtect(pAddr, size, PAGE_EXECUTE_READWRITE, &old_prot);
+#endif
+}
+
+
+// ============================================================================
+// >> WriteJMP
+// ============================================================================
+void WriteJMP(unsigned char* src, void* dest)
+{
+	SetMemPatchable(src, 20);
+	inject_jmp((void *)src, dest);
+}
\ No newline at end of file
diff --git a/DynamicHooks/utilities.h b/DynamicHooks/utilities.h
new file mode 100644
index 0000000..d74a938
--- /dev/null
+++ b/DynamicHooks/utilities.h
@@ -0,0 +1,40 @@
+/**
+* =============================================================================
+* DynamicHooks
+* Copyright (C) 2015 Robin Gohmert. All rights reserved.
+* =============================================================================
+*
+* This software is provided 'as-is', without any express or implied warranty.
+* In no event will the authors be held liable for any damages arising from
+* the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software in a
+* product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source distribution.
+*
+* asm.h/cpp from devmaster.net (thanks cybermind) edited by pRED* to handle gcc
+* -fPIC thunks correctly
+*
+* Idea and trampoline code taken from DynDetours (thanks your-name-here).
+*/
+
+#ifndef _UTILITIES_H
+#define _UTILITIES_H
+
+// ============================================================================
+// >> FUNCTIONS
+// ============================================================================
+void SetMemPatchable(void* pAddr, size_t size);
+void WriteJMP(unsigned char* src, void* dest);
+
+#endif // _UTILITIES_H
\ No newline at end of file
diff --git a/dynhooks_sourcepawn.cpp b/dynhooks_sourcepawn.cpp
new file mode 100644
index 0000000..de4820d
--- /dev/null
+++ b/dynhooks_sourcepawn.cpp
@@ -0,0 +1,560 @@
+#include "dynhooks_sourcepawn.h"
+#include "util.h"
+#include
+
+#include "conventions/x86MsCdecl.h"
+#include "conventions/x86MsThiscall.h"
+#include "conventions/x86MsStdcall.h"
+#include "conventions/x86GccCdecl.h"
+#include "conventions/x86GccThiscall.h"
+
+#ifdef WIN32
+typedef x86MsCdecl x86DetourCdecl;
+typedef x86MsThiscall x86DetourThisCall;
+typedef x86MsStdcall x86DetourStdCall;
+#else
+typedef x86GccCdecl x86DetourCdecl;
+typedef x86GccThiscall x86DetourThisCall;
+// Uhm
+typedef x86MsStdcall x86DetourStdCall;
+#endif
+
+//ke::Vector g_pDetours;
+//CallbackMap g_pPluginPreDetours;
+//CallbackMap g_pPluginPostDetours;
+DetourMap g_pPreDetours;
+DetourMap g_pPostDetours;
+
+void UnhookFunction(HookType_t hookType, CHook *pDetour)
+{
+	CHookManager *pDetourManager = GetHookManager();
+	pDetour->RemoveCallback(hookType, (HookHandlerFn *)(void *)&HandleDetour);
+	if (!pDetour->AreCallbacksRegistered())
+		pDetourManager->UnhookFunction(pDetour->m_pFunc);
+}
+
+bool AddDetourPluginHook(HookType_t hookType, CHook *pDetour, HookSetup *setup, IPluginFunction *pCallback)
+{
+	DetourMap *map;
+	if (hookType == HOOKTYPE_PRE)
+		map = &g_pPreDetours;
+	else
+		map = &g_pPostDetours;
+
+	// See if we already have this detour in our list.
+	PluginCallbackList *wrappers;
+	DetourMap::Insert f = map->findForAdd(pDetour);
+	if (f.found())
+	{
+		wrappers = f->value;
+	}
+	else
+	{
+		// Create a vector to store all the plugin callbacks in.
+		wrappers = new PluginCallbackList;
+		if (!map->add(f, pDetour, wrappers))
+		{
+			delete wrappers;
+			UnhookFunction(hookType, pDetour);
+			return false;
+		}
+	}
+
+	CDynamicHooksSourcePawn *pWrapper = new CDynamicHooksSourcePawn(setup, pDetour, pCallback, hookType == HOOKTYPE_POST);
+	if (!wrappers->append(pWrapper))
+	{
+		if (wrappers->empty())
+		{
+			delete wrappers;
+			UnhookFunction(hookType, pDetour);
+			map->remove(f);
+		}
+		delete pWrapper;
+		return false;
+	}
+
+	return true;
+}
+
+bool RemoveDetourPluginHook(HookType_t hookType, CHook *pDetour, IPluginFunction *pCallback)
+{
+	DetourMap *map;
+	if (hookType == HOOKTYPE_PRE)
+		map = &g_pPreDetours;
+	else
+		map = &g_pPostDetours;
+
+	DetourMap::Result res = map->find(pDetour);
+	if (!res.found())
+		return false;
+
+	// Remove the plugin's callback.
+	bool bRemoved = false;
+	PluginCallbackList *wrappers = res->value;
+	for (int i = wrappers->length() - 1; i >= 0; i--)
+	{
+		CDynamicHooksSourcePawn *pWrapper = wrappers->at(i);
+		if (pWrapper->plugin_callback == pCallback)
+		{
+			bRemoved = true;
+			delete pWrapper;
+			// Iterating backwards, so removing at |i| is safe with the
+			// loop's own decrement.
+			wrappers->remove(i);
+		}
+	}
+
+	// No more plugin hooks on this callback. Free our structures.
+	if (wrappers->empty())
+	{
+		delete wrappers;
+		UnhookFunction(hookType, pDetour);
+		map->remove(res);
+	}
+
+	return bRemoved;
+}
+
+void RemoveAllCallbacksForContext(HookType_t hookType, DetourMap *map, IPluginContext *pContext)
+{
+	PluginCallbackList *wrappers;
+	CDynamicHooksSourcePawn *pWrapper;
+	DetourMap::iterator it = map->iter();
+	// Run through all active detours we added.
+	for (; !it.empty(); it.next())
+	{
+		wrappers = it->value;
+		// See if there are callbacks of this plugin context registered
+		// and remove them.
+		for (int i = wrappers->length() - 1; i >= 0; i--)
+		{
+			pWrapper = wrappers->at(i);
+			if (pWrapper->plugin_callback->GetParentContext() != pContext)
+				continue;
+
+			delete pWrapper;
+			// Iterating backwards, so removing at |i| is safe with the
+			// loop's own decrement.
+			wrappers->remove(i);
+		}
+
+		// No plugin is interested in this hook anymore. Unhook.
+		if (wrappers->empty())
+		{
+			delete wrappers;
+			UnhookFunction(hookType, it->key);
+			it.erase();
+		}
+	}
+}
+
+void RemoveAllCallbacksForContext(IPluginContext *pContext)
+{
+	RemoveAllCallbacksForContext(HOOKTYPE_PRE, &g_pPreDetours, pContext);
+	RemoveAllCallbacksForContext(HOOKTYPE_POST, &g_pPostDetours, pContext);
+}
+
+ICallingConvention *ConstructCallingConvention(HookSetup *setup)
+{
+	std::vector<DataTypeSized_t> vecArgTypes;
+	for (size_t i = 0; i < setup->params.size(); i++)
+	{
+		ParamInfo &info = setup->params[i];
+		DataTypeSized_t type;
+		type.type = DynamicHooks_ConvertParamTypeFrom(info.type);
+		type.size = info.size;
+		vecArgTypes.push_back(type);
+	}
+
+	DataTypeSized_t returnType;
+	returnType.type = DynamicHooks_ConvertReturnTypeFrom(setup->returnType);
+	returnType.size = 0;
+
+	ICallingConvention *pCallConv = nullptr;
+	switch (setup->callConv)
+	{
+	case CallConv_CDECL:
+		pCallConv = new x86DetourCdecl(vecArgTypes, returnType);
+		break;
+	case CallConv_THISCALL:
+		pCallConv = new x86DetourThisCall(vecArgTypes, returnType);
+		break;
+	case CallConv_STDCALL:
+		pCallConv = new x86DetourStdCall(vecArgTypes, returnType);
+		break;
+	}
+
+	return pCallConv;
+}
+
+bool HandleDetour(HookType_t hookType, CHook* pDetour)
+{
+	DetourMap *map;
+	if (hookType == HOOKTYPE_PRE)
+		map = &g_pPreDetours;
+	else
+		map = &g_pPostDetours;
+
+	// Find the callback list for this detour.
+	DetourMap::Result r = map->find(pDetour);
+	if (!r.found())
+		return false;
+
+	// List of all callbacks.
+	PluginCallbackList *wrappers = r->value;
+
+	HookReturnStruct *returnStruct = NULL;
+	Handle_t rHndl = BAD_HANDLE;
+
+	HookParamsStruct *paramStruct = NULL;
+	Handle_t pHndl = BAD_HANDLE;
+
+	int argNum = pDetour->m_pCallingConvention->m_vecArgTypes.size();
+	MRESReturn finalRet = MRES_Ignored;
+	ke::AutoPtr finalRetBuf(new uint8_t[pDetour->m_pCallingConvention->m_returnType.size]);
+
+	// Call all the plugin functions.
+	for (size_t i = 0; i < wrappers->length(); i++)
+	{
+		CDynamicHooksSourcePawn *pWrapper = wrappers->at(i);
+		IPluginFunction *pCallback = pWrapper->plugin_callback;
+		MRESReturn tempRet = MRES_Ignored;
+		ke::AutoPtr tempRetBuf(new uint8_t[pDetour->m_pCallingConvention->m_returnType.size]);
+
+		// Find the this pointer.
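+		// For a thiscall detour the first argument of the hooked function is
+		// |this|; GetThisPtr() converts it to a cell according to the setup's
+		// ThisPointerType (entity index or raw address) before it is pushed
+		// to the SourcePawn callback.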
+		if (pWrapper->callConv == CallConv_THISCALL)
+		{
+			void *thisPtr = pDetour->GetArgument<void *>(0);
+			cell_t thisAddr = GetThisPtr(thisPtr, pWrapper->thisType);
+			pCallback->PushCell(thisAddr);
+		}
+
+		if (pWrapper->returnType != ReturnType_Void)
+		{
+			returnStruct = pWrapper->GetReturnStruct();
+			HandleError err;
+			rHndl = handlesys->CreateHandle(g_HookReturnHandle, returnStruct, pCallback->GetParentRuntime()->GetDefaultContext()->GetIdentity(), myself->GetIdentity(), &err);
+			if (!rHndl)
+			{
+				pCallback->Cancel();
+				pCallback->GetParentRuntime()->GetDefaultContext()->BlamePluginError(pCallback, "Error creating ReturnHandle in preparation to call hook callback. (error %d)", err);
+
+				if (returnStruct)
+					delete returnStruct;
+
+				// Don't call more callbacks. They will probably fail too.
+				break;
+			}
+			pCallback->PushCell(rHndl);
+		}
+
+		if (argNum > 0)
+		{
+			paramStruct = pWrapper->GetParamStruct();
+			HandleError err;
+			pHndl = handlesys->CreateHandle(g_HookParamsHandle, paramStruct, pCallback->GetParentRuntime()->GetDefaultContext()->GetIdentity(), myself->GetIdentity(), &err);
+			if (!pHndl)
+			{
+				pCallback->Cancel();
+				pCallback->GetParentRuntime()->GetDefaultContext()->BlamePluginError(pCallback, "Error creating ParamsHandle in preparation to call hook callback. (error %d)", err);
+
+				// Don't leak our own handles here!
+				if (rHndl)
+				{
+					HandleSecurity sec(pCallback->GetParentRuntime()->GetDefaultContext()->GetIdentity(), myself->GetIdentity());
+					handlesys->FreeHandle(rHndl, &sec);
+					rHndl = BAD_HANDLE;
+				}
+
+				if (paramStruct)
+					delete paramStruct;
+
+				// Don't call more callbacks. They will probably fail too.
+				break;
+			}
+			pCallback->PushCell(pHndl);
+		}
+
+		cell_t result = (cell_t)MRES_Ignored;
+		pCallback->Execute(&result);
+
+		switch ((MRESReturn)result)
+		{
+		case MRES_Handled:
+			tempRet = MRES_Handled;
+			break;
+		case MRES_ChangedHandled:
+			tempRet = MRES_Handled;
+			pWrapper->UpdateParamsFromStruct(paramStruct);
+			break;
+		case MRES_ChangedOverride:
+			if (pWrapper->returnType != ReturnType_Void)
+			{
+				if (returnStruct->isChanged)
+				{
+					if (pWrapper->returnType == ReturnType_String || pWrapper->returnType == ReturnType_Int || pWrapper->returnType == ReturnType_Bool)
+					{
+						tempRetBuf = *(void **)returnStruct->newResult;
+					}
+					else if (pWrapper->returnType == ReturnType_Float)
+					{
+						*(float *)tempRetBuf.get() = *(float *)returnStruct->newResult;
+					}
+					else
+					{
+						tempRetBuf = returnStruct->newResult;
+					}
+				}
+				else //Throw an error if no override was set
+				{
+					tempRet = MRES_Ignored;
+					pCallback->GetParentRuntime()->GetDefaultContext()->BlamePluginError(pCallback, "Tried to override return value without return value being set");
+					break;
+				}
+			}
+			// TODO: Introduce that override concept in dyndetours.
+			// This doesn't call the original function at the moment, but just returns the given return value.
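+			// As the TODO notes, a changed override currently behaves like a
+			// supercede: the detour returns the given value without invoking
+			// the original function.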
+ tempRet = MRES_Override; + if (pWrapper->returnType == ReturnType_String || pWrapper->returnType == ReturnType_Int || pWrapper->returnType == ReturnType_Bool) + { + tempRetBuf = *(void **)returnStruct->newResult; + } + else if (pWrapper->returnType == ReturnType_Float) + { + *(float *)tempRetBuf.get() = *(float *)returnStruct->newResult; + } + else + { + tempRetBuf = returnStruct->newResult; + } + } + else //Throw an error if no override was set + { + tempRet = MRES_Ignored; + pCallback->GetParentRuntime()->GetDefaultContext()->BlamePluginError(pCallback, "Tried to override return value without return value being set"); + } + } + break; + case MRES_Supercede: + if (pWrapper->returnType != ReturnType_Void) + { + if (returnStruct->isChanged) + { + tempRet = MRES_Supercede; + if (pWrapper->returnType == ReturnType_String || pWrapper->returnType == ReturnType_Int || pWrapper->returnType == ReturnType_Bool) + { + tempRetBuf = *(void **)returnStruct->newResult; + } + else if (pWrapper->returnType == ReturnType_Float) + { + *(float *)tempRetBuf.get() = *(float *)returnStruct->newResult; + } + else + { + tempRetBuf = returnStruct->newResult; + } + } + else //Throw an error if no override was set + { + tempRet = MRES_Ignored; + pCallback->GetParentRuntime()->GetDefaultContext()->BlamePluginError(pCallback, "Tried to override return value without return value being set"); + } + } + else + { + tempRet = MRES_Supercede; + } + break; + default: + tempRet = MRES_Ignored; + break; + } + + // Prioritize the actions. + if (finalRet <= tempRet) { + + // ------------------------------------ + // Copy the action and return value. + // ------------------------------------ + finalRet = tempRet; + memcpy(*finalRetBuf, *tempRetBuf, pDetour->m_pCallingConvention->m_returnType.size); + } + + // Free the handles again. 
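+		// Freeing the handles destroys the return/param structs through their
+		// registered handle type handlers, so they must not be deleted
+		// manually here.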
+		HandleSecurity sec(pCallback->GetParentRuntime()->GetDefaultContext()->GetIdentity(), myself->GetIdentity());
+
+		if (returnStruct)
+		{
+			handlesys->FreeHandle(rHndl, &sec);
+		}
+		if (paramStruct)
+		{
+			handlesys->FreeHandle(pHndl, &sec);
+		}
+	}
+
+	if (finalRet >= MRES_Override)
+	{
+		void* pPtr = pDetour->m_pCallingConvention->GetReturnPtr(pDetour->m_pRegisters);
+		memcpy(pPtr, *finalRetBuf, pDetour->m_pCallingConvention->m_returnType.size);
+		pDetour->m_pCallingConvention->ReturnPtrChanged(pDetour->m_pRegisters, pPtr);
+	}
+
+	return finalRet == MRES_Supercede;
+}
+
+CDynamicHooksSourcePawn::CDynamicHooksSourcePawn(HookSetup *setup, CHook *pDetour, IPluginFunction *pCallback, bool post)
+{
+	this->params = setup->params;
+	this->offset = -1;
+	this->returnFlag = setup->returnFlag;
+	this->returnType = setup->returnType;
+	this->post = post;
+	this->plugin_callback = pCallback;
+	this->entity = -1;
+	this->thisType = setup->thisType;
+	this->hookType = setup->hookType;
+	this->m_pDetour = pDetour;
+	this->callConv = setup->callConv;
+}
+
+HookReturnStruct *CDynamicHooksSourcePawn::GetReturnStruct()
+{
+	HookReturnStruct *res = new HookReturnStruct();
+	res->isChanged = false;
+	res->type = this->returnType;
+	res->orgResult = NULL;
+	res->newResult = NULL;
+
+	if (this->post)
+	{
+		switch (this->returnType)
+		{
+		case ReturnType_String:
+			res->orgResult = malloc(sizeof(string_t));
+			res->newResult = malloc(sizeof(string_t));
+			*(string_t *)res->orgResult = m_pDetour->GetReturnValue<string_t>();
+			break;
+		case ReturnType_Int:
+			res->orgResult = malloc(sizeof(int));
+			res->newResult = malloc(sizeof(int));
+			*(int *)res->orgResult = m_pDetour->GetReturnValue<int>();
+			break;
+		case ReturnType_Bool:
+			res->orgResult = malloc(sizeof(bool));
+			res->newResult = malloc(sizeof(bool));
+			*(bool *)res->orgResult = m_pDetour->GetReturnValue<bool>();
+			break;
+		case ReturnType_Float:
+			res->orgResult = malloc(sizeof(float));
+			res->newResult = malloc(sizeof(float));
+			*(float *)res->orgResult = m_pDetour->GetReturnValue<float>();
+			break;
+		case ReturnType_Vector:
+		{
+			res->orgResult = malloc(sizeof(SDKVector));
+			res->newResult = malloc(sizeof(SDKVector));
+			SDKVector vec = m_pDetour->GetReturnValue<SDKVector>();
+			*(SDKVector *)res->orgResult = vec;
+			break;
+		}
+		default:
+			res->orgResult = m_pDetour->GetReturnValue<void *>();
+			break;
+		}
+	}
+	else
+	{
+		switch (this->returnType)
+		{
+		case ReturnType_String:
+			res->orgResult = malloc(sizeof(string_t));
+			res->newResult = malloc(sizeof(string_t));
+			*(string_t *)res->orgResult = NULL_STRING;
+			break;
+		case ReturnType_Vector:
+			res->orgResult = malloc(sizeof(SDKVector));
+			res->newResult = malloc(sizeof(SDKVector));
+			*(SDKVector *)res->orgResult = SDKVector();
+			break;
+		case ReturnType_Int:
+			res->orgResult = malloc(sizeof(int));
+			res->newResult = malloc(sizeof(int));
+			*(int *)res->orgResult = 0;
+			break;
+		case ReturnType_Bool:
+			res->orgResult = malloc(sizeof(bool));
+			res->newResult = malloc(sizeof(bool));
+			*(bool *)res->orgResult = false;
+			break;
+		case ReturnType_Float:
+			res->orgResult = malloc(sizeof(float));
+			res->newResult = malloc(sizeof(float));
+			*(float *)res->orgResult = 0.0;
+			break;
+		}
+	}
+
+	return res;
+}
+
+HookParamsStruct *CDynamicHooksSourcePawn::GetParamStruct()
+{
+	HookParamsStruct *params = new HookParamsStruct();
+	params->dg = this;
+
+	size_t paramsSize = this->m_pDetour->m_pCallingConvention->GetArgStackSize();
+	std::vector<DataTypeSized_t> &argTypes = m_pDetour->m_pCallingConvention->m_vecArgTypes;
+	int numArgs = argTypes.size();
+
+	params->orgParams = (void **)malloc(paramsSize);
+	params->newParams = (void **)malloc(paramsSize);
+	params->isChanged = (bool *)malloc(numArgs * sizeof(bool));
+
+	size_t offset = 0;
+	for (int i = 0; i < numArgs; i++)
+	{
+		void *pArgPtr = m_pDetour->m_pCallingConvention->GetStackArgumentPtr(m_pDetour->m_pRegisters);
+		memcpy(params->orgParams, pArgPtr, paramsSize);
+
+		*(void **)((intptr_t)params->newParams + offset) = NULL;
+		params->isChanged[i] = false;
+
+		offset += argTypes[i].size;
+	}
+
+	return params;
+}
+
+void CDynamicHooksSourcePawn::UpdateParamsFromStruct(HookParamsStruct *params)
+{
+	// Function has no params to update.
+	if (!params)
+		return;
+
+	std::vector<DataTypeSized_t> &argTypes = m_pDetour->m_pCallingConvention->m_vecArgTypes;
+	int numArgs = argTypes.size();
+
+	int firstArg = 0;
+	if (callConv == CallConv_THISCALL)
+		firstArg = 1;
+
+	size_t offset = 0;
+	for (int i = 0; i < numArgs; i++)
+	{
+		int size = argTypes[i].size;
+		if (params->isChanged[i])
+		{
+			void *paramAddr = (void *)((intptr_t)params->newParams + offset);
+			void *stackAddr = m_pDetour->m_pCallingConvention->GetArgumentPtr(i + firstArg, m_pDetour->m_pRegisters);
+			memcpy(stackAddr, paramAddr, size);
+		}
+		offset += size;
+	}
}
\ No newline at end of file
diff --git a/dynhooks_sourcepawn.h b/dynhooks_sourcepawn.h
new file mode 100644
index 0000000..14463e3
--- /dev/null
+++ b/dynhooks_sourcepawn.h
@@ -0,0 +1,41 @@
+#ifndef _INCLUDE_DYNHOOKS_SP_H_
+#define _INCLUDE_DYNHOOKS_SP_H_
+
+#include "manager.h"
+#include "vhook.h"
+#include <am-vector.h>
+#include <am-hashmap.h>
+
+class CDynamicHooksSourcePawn;
+typedef ke::HashMap<IPluginFunction *, CDynamicHooksSourcePawn *, ke::PointerPolicy<IPluginFunction>> CallbackMap;
+typedef ke::Vector<CDynamicHooksSourcePawn *> PluginCallbackList;
+typedef ke::HashMap<CHook *, PluginCallbackList *, ke::PointerPolicy<CHook>> DetourMap;
+
+//extern ke::Vector g_pDetours;
+// Keep a list of plugin callback -> Hook wrapper for easily removing plugin hooks
+//extern CallbackMap g_pPluginPreDetours;
+//extern CallbackMap g_pPluginPostDetours;
+// Keep a list of hook -> callbacks for calling in the detour handler
+extern DetourMap g_pPreDetours;
+extern DetourMap g_pPostDetours;
+
+class CDynamicHooksSourcePawn : public DHooksInfo {
+public:
+	CDynamicHooksSourcePawn(HookSetup *setup, CHook *pDetour, IPluginFunction *pCallback, bool post);
+
+	HookReturnStruct *GetReturnStruct();
+	HookParamsStruct *GetParamStruct();
+	void UpdateParamsFromStruct(HookParamsStruct *params);
+
+public:
+	CHook *m_pDetour;
+	CallingConvention callConv;
+};
+
+ICallingConvention *ConstructCallingConvention(HookSetup *setup);
+bool HandleDetour(HookType_t hookType, CHook* pDetour);
+bool AddDetourPluginHook(HookType_t hookType, CHook *pDetour, HookSetup *setup, IPluginFunction *pCallback);
+bool RemoveDetourPluginHook(HookType_t hookType, CHook *pDetour, IPluginFunction *pCallback);
+void RemoveAllCallbacksForContext(IPluginContext *pContext);
+
+#endif
\ No newline at end of file
diff --git a/extension.cpp b/extension.cpp
index 42ed7b8..2c78ac2 100644
--- a/extension.cpp
+++ b/extension.cpp
@@ -1,5 +1,6 @@
 #include "extension.h"
 #include "listeners.h"
+#include "dynhooks_sourcepawn.h"
 
 DHooks g_DHooksIface;		/**< Global singleton for extension's main interface */
 SMEXT_LINK(&g_DHooksIface);
@@ -35,6 +36,18 @@ bool DHooks::SDK_OnLoad(char *error, size_t maxlength, bool late)
 		return false;
 	}
 
+	if (!g_pPreDetours.init())
+	{
+		snprintf(error, maxlength, "Could not initialize pre hook detours hashmap.");
+		return false;
+	}
+
+	if (!g_pPostDetours.init())
+	{
+		snprintf(error, maxlength, "Could not initialize post hook detours hashmap.");
+		return false;
+	}
+
 	sharesys->AddDependency(myself, "bintools.ext", true, true);
"bintools.ext", true, true); sharesys->AddDependency(myself, "sdktools.ext", true, true); sharesys->AddDependency(myself, "sdkhooks.ext", true, true); @@ -76,6 +89,8 @@ void DHooks::SDK_OnAllLoaded() void DHooks::SDK_OnUnload() { CleanupHooks(); + // FIXME: Unhook only functions that are hooked by a plugin. + cleanup + GetHookManager()->UnhookAllFunctions(); if(g_pEntityListener) { g_pEntityListener->CleanupListeners(); @@ -102,6 +117,7 @@ bool DHooks::SDK_OnMetamodLoad(ISmmAPI *ismm, char *error, size_t maxlength, boo void DHooks::OnPluginUnloaded(IPlugin *plugin) { CleanupHooks(plugin->GetBaseContext()); + RemoveAllCallbacksForContext(plugin->GetBaseContext()); if(g_pEntityListener) { g_pEntityListener->CleanupListeners(plugin->GetBaseContext()); diff --git a/listeners.h b/listeners.h index 2d8041f..822ec9c 100644 --- a/listeners.h +++ b/listeners.h @@ -3,6 +3,7 @@ #include "extension.h" #include "vhook.h" +#include enum ListenType { diff --git a/natives.cpp b/natives.cpp index cd718d9..a4c0d22 100644 --- a/natives.cpp +++ b/natives.cpp @@ -1,5 +1,14 @@ #include "natives.h" #include "util.h" +#include "dynhooks_sourcepawn.h" + +// Must match same enum in sdktools.inc +enum SDKFuncConfSource +{ + SDKConf_Virtual, + SDKConf_Signature, + SDKConf_Address +}; bool GetHandleIfValidOrError(HandleType_t type, void **object, IPluginContext *pContext, cell_t param) { @@ -38,6 +47,77 @@ cell_t Native_CreateHook(IPluginContext *pContext, const cell_t *params) return hndl; } + +//native Handle:DHookCreateDetour(Address:funcaddr, CallingConvention callConv, ReturnType:returntype, ThisPointerType:thistype); +cell_t Native_CreateDetour(IPluginContext *pContext, const cell_t *params) +{ + HookSetup *setup = new HookSetup((ReturnType)params[3], PASSFLAG_BYVAL, (CallingConvention)params[2], (ThisPointerType)params[4], (void *)params[1]); + + Handle_t hndl = handlesys->CreateHandle(g_HookSetupHandle, setup, pContext->GetIdentity(), myself->GetIdentity(), NULL); + + if (!hndl) + { + delete setup; + return pContext->ThrowNativeError("Failed to create hook"); + } + + return hndl; +} + + +//native bool:DHookSetFromConf(Handle:setup, Handle:gameconf, SDKFuncConfSource:source, const String:name[]); +cell_t Native_SetFromConf(IPluginContext *pContext, const cell_t *params) +{ + HookSetup *setup; + if (!GetHandleIfValidOrError(g_HookSetupHandle, (void **)&setup, pContext, params[1])) + { + return 0; + } + + IGameConfig *conf; + HandleError err; + if ((conf = gameconfs->ReadHandle(params[2], pContext->GetIdentity(), &err)) == nullptr) + { + return pContext->ThrowNativeError("Invalid Handle %x (error %d)", params[2], err); + } + + char *key; + pContext->LocalToString(params[4], &key); + + int offset = -1; + void *addr = nullptr;; + switch (params[3]) + { + case SDKConf_Virtual: + if (!conf->GetOffset(key, &offset)) + { + return 0; + } + break; + case SDKConf_Signature: + if (!conf->GetMemSig(key, &addr) || !addr) + { + return 0; + } + break; + case SDKConf_Address: + if (!conf->GetAddress(key, &addr) || !addr) + { + return 0; + } + break; + default: + return pContext->ThrowNativeError("Unknown SDKFuncConfSource: %d", params[3]); + } + + // Save the new info. This always invalidates the other option. + // Detour or vhook. 
+ setup->funcAddr = addr; + setup->offset = offset; + + return 1; +} + //native bool:DHookAddParam(Handle:setup, HookParamType:type); OLD //native bool:DHookAddParam(Handle:setup, HookParamType:type, size=-1, DHookPassFlag:flag=DHookPass_ByVal); cell_t Native_AddParam(IPluginContext *pContext, const cell_t *params) @@ -80,6 +160,87 @@ cell_t Native_AddParam(IPluginContext *pContext, const cell_t *params) return 1; } + + +// native bool:DHookEnableDetour(Handle:setup, bool:post, DHookCallback:callback); +cell_t Native_EnableDetour(IPluginContext *pContext, const cell_t *params) +{ + HookSetup *setup; + + if (!GetHandleIfValidOrError(g_HookSetupHandle, (void **)&setup, pContext, params[1])) + { + return 0; + } + + if (setup->funcAddr == nullptr) + { + return pContext->ThrowNativeError("Hook not setup for a detour."); + } + + IPluginFunction *callback = pContext->GetFunctionById(params[3]); + if (!callback) + { + return pContext->ThrowNativeError("Failed to retrieve function by id"); + } + + bool post = params[2] != 0; + HookType_t hookType = post ? HOOKTYPE_POST : HOOKTYPE_PRE; + + // Check if we already detoured that function. + CHookManager *pDetourManager = GetHookManager(); + CHook* pDetour = pDetourManager->FindHook(setup->funcAddr); + + // If there is no detour on this function yet, create it. + if (!pDetour) + { + ICallingConvention *pCallConv = ConstructCallingConvention(setup); + pDetour = pDetourManager->HookFunction(setup->funcAddr, pCallConv); + } + + // Register our pre/post handler. + pDetour->AddCallback(hookType, (HookHandlerFn *)&HandleDetour); + + // Add the plugin callback to the map. + return AddDetourPluginHook(hookType, pDetour, setup, callback); +} + +// native bool:DHookDisableDetour(Handle:setup, bool:post, DHookCallback:callback); +cell_t Native_DisableDetour(IPluginContext *pContext, const cell_t *params) +{ + HookSetup *setup; + + if (!GetHandleIfValidOrError(g_HookSetupHandle, (void **)&setup, pContext, params[1])) + { + return 0; + } + + if (setup->funcAddr == nullptr) + { + return pContext->ThrowNativeError("Hook not setup for a detour."); + } + + IPluginFunction *callback = pContext->GetFunctionById(params[3]); + if (!callback) + { + return pContext->ThrowNativeError("Failed to retrieve function by id"); + } + + bool post = params[2] != 0; + HookType_t hookType = post ? HOOKTYPE_POST : HOOKTYPE_PRE; + + // Check if we already detoured that function. + CHookManager *pDetourManager = GetHookManager(); + CHook* pDetour = pDetourManager->FindHook(setup->funcAddr); + + if (!pDetour || !pDetour->IsCallbackRegistered(hookType, (HookHandlerFn *)&HandleDetour)) + { + return pContext->ThrowNativeError("Function not detoured."); + } + + // Remove the callback from the hook. 
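+	// If this was the last plugin callback on the detour, the function is
+	// unhooked entirely as well.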
+	return RemoveDetourPluginHook(hookType, pDetour, callback);
+}
+
 // native DHookEntity(Handle:setup, bool:post, entity, DHookRemovalCB:removalcb);
 cell_t Native_HookEntity(IPluginContext *pContext, const cell_t *params)
 {
@@ -90,6 +251,11 @@ cell_t Native_HookEntity(IPluginContext *pContext, const cell_t *params)
 		return 0;
 	}
 
+	if (setup->offset == -1)
+	{
+		return pContext->ThrowNativeError("Hook not setup for a virtual hook.");
+	}
+
 	if(setup->hookType != HookType_Entity)
 	{
 		return pContext->ThrowNativeError("Hook is not an entity hook");
@@ -133,6 +299,11 @@ cell_t Native_HookGamerules(IPluginContext *pContext, const cell_t *params)
 		return 0;
 	}
 
+	if (setup->offset == -1)
+	{
+		return pContext->ThrowNativeError("Hook not setup for a virtual hook.");
+	}
+
 	if(setup->hookType != HookType_GameRules)
 	{
 		return pContext->ThrowNativeError("Hook is not a gamerules hook");
@@ -178,6 +349,11 @@ cell_t Native_HookRaw(IPluginContext *pContext, const cell_t *params)
 		return 0;
 	}
 
+	if (setup->offset == -1)
+	{
+		return pContext->ThrowNativeError("Hook not setup for a virtual hook.");
+	}
+
 	if(setup->hookType != HookType_Raw)
 	{
 		return pContext->ThrowNativeError("Hook is not a raw hook");
@@ -1036,7 +1212,11 @@ cell_t Native_IsNullParam(IPluginContext *pContext, const cell_t *params)
 sp_nativeinfo_t g_Natives[] = 
 {
 	{"DHookCreate",	Native_CreateHook},
+	{"DHookCreateDetour",	Native_CreateDetour},
+	{"DHookSetFromConf",	Native_SetFromConf},
 	{"DHookAddParam",	Native_AddParam},
+	{"DHookEnableDetour",	Native_EnableDetour},
+	//{"DHookDisableDetour",	Native_DisableDetour},
 	{"DHookEntity",	Native_HookEntity},
 	{"DHookGamerules",	Native_HookGamerules},
 	{"DHookRaw",	Native_HookRaw},
diff --git a/sdk/smsdk_config.h b/sdk/smsdk_config.h
index 90b54bf..a780a46 100644
--- a/sdk/smsdk_config.h
+++ b/sdk/smsdk_config.h
@@ -63,7 +63,7 @@
 #define SMEXT_ENABLE_HANDLESYS
 #define SMEXT_ENABLE_PLAYERHELPERS
 //#define SMEXT_ENABLE_DBMANAGER
-//#define SMEXT_ENABLE_GAMECONF
+#define SMEXT_ENABLE_GAMECONF
 //#define SMEXT_ENABLE_MEMUTILS
 #define SMEXT_ENABLE_GAMEHELPERS
 //#define SMEXT_ENABLE_TIMERSYS
diff --git a/sourcemod/scripting/include/dhooks.inc b/sourcemod/scripting/include/dhooks.inc
index 6bf8211..ba56321 100644
--- a/sourcemod/scripting/include/dhooks.inc
+++ b/sourcemod/scripting/include/dhooks.inc
@@ -72,6 +72,13 @@ enum HookType
 	HookType_Raw
 };
 
+enum CallingConvention
+{
+	CallConv_CDECL,
+	CallConv_THISCALL,
+	CallConv_STDCALL,
+};
+
 enum MRESReturn
 {
 	MRES_ChangedHandled = -2,	// Use changed values and return MRES_Handled
@@ -176,6 +183,10 @@ native bool DHookRemoveEntityListener(ListenType type, ListenCB callback);
 */
 native Handle DHookCreate(int offset, HookType hooktype, ReturnType returntype, ThisPointerType thistype, DHookCallback callback);
 
+native Handle DHookCreateDetour(Address funcaddr, CallingConvention callConv, ReturnType returntype, ThisPointerType thistype);
+native bool DHookSetFromConf(Handle setup, Handle gameconf, SDKFuncConfSource source, const char[] name);
+native bool DHookEnableDetour(Handle setup, bool post, DHookCallback callback);
+
 /* Adds param to a hook setup
  *
  * @param setup		Setup handle to add the param to.
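The three new natives above are meant to be used together: DHookCreateDetour describes the function, DHookSetFromConf resolves its address from a gameconf, DHookAddParam declares the arguments, and DHookEnableDetour installs the detour. A minimal SourcePawn sketch of that flow follows; the gameconf file, signature key, and callback names are hypothetical placeholders, and it assumes the existing DHookCallback prototypes apply unchanged to detours:

	Handle hConf = LoadGameConfigFile("mygame.games");
	Handle hSetup = DHookCreateDetour(Address_Null, CallConv_CDECL, ReturnType_Int, ThisPointer_Ignore);
	if (!DHookSetFromConf(hSetup, hConf, SDKConf_Signature, "SomeFunction"))
		SetFailState("Could not find \"SomeFunction\" signature.");
	DHookAddParam(hSetup, HookParamType_Int);
	if (!DHookEnableDetour(hSetup, false, Detour_OnSomeFunction)) // false = pre hook
		SetFailState("Could not detour \"SomeFunction\".");

	public MRESReturn Detour_OnSomeFunction(Handle hReturn, Handle hParams)
	{
		// MRES_Ignored leaves the original behaviour untouched.
		return MRES_Ignored;
	}
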
diff --git a/util.cpp b/util.cpp index 514891c..898f50b 100644 --- a/util.cpp +++ b/util.cpp @@ -46,3 +46,56 @@ size_t GetParamsSize(DHooksCallback *dg)//Get the full size, this is for creatin return res; } + +DataType_t DynamicHooks_ConvertParamTypeFrom(HookParamType type) +{ + switch (type) + { + case HookParamType_Int: + return DATA_TYPE_INT; + case HookParamType_Bool: + return DATA_TYPE_BOOL; + case HookParamType_Float: + return DATA_TYPE_FLOAT; + case HookParamType_StringPtr: + case HookParamType_CharPtr: + case HookParamType_VectorPtr: + case HookParamType_CBaseEntity: + case HookParamType_ObjectPtr: + case HookParamType_Edict: + return DATA_TYPE_POINTER; + case HookParamType_Object: + return DATA_TYPE_OBJECT; + default: + smutils->LogError(myself, "Unhandled parameter type %d!", type); + } + + return DATA_TYPE_POINTER; +} + +DataType_t DynamicHooks_ConvertReturnTypeFrom(ReturnType type) +{ + switch (type) + { + case ReturnType_Void: + return DATA_TYPE_VOID; + case ReturnType_Int: + return DATA_TYPE_INT; + case ReturnType_Bool: + return DATA_TYPE_BOOL; + case ReturnType_Float: + return DATA_TYPE_FLOAT; + case ReturnType_StringPtr: + case ReturnType_CharPtr: + case ReturnType_VectorPtr: + case ReturnType_CBaseEntity: + case ReturnType_Edict: + return DATA_TYPE_POINTER; + case ReturnType_Vector: + return DATA_TYPE_OBJECT; + default: + smutils->LogError(myself, "Unhandled return type %d!", type); + } + + return DATA_TYPE_VOID; +} diff --git a/util.h b/util.h index 95788a7..7bf5e42 100644 --- a/util.h +++ b/util.h @@ -2,9 +2,13 @@ #define _INCLUDE_UTIL_FUNCTIONS_H_ #include "vhook.h" +#include "convention.h" size_t GetParamOffset(HookParamsStruct *params, unsigned int index); void * GetObjectAddr(HookParamType type, unsigned int flags, void **params, size_t offset); size_t GetParamTypeSize(HookParamType type); size_t GetParamsSize(DHooksCallback *dg); + +DataType_t DynamicHooks_ConvertParamTypeFrom(HookParamType type); +DataType_t DynamicHooks_ConvertReturnTypeFrom(ReturnType type); #endif diff --git a/vhook.cpp b/vhook.cpp index fca8525..48d33f2 100644 --- a/vhook.cpp +++ b/vhook.cpp @@ -1,6 +1,7 @@ #include "vhook.h" #include "vfunc_call.h" #include "util.h" +#include SourceHook::IHookManagerAutoGen *g_pHookManager = NULL; @@ -14,6 +15,95 @@ using namespace SourceHook; #define OBJECT_OFFSET (sizeof(void *)*2) #endif +#ifndef WIN32 +void *GenerateThunk(ReturnType type) +{ + sp::MacroAssembler masm; + static const size_t kStackNeeded = (2) * 4; // 2 args max + static const size_t kReserve = ke::Align(kStackNeeded + 8, 16) - 8; + + masm.push(ebp); + masm.movl(ebp, esp); + masm.subl(esp, kReserve); + if (type != ReturnType_String && type != ReturnType_Vector) + { + masm.lea(eax, Operand(ebp, 12)); // grab the incoming caller argument vector + masm.movl(Operand(esp, 1 * 4), eax); // set that as the 2nd argument + masm.movl(eax, Operand(ebp, 8)); // grab the |this| + masm.movl(Operand(esp, 0 * 4), eax); // set |this| as the 1st argument*/ + } + else + { + masm.lea(eax, Operand(ebp, 8)); // grab the incoming caller argument vector + masm.movl(Operand(esp, 1 * 4), eax); // set that as the 2nd argument + masm.movl(eax, Operand(ebp, 12)); // grab the |this| + masm.movl(Operand(esp, 0 * 4), eax); // set |this| as the 1st argument*/ + } + if (type == ReturnType_Float) + { + masm.call(ExternalAddress((void *)Callback_float)); + } + else if (type == ReturnType_Vector) + { + masm.call(ExternalAddress((void *)Callback_vector)); + } + else if (type == ReturnType_String) + { + 
masm.call(ExternalAddress((void *)Callback_stringt)); + } + else + { + masm.call(ExternalAddress((void *)Callback)); + } + masm.addl(esp, kReserve); + masm.pop(ebp); // restore ebp + masm.ret(); + + void *base = g_pSM->GetScriptingEngine()->AllocatePageMemory(masm.length()); + masm.emitToExecutableMemory(base); + return base; +} +#else +// HUGE THANKS TO BAILOPAN (dvander)! +void *GenerateThunk(ReturnType type) +{ + sp::MacroAssembler masm; + static const size_t kStackNeeded = (3 + 1) * 4; // 3 args max, 1 locals max + static const size_t kReserve = ke::Align(kStackNeeded + 8, 16) - 8; + + masm.push(ebp); + masm.movl(ebp, esp); + masm.subl(esp, kReserve); + masm.lea(eax, Operand(esp, 3 * 4)); // ptr to 2nd var after argument space + masm.movl(Operand(esp, 2 * 4), eax); // set the ptr as the third argument + masm.lea(eax, Operand(ebp, 8)); // grab the incoming caller argument vector + masm.movl(Operand(esp, 1 * 4), eax); // set that as the 2nd argument + masm.movl(Operand(esp, 0 * 4), ecx); // set |this| as the 1st argument + if (type == ReturnType_Float) + { + masm.call(ExternalAddress(Callback_float)); + } + else if (type == ReturnType_Vector) + { + masm.call(ExternalAddress(Callback_vector)); + } + else + { + masm.call(ExternalAddress(Callback)); + } + masm.movl(ecx, Operand(esp, 3 * 4)); + masm.addl(esp, kReserve); + masm.pop(ebp); // restore ebp + masm.pop(edx); // grab return address in edx + masm.addl(esp, ecx); // remove arguments + masm.jmp(edx); // return to caller + + void *base = g_pSM->GetScriptingEngine()->AllocatePageMemory(masm.length()); + masm.emitToExecutableMemory(base); + return base; +} +#endif + DHooksManager::DHooksManager(HookSetup *setup, void *iface, IPluginFunction *remove_callback, bool post) { this->callback = MakeHandler(setup->returnType); @@ -121,6 +211,27 @@ size_t GetStackArgsSize(DHooksCallback *dg) return res; } +HookReturnStruct::~HookReturnStruct() +{ + if (this->type == ReturnType_String || this->type == ReturnType_Int || this->type == ReturnType_Bool || this->type == ReturnType_Float || this->type == ReturnType_Vector) + { + free(this->newResult); + free(this->orgResult); + } + else if (this->isChanged) + { + if (this->type == ReturnType_CharPtr) + { + delete[](char *)this->newResult; + } + else if (this->type == ReturnType_VectorPtr) + { + delete (SDKVector *)this->newResult; + } + } + +} + HookParamsStruct::~HookParamsStruct() { if (this->orgParams != NULL) @@ -329,7 +440,8 @@ void *Callback(DHooksCallback *dg, void **argStack) dg->plugin_callback->Cancel(); if(returnStruct) { - delete returnStruct; + HandleSecurity sec(dg->plugin_callback->GetParentRuntime()->GetDefaultContext()->GetIdentity(), myself->GetIdentity()); + handlesys->FreeHandle(rHndl, &sec); } if(paramStruct) { diff --git a/vhook.h b/vhook.h index c4f581c..3ebbfc7 100644 --- a/vhook.h +++ b/vhook.h @@ -3,7 +3,13 @@ #include "extension.h" #include -#include + +enum CallingConvention +{ + CallConv_CDECL, + CallConv_THISCALL, + CallConv_STDCALL, +}; enum MRESReturn { @@ -86,29 +92,16 @@ struct ParamInfo SourceHook::PassInfo::PassType pass_type; }; +#ifdef WIN32 +#define OBJECT_OFFSET sizeof(void *) +#else +#define OBJECT_OFFSET (sizeof(void *)*2) +#endif + class HookReturnStruct { public: - ~HookReturnStruct() - { - if(this->type == ReturnType_String || this->type == ReturnType_Int || this->type == ReturnType_Bool || this->type == ReturnType_Float || this->type == ReturnType_Vector) - { - free(this->newResult); - free(this->orgResult); - } - else if(this->isChanged) - { - if(this->type 
== ReturnType_CharPtr) - { - delete [] (char *)this->newResult; - } - else if(this->type == ReturnType_VectorPtr) - { - delete (SDKVector *)this->newResult; - } - } - - } + ~HookReturnStruct(); public: ReturnType type; bool isChanged; @@ -162,95 +155,7 @@ bool SetupHookManager(ISmmAPI *ismm); void CleanupHooks(IPluginContext *pContext = NULL); size_t GetParamTypeSize(HookParamType type); SourceHook::PassInfo::PassType GetParamTypePassType(HookParamType type); - -#ifndef WIN32 -static void *GenerateThunk(ReturnType type) -{ - sp::MacroAssemblerX86 masm; - static const size_t kStackNeeded = (2) * 4; // 2 args max - static const size_t kReserve = ke::Align(kStackNeeded+8, 16)-8; - - masm.push(ebp); - masm.movl(ebp, esp); - masm.subl(esp, kReserve); - if(type != ReturnType_String && type != ReturnType_Vector) - { - masm.lea(eax, Operand(ebp, 12)); // grab the incoming caller argument vector - masm.movl(Operand(esp, 1 * 4), eax); // set that as the 2nd argument - masm.movl(eax, Operand(ebp, 8)); // grab the |this| - masm.movl(Operand(esp, 0 * 4), eax); // set |this| as the 1st argument*/ - } - else - { - masm.lea(eax, Operand(ebp, 8)); // grab the incoming caller argument vector - masm.movl(Operand(esp, 1 * 4), eax); // set that as the 2nd argument - masm.movl(eax, Operand(ebp, 12)); // grab the |this| - masm.movl(Operand(esp, 0 * 4), eax); // set |this| as the 1st argument*/ - } - if(type == ReturnType_Float) - { - masm.call(ExternalAddress((void *)Callback_float)); - } - else if(type == ReturnType_Vector) - { - masm.call(ExternalAddress((void *)Callback_vector)); - } - else if(type == ReturnType_String) - { - masm.call(ExternalAddress((void *)Callback_stringt)); - } - else - { - masm.call(ExternalAddress((void *)Callback)); - } - masm.addl(esp, kReserve); - masm.pop(ebp); // restore ebp - masm.ret(); - - void *base = g_pSM->GetScriptingEngine()->AllocatePageMemory(masm.length()); - masm.emitToExecutableMemory(base); - return base; -} -#else -// HUGE THANKS TO BAILOPAN (dvander)! 
-static void *GenerateThunk(ReturnType type) -{ - sp::MacroAssemblerX86 masm; - static const size_t kStackNeeded = (3 + 1) * 4; // 3 args max, 1 locals max - static const size_t kReserve = ke::Align(kStackNeeded+8, 16)-8; - - masm.push(ebp); - masm.movl(ebp, esp); - masm.subl(esp, kReserve); - masm.lea(eax, Operand(esp, 3 * 4)); // ptr to 2nd var after argument space - masm.movl(Operand(esp, 2 * 4), eax); // set the ptr as the third argument - masm.lea(eax, Operand(ebp, 8)); // grab the incoming caller argument vector - masm.movl(Operand(esp, 1 * 4), eax); // set that as the 2nd argument - masm.movl(Operand(esp, 0 * 4), ecx); // set |this| as the 1st argument - if(type == ReturnType_Float) - { - masm.call(ExternalAddress(Callback_float)); - } - else if(type == ReturnType_Vector) - { - masm.call(ExternalAddress(Callback_vector)); - } - else - { - masm.call(ExternalAddress(Callback)); - } - masm.movl(ecx, Operand(esp, 3*4)); - masm.addl(esp, kReserve); - masm.pop(ebp); // restore ebp - masm.pop(edx); // grab return address in edx - masm.addl(esp, ecx); // remove arguments - masm.jmp(edx); // return to caller - - void *base = g_pSM->GetScriptingEngine()->AllocatePageMemory(masm.length()); - masm.emitToExecutableMemory(base); - return base; -} -#endif +void *GenerateThunk(ReturnType type); static DHooksCallback *MakeHandler(ReturnType type) { @@ -280,7 +185,7 @@ public: void **orgParams; void **newParams; bool *isChanged; - DHooksCallback *dg; + DHooksInfo *dg; }; class HookSetup @@ -291,18 +196,33 @@ public: this->returnType = returnType; this->returnFlag = returnFlag; this->hookType = hookType; + this->callConv = CallConv_THISCALL; this->thisType = thisType; this->offset = offset; + this->funcAddr = nullptr; this->callback = callback; }; + HookSetup(ReturnType returnType, unsigned int returnFlag, CallingConvention callConv, ThisPointerType thisType, void *funcAddr) + { + this->returnType = returnType; + this->returnFlag = returnFlag; + this->hookType = HookType_Raw; + this->callConv = callConv; + this->thisType = thisType; + this->offset = -1; + this->funcAddr = funcAddr; + this->callback = nullptr; + }; ~HookSetup(){}; public: unsigned int returnFlag; ReturnType returnType; HookType hookType; + CallingConvention callConv; ThisPointerType thisType; SourceHook::CVector params; int offset; + void *funcAddr; IPluginFunction *callback; }; @@ -335,6 +255,7 @@ public: }; size_t GetStackArgsSize(DHooksCallback *dg); +cell_t GetThisPtr(void *iface, ThisPointerType type); extern IBinTools *g_pBinTools; extern HandleType_t g_HookParamsHandle;