// -----------------------------------------------------------------------------
//                              - IDA Pro Script -
// Name: gcc_fpic.idc
// By:   Damaged Soul
// Desc: Add references for strings, variables, and other data that seem mangled
//       due to GCC's -fPIC option and the .got section of an x86 ELF binary.
//
// Version 1.0 - November 22, 2007
// Version 1.1 - May 02, 2008 - Now works with GCC 4.x compiled binaries
// -----------------------------------------------------------------------------

#include <idc.idc>

/* Registers that may hold the address of the global offset table */
#define REG_NONE 0
#define REG_EAX  1
#define REG_EBX  2
#define REG_ECX  3
#define REG_EDX  4

/* Sign of the displacement applied to the PIC register */
#define OP_ADD 1
#define OP_SUB 2

/* How the replacement operand text is rendered (see DoOperandFormat) */
#define OPFORMAT_STRING 1
#define OPFORMAT_DEREF  2
#define OPFORMAT_NORMAL 3

/*
 * Entry point.
 *
 * Validates that the database is a GCC-compiled ELF, locates the .text and
 * .rodata segments and the global offset table, then walks every item in .text
 * looking for operands of the form [e?x+disp] / [e?x-disp] where e?x is the
 * current PIC register. Each such operand gets a manual (alternate) operand
 * text naming the data it really refers to.
 *
 * Returns nothing; progress and results are reported through Message().
 */
static main()
{
    auto filetype, compiler, demang, strPrefix;
    auto seg, codeStart, codeEnd, roStart, roEnd, gotStart, gotEnd, opformat;
    auto addr, funcend, curend, whichop, reg, tempstr;
    auto operand1, operand2, addStr, subStr;
    auto opval, opstr, dataAddr, inRodata, count;

    SetStatus(IDA_STATUS_WORK);
    Message("Starting scan for -fPIC code...\n");

    /* Check file type and compiler */
    filetype = GetShortPrm(INF_FILETYPE);
    if (filetype != FT_ELF)
    {
        Message("Scan aborted. Input file must be using ELF binary format!\n");
        SetStatus(IDA_STATUS_READY);
        return;
    }
    compiler = GetCharPrm(INF_COMPILER);
    if (compiler != COMP_GNU)
    {
        Message("Scan aborted. Input file must have been compiled with GNU GCC/G++!\n");
        SetStatus(IDA_STATUS_READY);
        return;
    }

    /*
     * If the GCC v3.x names option is not set, then set it first.
     *
     * :TODO: Need to change this if GCC 2.95 support is to be added.
     */
    demang = GetCharPrm(INF_DEMNAMES);
    if ((demang & DEMNAM_GCC3) == 0)
    {
        SetCharPrm(INF_DEMNAMES, demang | DEMNAM_GCC3);
    }

    /*
     * Get string prefix.
     *
     * NOTE(review): this is read with GetCharPrm() but used below as a string
     * (compared against "" and passed to strstr). Confirm that the value has
     * the intended type on the targeted IDA version.
     */
    strPrefix = GetCharPrm(INF_ASCIIPREF);

    /* Mark every range as "not found" so missing sections can be detected */
    codeStart = BADADDR;
    codeEnd   = BADADDR;
    roStart   = BADADDR;
    roEnd     = BADADDR;
    gotStart  = BADADDR;
    gotEnd    = BADADDR;

    /* Iterate through all sections and get address of .text, .rodata, and .got */
    seg = FirstSeg();
    while (seg != BADADDR)
    {
        if (SegName(seg) == ".text")
        {
            /* SegEnd() is the first address past the segment (exclusive end) */
            codeStart = seg;
            codeEnd = SegEnd(seg);
            Message("%08.8Xh - %08.8Xh: .text\n", codeStart, codeEnd);
        }
        else if (SegName(seg) == ".rodata")
        {
            roStart = seg;
            roEnd = SegEnd(seg);
            Message("%08.8Xh - %08.8Xh: .rodata\n", roStart, roEnd);
        }
        else if (SegName(seg) == "abs")
        {
            /* The "abs" segment holds the _GLOBAL_OFFSET_TABLE_ symbol whose value is the table address */
            addr = FindText(seg, SEARCH_DOWN|SEARCH_CASE|SEARCH_NOSHOW, 0, 0, "_GLOBAL_OFFSET_TABLE_");
            if (addr != BADADDR)
            {
                gotStart = Dword(addr);
                gotEnd = SegEnd(gotStart);
                Message("%08.8Xh - %08.8Xh: .got\n", gotStart, gotEnd);
            }
        }

        seg = NextSeg(seg);
    }

    /* Without code to scan or a table base to add offsets to, nothing useful can be done */
    if (codeStart == BADADDR)
    {
        Message("Scan aborted. Could not find the .text section!\n");
        SetStatus(IDA_STATUS_READY);
        return;
    }
    if (gotStart == BADADDR)
    {
        Message("Scan aborted. Could not find _GLOBAL_OFFSET_TABLE_!\n");
        SetStatus(IDA_STATUS_READY);
        return;
    }

    addr = codeStart;
    funcend = -1;
    reg = REG_NONE;
    count = 0;

    /*
     * Go through .text section while looking for anything like [e?x+blah] or [e?x-blah].
     *
     * The eax, ebx, ecx, or edx registers are used for storing the address of the .got
     * section with -fPIC code. An offset, either negative or positive is added to e?x.
     * This results in the address of a string, variable, or other type of data.
     *
     * In order to determine which register .got will be stored in, one can look at which
     * __i686.get_pc_thunk.? function is called near the beginning of each function. The
     * suffix on this function is either ax, bx, cx, or dx and corresponds to eax, ebx,
     * ecx, or edx.
     */
    while (addr != BADADDR && addr < codeEnd)
    {
        whichop = -1;

        /* Entering a different function: forget the previous function's PIC register */
        curend = FindFuncEnd(addr);
        if (curend != funcend)
        {
            reg = REG_NONE;
            funcend = curend;
        }

        /* Get current PIC register */
        reg = GetPICRegister(addr, reg);

        if (reg != REG_NONE)
        {
            operand1 = GetOpnd(addr, 0);
            operand2 = GetOpnd(addr, 1);
            addStr = GetPICSearchString(reg, OP_ADD);
            subStr = GetPICSearchString(reg, OP_SUB);

            /* Search first operand for substring containing PIC register and either a plus or minus sign */
            if (strstr(operand1, addStr) != -1 || strstr(operand1, subStr) != -1)
            {
                whichop = 0;
            }

            /* Same search on the second operand; it takes precedence if both match */
            if (strstr(operand2, addStr) != -1 || strstr(operand2, subStr) != -1)
            {
                whichop = 1;
            }
        }

        if (whichop != -1)
        {
            /* Get .got offset */
            opval = GetOperandValue(addr, whichop);

            /* Get address relative to the .got base */
            dataAddr = gotStart + opval;

            /* Only meaningful when a .rodata section was actually found */
            inRodata = (roStart != BADADDR && dataAddr >= roStart && dataAddr < roEnd);

            /* Get name at address if it exists */
            opstr = Name(dataAddr);

            /* If name doesn't exist then... */
            if (opstr == "")
            {
                /*
                 * Check address to see if it falls in .rodata section.
                 * If it does, then try to make it a string which will automatically give it a name.
                 */
                if (inRodata && MakeStr(dataAddr, BADADDR))
                {
                    /* Get automatically created name */
                    opstr = Name(dataAddr);
                    opformat = OPFORMAT_STRING;

                    /*
                     * Sometimes IDA creates a string successfully but not exactly in the right place.
                     * Uncertain as to why this is (perhaps an IDA bug?), but usually the string in
                     * question is a bunch of garbage.
                     */
                    if (opstr == "")
                    {
                        /* Create a name based on the address */
                        opstr = form("unk_%X", dataAddr);
                        MakeNameEx(dataAddr, opstr, SN_NOCHECK|SN_NOLIST|SN_NOWARN);
                        opformat = OPFORMAT_DEREF;
                    }
                }
                else
                {
                    /*
                     * If address didn't fall into .rodata or the string creation was unsuccessful,
                     * then try to read the address stored at 'dataAddr' and get the name of that.
                     */
                    opstr = Name(Dword(dataAddr));
                    if (opstr == "")
                    {
                        /* If name doesn't exist for that, then create name based on address */
                        opstr = form("unk_%X", dataAddr);
                        MakeNameEx(dataAddr, opstr, SN_NOCHECK|SN_NOLIST|SN_NOWARN);
                        opformat = OPFORMAT_DEREF;
                    }
                    else
                    {
                        /* If the name did exist at this point, then use it */
                        opformat = OPFORMAT_NORMAL;
                    }
                }
            }
            else
            {
                /*
                 * The name at the original address does exist.
                 *
                 * If this address falls into .rodata section and is considered an existing string
                 * (its name starts with the string prefix) then the replacement operand needs to
                 * be shown as a string.
                 */
                if (inRodata && strPrefix != "" && strstr(opstr, strPrefix) == 0)
                {
                    opformat = OPFORMAT_STRING;
                }
                else
                {
                    opformat = OPFORMAT_DEREF;
                }
            }

            /*
             * Try to demangle the name that was found or created above and print it as sort of
             * a status message to show that the script is still doing work as this usually
             * can take awhile. Demangle() expects the demangler mask itself, so the value of
             * the INF_LONG_DN setting is fetched rather than passing the setting's identifier.
             */
            tempstr = Demangle(opstr, GetLongPrm(INF_LONG_DN));
            if (tempstr != "")
            {
                Message("%8.8Xh: %s\n", addr, tempstr);
            }
            else
            {
                Message("%8.8Xh: %s\n", addr, opstr);
            }

            /*
             * The operand that was found to have the PIC register will now be replaced
             * with more descriptive text. The format of this text depends upon the value
             * of opformat.
             */
            OpAlt(addr, whichop, DoOperandFormat(opformat, opstr));

            count++;
        }

        /* Step item by item; operands only exist at item heads, so tail bytes are skipped */
        addr = NextHead(addr, codeEnd);
    }

    Message("Scan for PIC code is complete! Found %d data items referenced via PIC register.\n", count);
    Message("Please re-open the database so that newly found strings will appear in the Strings window\n");
    SetStatus(IDA_STATUS_READY);
}

/*
 * Tries to determine the current PIC register given the current address being processed
 * and the previous PIC register.
 *
 * addr:     address of the item to inspect.
 * previous: register constant to return when 'addr' is not a call to a PC thunk.
 *
 * Returns REG_EAX/REG_EBX/REG_ECX/REG_EDX when the instruction at 'addr' is a call whose
 * target name contains "__i686_get_pc_thunk_" followed by ax/bx/cx/dx; otherwise 'previous'.
 */
static GetPICRegister(addr, previous)
{
    auto prefix, target, idx, start, suffix;

    /* Inspect mnemonic and operand separately so column padding in the listing is irrelevant */
    if (GetMnem(addr) != "call")
    {
        return previous;
    }

    prefix = "__i686_get_pc_thunk_";
    target = GetOpnd(addr, 0);
    idx = strstr(target, prefix);
    if (idx == -1)
    {
        return previous;
    }

    /* The two characters right after the prefix name the register */
    start = idx + strlen(prefix);
    if (strlen(target) < start + 2)
    {
        return previous;
    }
    suffix = substr(target, start, start + 2);

    if (suffix == "ax")
    {
        return REG_EAX;
    }
    if (suffix == "bx")
    {
        return REG_EBX;
    }
    if (suffix == "cx")
    {
        return REG_ECX;
    }
    if (suffix == "dx")
    {
        return REG_EDX;
    }

    return previous;
}

/*
 * Returns a string that is used as a substring search containing the specified
 * PIC register and operator (+ or -), e.g. "[ebx+" or "[ecx-".
 *
 * reg:      one of REG_EAX, REG_EBX, REG_ECX, REG_EDX.
 * operator: OP_ADD or OP_SUB.
 *
 * No value is returned for any other register or operator; callers are expected
 * to pass valid constants only.
 */
static GetPICSearchString(reg, operator)
{
    auto regName, sign;

    if (reg == REG_EAX)
    {
        regName = "eax";
    }
    else if (reg == REG_EBX)
    {
        regName = "ebx";
    }
    else if (reg == REG_ECX)
    {
        regName = "ecx";
    }
    else if (reg == REG_EDX)
    {
        regName = "edx";
    }
    else
    {
        return;
    }

    if (operator == OP_ADD)
    {
        sign = "+";
    }
    else if (operator == OP_SUB)
    {
        sign = "-";
    }
    else
    {
        return;
    }

    return "[" + regName + sign;
}

/*
 * Returns a formatted string depending upon the value of the format param.
 * This will be the replacement for the operand containing the PIC register.
 *
 * OPFORMAT_STRING: The referenced data address is a string in the .rodata section.
 *                  Rendered as "offset <name>".
 * OPFORMAT_DEREF:  The referenced data address has a name. The PIC register operand is
 *                  dereferenced so the dereference brackets are shown in the returned
 *                  string. Rendered as "[ds:<name>]".
 * OPFORMAT_NORMAL: The referenced data address does not have a name. But upon reading
 *                  the address at the referenced data address, it is discovered that that
 *                  address does have a name. There are no dereference brackets because the
 *                  referenced data address had to be read in order to discover a name.
 *                  Rendered as "ds:<name>".
 *
 * Any other format value returns 'str' unchanged.
 */
static DoOperandFormat(format, str)
{
    if (format == OPFORMAT_STRING)
    {
        return form("offset %s", str);
    }
    if (format == OPFORMAT_DEREF)
    {
        return form("[ds:%s]", str);
    }
    if (format == OPFORMAT_NORMAL)
    {
        return form("ds:%s", str);
    }

    return str;
}