From 00bb796db2a3aa9109dc363f728ef454055f8835 Mon Sep 17 00:00:00 2001 From: Scott Ehlert Date: Thu, 6 Nov 2008 17:07:53 -0600 Subject: [PATCH] Version 1.2 of fPIC script - Now works with GCC 4.3 compiled binaries (like Left 4 Dead) - Fixed: Redefining alignment blocks as data caused IDA to pop up an annoying warning --- editor/ida/gcc_fpic.idc | 762 +++++++++++++++++++++------------------- 1 file changed, 391 insertions(+), 371 deletions(-) diff --git a/editor/ida/gcc_fpic.idc b/editor/ida/gcc_fpic.idc index e75e4b3f..8c3fb585 100644 --- a/editor/ida/gcc_fpic.idc +++ b/editor/ida/gcc_fpic.idc @@ -1,371 +1,391 @@ -// ----------------------------------------------------------------------------- -// - IDA Pro Script - -// Name: gcc_fpic.idc -// By: Damaged Soul -// Desc: Add references for strings, variables, and other data that seem mangled -// due to GCC's -fPIC option and the .got section of an x86 ELF binary. -// -// Version 1.0 - November 22, 2007 -// Version 1.1 - May 02, 2008 - Now works with GCC 4.x compiled binaries -// ----------------------------------------------------------------------------- - -#include - -#define REG_NONE 0 -#define REG_EAX 1 -#define REG_EBX 2 -#define REG_ECX 3 -#define REG_EDX 4 - -#define OP_ADD 1 -#define OP_SUB 2 - -#define OPFORMAT_STRING 1 -#define OPFORMAT_DEREF 2 -#define OPFORMAT_NORMAL 3 - -static main() -{ - auto filetype, compiler, demang, strPrefix; - auto seg, codeStart, codeEnd, roStart, roEnd, gotStart, gotEnd, opformat; - auto addr, funcend, whichop, reg, tempstr; - auto operand1, operand2, opval, opstr, dataAddr, flags, count; - - SetStatus(IDA_STATUS_WORK); - Message("Starting scan for -fPIC code...\n"); - - /* Check file type and compiler */ - filetype = GetShortPrm(INF_FILETYPE); - if (filetype != FT_ELF) - { - Message("Scan aborted. 
Input file must be using ELF binary format!\n"); - SetStatus(IDA_STATUS_READY); - return; - } - compiler = GetCharPrm(INF_COMPILER); - if (compiler != COMP_GNU) - { - Message("Scan aborted. Input file must have been compiled with GNU GCC/G++!\n"); - SetStatus(IDA_STATUS_READY); - return; - } - - /* - * If the GCC v3.x names option is not set, then set it first. - * - * :TODO: Need to change this if GCC 2.95 support is to be added. - */ - demang = GetCharPrm(INF_DEMNAMES); - if ((demang & DEMNAM_GCC3) == 0) - { - SetCharPrm(INF_DEMNAMES, demang | DEMNAM_GCC3); - } - - /* Get string prefix */ - strPrefix = GetCharPrm(INF_ASCIIPREF); - - /* Get address of first section in binary */ - seg = FirstSeg(); - - /* Iterate through all sections and get address of .text, .rodata, and .got */ - while (seg != BADADDR) - { - if (SegName(seg) == ".text") - { - codeStart = seg; - codeEnd = NextSeg(seg); - Message("%08.8Xh - %08.8Xh: .text\n", codeStart, codeEnd); - } - else if (SegName(seg) == ".rodata") - { - roStart = seg; - roEnd = NextSeg(seg); - Message("%08.8Xh - %08.8Xh: .rodata\n", roStart, roEnd); - } - else if (SegName(seg) == "abs") - { - addr = FindText(seg, SEARCH_DOWN|SEARCH_CASE|SEARCH_NOSHOW, 0, 0, "_GLOBAL_OFFSET_TABLE_"); - gotStart = Dword(addr); - gotEnd = NextSeg(gotStart); - Message("%08.8Xh - %08.8Xh: .got\n", gotStart, gotEnd); - } - - seg = NextSeg(seg); - } - - addr = codeStart; - funcend = -1; - count = 0; - - /** - * Go through .text section while looking for anything like [e?x+blah] or [e?x-blah]. - * - * The eax, ebx, ecx, or edx registers are used for storing the address of the .got - * section with -fPIC code. An offset, either negative or positive is added to e?x. - * This results in the address of a string, variable, or other type of data. - * - * In order to determine which register .got will be stored in, one can look at which - * __i686.get_pc_thunk.? function is called near the beginning of each function. 
The - * suffix on this function is either ax, bx, cx, or dx and corresponds to eax, ebx, - * ecx, or edx. - */ - while (addr <= codeEnd) - { - operand1 = GetOpnd(addr, 0); - operand2 = GetOpnd(addr, 1); - whichop = -1; - - /* Get function end */ - if (FindFuncEnd(addr) != funcend) - { - reg = REG_NONE; - funcend = FindFuncEnd(addr); - } - - /* Get current PIC register */ - reg = GetPICRegister(addr, reg); - - if (reg != REG_NONE) - { - /* Search first operand for substring containing PIC register and either a plus or minus sign */ - if (strstr(operand1, GetPICSearchString(reg, OP_ADD)) != -1 || strstr(operand1, GetPICSearchString(reg, OP_SUB)) != -1) - { - whichop = 0; - } - - /* Search second operand for substring containing PIC register and either a plus or minus sign */ - if (strstr(operand2, GetPICSearchString(reg, OP_ADD)) != -1 || strstr(operand2, GetPICSearchString(reg, OP_SUB)) != -1) - { - whichop = 1; - } - } - - if (whichop != -1) - { - /* Get .got offset */ - opval = GetOperandValue(addr, whichop); - - /* Get address inside .got */ - dataAddr = gotStart + opval; - - /* Get name at address if it exists */ - opstr = Name(dataAddr); - - /* If name doesn't exist then... */ - if (opstr == "") - { - /* - * Check address to see if it falls in .rodata section. - * If it does, then try to make it a string which will automatically give it a name. - */ - if (dataAddr >= roStart && dataAddr <= roEnd && MakeStr(dataAddr, BADADDR)) - { - /* Get automatically created name */ - opstr = Name(dataAddr); - opformat = OPFORMAT_STRING; - - /* - * Sometimes IDA creates a string successfully but not exactly in the right place. - * Uncertain as to why this is (perhaps an IDA bug?), but usually the string in - * question is a bunch of garbage. 
- */ - if (opstr == "") - { - /* Create a name based on the address */ - opstr = form("unk_%X", dataAddr); - MakeNameEx(dataAddr, opstr, SN_NOCHECK|SN_NOLIST|SN_NOWARN); - opformat = OPFORMAT_DEREF; - } - } - else - { - /* - * If address didn't fall into .rodata and the string creation was unsuccessful, - * then try to read the address at 'addr' and get the name of that. - */ - opstr = Name(Dword(dataAddr)); - if (opstr == "") - { - /* If name doesn't exist for that, then create name based on address */ - opstr = form("unk_%X", dataAddr); - MakeNameEx(dataAddr, opstr, SN_NOCHECK|SN_NOLIST|SN_NOWARN); - opformat = OPFORMAT_DEREF; - } - else - { - /* If the name did exist at this point, then use it */ - opformat = OPFORMAT_NORMAL; - } - } - } - else - { - /* If the name at the original address does exist then ... */ - - flags = GetFlags(dataAddr); - - /* - * If this address falls into .rodata section and is considered an existing string - * then the replacement operand needs to shown as a string. - */ - if (dataAddr >= roStart && dataAddr <= roEnd && strPrefix != "" && strstr(opstr, strPrefix) == 0) - { - opformat = OPFORMAT_STRING; - } - else - { - opformat = OPFORMAT_DEREF; - } - } - - /* - * Try to demangle the name that was found or created above and print it as sort of - * a status message to show the the script is still doing work as this usually - * can take awhile. - */ - tempstr = Demangle(opstr, INF_LONG_DN); - if (tempstr != "") - { - Message("%8.8Xh: %s\n", addr, tempstr); - } - else - { - Message("%8.8Xh: %s\n", addr, opstr); - } - - /* - * The operand that was found to have the PIC register will now be replaced - * with more descriptive text. The format of this text depends upon the value - * of opformat. - */ - OpAlt(addr, whichop, DoOperandFormat(opformat, opstr)); - - count++; - } - - addr++; - } - - Message("Scan for PIC code is complete! 
Found %d data items referenced via PIC register.\n", count); - Message("Please re-open the database so that newly found strings will appear in the Strings window\n"); - SetStatus(IDA_STATUS_READY); -} - -/* - * Tries to determine the current PIC register given the current address being processed - * and the previous PIC register. - */ -static GetPICRegister(addr, previous) -{ - auto assemblyStr, idx, reg; - assemblyStr = GetDisasm(addr); - - if ((idx = strstr(assemblyStr, "call __i686_get_pc_thunk_")) != -1) - { - /* 28 is the length of the above string */ - reg = substr(assemblyStr, idx + 28, 30); - - if (reg == "ax") - { - return REG_EAX; - } - else if (reg == "bx") - { - return REG_EBX; - } - else if (reg == "cx") - { - return REG_ECX; - } - else if (reg == "dx") - { - return REG_EDX; - } - } - - return previous; -} - -/* - * Returns a string that is used as a substring search containing the specified - * PIC register and operator (+ or -). - */ -static GetPICSearchString(reg, operator) -{ - if (reg == REG_EAX) - { - if (operator == OP_ADD) - { - return "[eax+"; - } - else if (operator == OP_SUB) - { - return "[eax-"; - } - } - else if (reg == REG_EBX) - { - if (operator == OP_ADD) - { - return "[ebx+"; - } - else if (operator == OP_SUB) - { - return "[ebx-"; - } - } - else if (reg == REG_ECX) - { - if (operator == OP_ADD) - { - return "[ecx+"; - } - else if (operator == OP_SUB) - { - return "[ecx-"; - } - } - else if (reg == REG_EDX) - { - if (operator == OP_ADD) - { - return "[edx+"; - } - else if (operator == OP_SUB) - { - return "[edx-"; - } - } -} - -/* - * Returns a formatted string depending upon the value of the format param. - * This will be the replacement for the operand containing the PIC register. - * - * OPFORMAT_STRING: The referenced data address is a string in the .rodata section. - * OPFORMAT_DEREF: The referenced data address has a name. The PIC register operand is - * deferenced so the dereference brackets are shown in the returned string. 
- * OPFORMAT_NORMAL: The referenced data address does not have a name. But upon reading - * the address at the referenced data address, it is discovered that that - * address does have a name. There are no dereference brackets because the - * referenced data address had to be read in order to discover a name. - */ -static DoOperandFormat(format, str) -{ - if (format == OPFORMAT_STRING) - { - return form("offset %s", str); - } - else if (format == OPFORMAT_DEREF) - { - return form("[ds:%s]", str); - } - else if (format == OPFORMAT_NORMAL) - { - return form("ds:%s", str); - } - else - { - return str; - } -} +// ----------------------------------------------------------------------------- +// - IDA Pro Script - +// Name: gcc_fpic.idc +// By: Damaged Soul +// Desc: Add references for strings, variables, and other data that seem mangled +// due to GCC's -fPIC option and the .got section of an x86 ELF binary. +// +// Version History +// 1.0 [2007-11-22] +// - Initial Version +// 1.1 [2008-05-02] +// - Now works with GCC 4.x compiled binaries +// 1.2 [2008-11-06] +// - Now works with GCC 4.3 compiled binaries +// - Fixed: Redefining alignment blocks as data caused IDA to pop up +// an annoying warning +// ----------------------------------------------------------------------------- + +#include + +#define REG_NONE 0 +#define REG_EAX 1 +#define REG_EBX 2 +#define REG_ECX 3 +#define REG_EDX 4 + +#define OP_ADD 1 +#define OP_SUB 2 + +#define OPFORMAT_STRING 1 +#define OPFORMAT_DEREF 2 +#define OPFORMAT_NORMAL 3 +/* Entry point: aborts unless the input is an ELF file compiled with GNU tools, records the .text and .rodata ranges and the .got address, then walks .text and uses OpAlt to replace every operand that is relative to the PIC register with the name of the data it refers to. */ +static main() +{ + auto filetype, compiler, demang, strPrefix; + auto seg, codeStart, codeEnd, roStart, roEnd, gotStart, gotEnd, opformat; + auto addr, funcend, whichop, reg, tempstr; + auto operand1, operand2, opval, opstr, dataAddr, flags, count; + + SetStatus(IDA_STATUS_WORK); + Message("Starting scan for -fPIC code...\n"); + + /* Check file type and compiler */ + filetype = GetShortPrm(INF_FILETYPE); + if (filetype != FT_ELF) + { + Message("Scan aborted.
Input file must be using ELF binary format!\n"); + SetStatus(IDA_STATUS_READY); + return; + } + compiler = GetCharPrm(INF_COMPILER); + if (compiler != COMP_GNU) + { + Message("Scan aborted. Input file must have been compiled with GNU GCC/G++!\n"); + SetStatus(IDA_STATUS_READY); + return; + } + + /* + * If the GCC v3.x names option is not set, then set it first. + * + * :TODO: Need to change this if GCC 2.95 support is to be added. + */ + demang = GetCharPrm(INF_DEMNAMES); + if ((demang & DEMNAM_GCC3) == 0) + { + SetCharPrm(INF_DEMNAMES, demang | DEMNAM_GCC3); + } + + /* Get string prefix */ + strPrefix = GetCharPrm(INF_ASCIIPREF); + + /* Get address of first section in binary */ + seg = FirstSeg(); + + /* Iterate through all sections and get address of .text, .rodata, and .got */ + while (seg != BADADDR) + { + if (SegName(seg) == ".text") + { + codeStart = seg; + codeEnd = NextSeg(seg); + Message("%08.8Xh - %08.8Xh: .text\n", codeStart, codeEnd); + } + else if (SegName(seg) == ".rodata") + { + roStart = seg; + roEnd = NextSeg(seg); + Message("%08.8Xh - %08.8Xh: .rodata\n", roStart, roEnd); + } + else if (SegName(seg) == "abs") + { + addr = FindText(seg, SEARCH_DOWN|SEARCH_CASE|SEARCH_NOSHOW, 0, 0, "_GLOBAL_OFFSET_TABLE_"); + gotStart = Dword(addr); + gotEnd = NextSeg(gotStart); + Message("%08.8Xh - %08.8Xh: .got\n", gotStart, gotEnd); + } + + seg = NextSeg(seg); + } + + addr = codeStart; + funcend = -1; + count = 0; + + /** + * Go through .text section while looking for anything like [e?x+blah] or [e?x-blah]. + * + * The eax, ebx, ecx, or edx registers are used for storing the address of the .got + * section with -fPIC code. An offset, either negative or positive is added to e?x. + * This results in the address of a string, variable, or other type of data. + * + * In order to determine which register .got will be stored in, one can look at which + * __i686.get_pc_thunk.? function is called near the beginning of each function.
The + * suffix on this function is either ax, bx, cx, or dx and corresponds to eax, ebx, + * ecx, or edx. + */ + while (addr <= codeEnd) /* NOTE(review): codeEnd was set to NextSeg() of the .text start, so <= also visits that address, and addr is advanced one byte per pass - confirm both are intended */ + { + operand1 = GetOpnd(addr, 0); + operand2 = GetOpnd(addr, 1); + whichop = -1; + + /* Get function end; when it differs from the remembered one a new function has been entered, so the PIC register is reset */ + if (FindFuncEnd(addr) != funcend) + { + reg = REG_NONE; + funcend = FindFuncEnd(addr); + } + + /* Get current PIC register */ + reg = GetPICRegister(addr, reg, funcend); + + if (reg != REG_NONE) + { + /* Search first operand for substring containing PIC register and either a plus or minus sign */ + if (strstr(operand1, GetPICSearchString(reg, OP_ADD)) != -1 || strstr(operand1, GetPICSearchString(reg, OP_SUB)) != -1) + { + whichop = 0; + } + + /* Search second operand for substring containing PIC register and either a plus or minus sign */ + if (strstr(operand2, GetPICSearchString(reg, OP_ADD)) != -1 || strstr(operand2, GetPICSearchString(reg, OP_SUB)) != -1) + { + whichop = 1; + } + } + + if (whichop != -1) + { + /* Get .got offset */ + opval = GetOperandValue(addr, whichop); + + /* Get address inside .got */ + dataAddr = gotStart + opval; + + /* Get name at address if it exists */ + opstr = Name(dataAddr); + + /* If name doesn't exist then... */ + if (opstr == "") + { + /* + * Check address to see if it falls in .rodata section. + * If it does, then try to make it a string which will automatically give it a name. + */ + if (dataAddr >= roStart && dataAddr <= roEnd && MakeStr(dataAddr, BADADDR)) + { + /* Get automatically created name */ + opstr = Name(dataAddr); + opformat = OPFORMAT_STRING; + + /* + * Sometimes IDA creates a string successfully but not exactly in the right place. + * Uncertain as to why this is (perhaps an IDA bug?), but usually the string in + * question is a bunch of garbage.
+ */ + if (opstr == "") + { + /* Create a name based on the address. If the item currently disassembles as an alignment directive it is undefined first, because redefining alignment blocks made IDA pop up a warning. */ + opstr = form("unk_%X", dataAddr); + if (strstr(GetDisasm(dataAddr), "align") != -1) + { + MakeUnkn(dataAddr, DOUNK_SIMPLE); + } + MakeNameEx(dataAddr, opstr, SN_NOCHECK|SN_NOLIST|SN_NOWARN); + opformat = OPFORMAT_DEREF; + } + } + else + { + /* + * If address didn't fall into .rodata or the string creation was unsuccessful, + * then try to read the address stored at dataAddr and get the name of that. + */ + opstr = Name(Dword(dataAddr)); + if (opstr == "") + { + /* If name doesn't exist for that, then create name based on address (alignment items are undefined first, as above) */ + opstr = form("unk_%X", dataAddr); + if (strstr(GetDisasm(dataAddr), "align") != -1) + { + MakeUnkn(dataAddr, DOUNK_SIMPLE); + } + MakeNameEx(dataAddr, opstr, SN_NOCHECK|SN_NOLIST|SN_NOWARN); + opformat = OPFORMAT_DEREF; + } + else + { + /* If the name did exist at this point, then use it */ + opformat = OPFORMAT_NORMAL; + } + } + } + else + { + /* If the name at the original address does exist then ... */ + + flags = GetFlags(dataAddr); /* NOTE(review): flags is not read again anywhere in main */ + + /* + * If this address falls into .rodata section and is considered an existing string + * then the replacement operand needs to be shown as a string. + */ + if (dataAddr >= roStart && dataAddr <= roEnd && strPrefix != "" && strstr(opstr, strPrefix) == 0) + { + opformat = OPFORMAT_STRING; + } + else + { + opformat = OPFORMAT_DEREF; + } + } + + /* + * Try to demangle the name that was found or created above and print it as sort of + * a status message to show that the script is still doing work, as this usually + * can take a while. + */ + tempstr = Demangle(opstr, INF_LONG_DN); + if (tempstr != "") + { + Message("%8.8Xh: %s\n", addr, tempstr); + } + else + { + Message("%8.8Xh: %s\n", addr, opstr); + } + + /* + * The operand that was found to have the PIC register will now be replaced + * with more descriptive text. The format of this text depends upon the value + * of opformat.
+ */ + OpAlt(addr, whichop, DoOperandFormat(opformat, opstr)); + + count++; + } + + addr++; + } + + Message("Scan for PIC code is complete! Found %d data items referenced via PIC register.\n", count); + Message("Please re-open the database so that newly found strings will appear in the Strings window\n"); + SetStatus(IDA_STATUS_READY); +} + +/* + * Tries to determine the current PIC register given the current address being processed + * and the previous PIC register. + * + * addr: address whose disassembly text (GetDisasm) is inspected. + * previous: value returned unchanged when no register suffix is recognised. + * funcend: limit handed to NextHead when fetching the instruction after a call $+5. + * + * Two forms are recognised. For a call to an __i686_get_pc_thunk_ helper the suffix is + * taken with substr(assemblyStr, idx + 28, 30). For call $+5 the disassembly of the + * next instruction is fetched and the suffix is taken with substr(assemblyStr, 9, 11). + * + * Returns REG_EAX, REG_EBX, REG_ECX or REG_EDX for the suffixes ax, bx, cx and dx; + * otherwise returns previous. + */ +static GetPICRegister(addr, previous, funcend) +{ + auto assemblyStr, idx, reg, ab; /* NOTE(review): ab is declared but not used in this function */ + assemblyStr = GetDisasm(addr); + + if ((idx = strstr(assemblyStr, "call __i686_get_pc_thunk_")) != -1) + { + /* 28 is the length of the above string. NOTE(review): the literal as shown here is 25 characters, so 28 presumably counts padding that GetDisasm emits after the mnemonic; also the start index is relative to idx while the end index 30 is absolute, which only agree when idx is 0 - confirm both. */ + reg = substr(assemblyStr, idx + 28, 30); + } + else if (strstr(assemblyStr, "call $+5") != -1) + { + assemblyStr = GetDisasm(NextHead(addr, funcend)); + reg = substr(assemblyStr, 9, 11); + } + /* NOTE(review): reg is only assigned in the two branches above; when neither matched, the comparisons below run on a variable this function never set - confirm what IDC yields in that case */ + if (reg == "ax") + { + return REG_EAX; + } + else if (reg == "bx") + { + return REG_EBX; + } + else if (reg == "cx") + { + return REG_ECX; + } + else if (reg == "dx") + { + return REG_EDX; + } + + return previous; +} + +/* + * Returns a string that is used as a substring search containing the specified + * PIC register and operator (+ or -). + * + * reg: one of REG_EAX, REG_EBX, REG_ECX, REG_EDX. + * operator: OP_ADD or OP_SUB. + * + * The result is an opening bracket, the register name and the sign, e.g. [ebx+ for + * REG_EBX with OP_ADD. For any other reg or operator value the function reaches its + * end without executing a return statement. + */ +static GetPICSearchString(reg, operator) +{ + if (reg == REG_EAX) + { + if (operator == OP_ADD) + { + return "[eax+"; + } + else if (operator == OP_SUB) + { + return "[eax-"; + } + } + else if (reg == REG_EBX) + { + if (operator == OP_ADD) + { + return "[ebx+"; + } + else if (operator == OP_SUB) + { + return "[ebx-"; + } + } + else if (reg == REG_ECX) + { + if (operator == OP_ADD) + { + return "[ecx+"; + } + else if (operator == OP_SUB) + { + return "[ecx-"; + } + } + else if (reg == REG_EDX) + { + if (operator == OP_ADD) + { + return "[edx+"; + } + else if (operator == OP_SUB) + { + return "[edx-"; + } + } +} + +/* + * Returns a formatted string depending upon the value of the format param.
+ * This will be the replacement for the operand containing the PIC register. + * + * OPFORMAT_STRING: The referenced data address is a string in the .rodata section. + * OPFORMAT_DEREF: The referenced data address has a name. The PIC register operand is + * dereferenced so the dereference brackets are shown in the returned string. + * OPFORMAT_NORMAL: The referenced data address does not have a name. But upon reading + * the address at the referenced data address, it is discovered that that + * address does have a name. There are no dereference brackets because the + * referenced data address had to be read in order to discover a name. + * + * format: one of the OPFORMAT_ constants; any other value returns str unchanged. + * str: the name to embed in the replacement text. + * The result is what main passes to OpAlt as the new operand text. + */ +static DoOperandFormat(format, str) +{ + if (format == OPFORMAT_STRING) + { + return form("offset %s", str); + } + else if (format == OPFORMAT_DEREF) + { + return form("[ds:%s]", str); + } + else if (format == OPFORMAT_NORMAL) + { + return form("ds:%s", str); + } + else + { + return str; + } +}