This commit is contained in:
David Anderson 2008-11-11 01:37:24 -08:00
commit c880919466

View File

@ -1,371 +1,391 @@
// -----------------------------------------------------------------------------
// - IDA Pro Script -
// Name: gcc_fpic.idc
// By: Damaged Soul
// Desc: Add references for strings, variables, and other data that seem mangled
// due to GCC's -fPIC option and the .got section of an x86 ELF binary.
//
// Version 1.0 - November 22, 2007
// Version 1.1 - May 02, 2008 - Now works with GCC 4.x compiled binaries
// -----------------------------------------------------------------------------
#include <idc.idc>
#define REG_NONE 0
#define REG_EAX 1
#define REG_EBX 2
#define REG_ECX 3
#define REG_EDX 4
#define OP_ADD 1
#define OP_SUB 2
#define OPFORMAT_STRING 1
#define OPFORMAT_DEREF 2
#define OPFORMAT_NORMAL 3
/*
 * Script entry point.
 *
 * Verifies that the database is an ELF file compiled with GNU tools, locates
 * the .text and .rodata segments and the address stored for
 * _GLOBAL_OFFSET_TABLE_, then walks .text byte by byte. Every operand of the
 * form [e?x+N] / [e?x-N] that uses the current PIC register is resolved to
 * gotStart + N and replaced (via OpAlt) by a symbolic operand naming the data
 * found there.
 */
static main()
{
    auto filetype, compiler, demang, strPrefix;
    auto seg, segName, codeStart, codeEnd, roStart, roEnd, gotStart, gotEnd, opformat;
    auto addr, funcend, curFuncEnd, whichop, reg, tempstr;
    auto operand1, operand2, opval, opstr, dataAddr, count;

    SetStatus(IDA_STATUS_WORK);
    Message("Starting scan for -fPIC code...\n");

    /* Check file type and compiler */
    filetype = GetShortPrm(INF_FILETYPE);
    if (filetype != FT_ELF)
    {
        Message("Scan aborted. Input file must be using ELF binary format!\n");
        SetStatus(IDA_STATUS_READY);
        return;
    }
    compiler = GetCharPrm(INF_COMPILER);
    if (compiler != COMP_GNU)
    {
        Message("Scan aborted. Input file must have been compiled with GNU GCC/G++!\n");
        SetStatus(IDA_STATUS_READY);
        return;
    }

    /*
     * If the GCC v3.x names option is not set, then set it first.
     *
     * :TODO: Need to change this if GCC 2.95 support is to be added.
     */
    demang = GetCharPrm(INF_DEMNAMES);
    if ((demang & DEMNAM_GCC3) == 0)
    {
        SetCharPrm(INF_DEMNAMES, demang | DEMNAM_GCC3);
    }

    /*
     * Get string prefix.
     * NOTE(review): the value comes from GetCharPrm but is used below as a
     * string argument to strstr(); confirm this yields the full name prefix.
     */
    strPrefix = GetCharPrm(INF_ASCIIPREF);

    /*
     * Start from "not found" values so that a missing segment is detected
     * instead of silently using unassigned variables. An empty .rodata range
     * (0..0) simply makes every .rodata test below fail.
     */
    codeStart = BADADDR;
    codeEnd = BADADDR;
    roStart = 0;
    roEnd = 0;
    gotStart = BADADDR;
    gotEnd = BADADDR;

    /* Iterate through all sections and get address of .text, .rodata, and .got */
    seg = FirstSeg();
    while (seg != BADADDR)
    {
        segName = SegName(seg);
        if (segName == ".text")
        {
            codeStart = seg;
            /* SegEnd gives the real (exclusive) end even for the last segment */
            codeEnd = SegEnd(seg);
            Message("%08.8Xh - %08.8Xh: .text\n", codeStart, codeEnd);
        }
        else if (segName == ".rodata")
        {
            roStart = seg;
            roEnd = SegEnd(seg);
            Message("%08.8Xh - %08.8Xh: .rodata\n", roStart, roEnd);
        }
        else if (segName == "abs")
        {
            addr = FindText(seg, SEARCH_DOWN|SEARCH_CASE|SEARCH_NOSHOW, 0, 0, "_GLOBAL_OFFSET_TABLE_");
            /* Only trust the value when the symbol was actually found */
            if (addr != BADADDR)
            {
                gotStart = Dword(addr);
                gotEnd = SegEnd(gotStart);
                Message("%08.8Xh - %08.8Xh: .got\n", gotStart, gotEnd);
            }
        }
        seg = NextSeg(seg);
    }

    if (codeStart == BADADDR)
    {
        Message("Scan aborted. Could not locate the .text section!\n");
        SetStatus(IDA_STATUS_READY);
        return;
    }
    if (gotStart == BADADDR)
    {
        Message("Scan aborted. Could not locate _GLOBAL_OFFSET_TABLE_!\n");
        SetStatus(IDA_STATUS_READY);
        return;
    }

    addr = codeStart;
    funcend = -1;
    /* Defined value in case the first address does not trigger the reset below */
    reg = REG_NONE;
    count = 0;

    /**
     * Go through .text section while looking for anything like [e?x+blah] or [e?x-blah].
     *
     * The eax, ebx, ecx, or edx registers are used for storing the address of the .got
     * section with -fPIC code. An offset, either negative or positive is added to e?x.
     * This results in the address of a string, variable, or other type of data.
     *
     * In order to determine which register .got will be stored in, one can look at which
     * __i686.get_pc_thunk.? function is called near the beginning of each function. The
     * suffix on this function is either ax, bx, cx, or dx and corresponds to eax, ebx,
     * ecx, or edx.
     */
    while (addr < codeEnd)
    {
        operand1 = GetOpnd(addr, 0);
        operand2 = GetOpnd(addr, 1);
        whichop = -1;

        /* A change of function end means a new function: forget the old PIC register */
        curFuncEnd = FindFuncEnd(addr);
        if (curFuncEnd != funcend)
        {
            reg = REG_NONE;
            funcend = curFuncEnd;
        }

        /* Get current PIC register */
        reg = GetPICRegister(addr, reg);
        if (reg != REG_NONE)
        {
            /* Search first operand for substring containing PIC register and either a plus or minus sign */
            if (strstr(operand1, GetPICSearchString(reg, OP_ADD)) != -1 || strstr(operand1, GetPICSearchString(reg, OP_SUB)) != -1)
            {
                whichop = 0;
            }
            /* Same search on the second operand; it takes precedence when both match */
            if (strstr(operand2, GetPICSearchString(reg, OP_ADD)) != -1 || strstr(operand2, GetPICSearchString(reg, OP_SUB)) != -1)
            {
                whichop = 1;
            }
        }

        if (whichop != -1)
        {
            /* Get .got offset */
            opval = GetOperandValue(addr, whichop);
            /* Get address inside .got */
            dataAddr = gotStart + opval;
            /* Get name at address if it exists */
            opstr = Name(dataAddr);

            if (opstr == "")
            {
                /*
                 * No name yet. If the address falls in .rodata, try to make it a
                 * string, which automatically gives it a name.
                 */
                if (dataAddr >= roStart && dataAddr < roEnd && MakeStr(dataAddr, BADADDR))
                {
                    /* Get automatically created name */
                    opstr = Name(dataAddr);
                    opformat = OPFORMAT_STRING;
                    /*
                     * Sometimes IDA creates a string successfully but not exactly in the right place.
                     * Uncertain as to why this is (perhaps an IDA bug?), but usually the string in
                     * question is a bunch of garbage.
                     */
                    if (opstr == "")
                    {
                        /* Create a name based on the address */
                        opstr = form("unk_%X", dataAddr);
                        MakeNameEx(dataAddr, opstr, SN_NOCHECK|SN_NOLIST|SN_NOWARN);
                        opformat = OPFORMAT_DEREF;
                    }
                }
                else
                {
                    /*
                     * The address is outside .rodata or the string creation failed:
                     * read the dword stored at dataAddr and use the name of the
                     * address it points to.
                     */
                    opstr = Name(Dword(dataAddr));
                    if (opstr == "")
                    {
                        /* If name doesn't exist for that, then create name based on address */
                        opstr = form("unk_%X", dataAddr);
                        MakeNameEx(dataAddr, opstr, SN_NOCHECK|SN_NOLIST|SN_NOWARN);
                        opformat = OPFORMAT_DEREF;
                    }
                    else
                    {
                        /* If the name did exist at this point, then use it */
                        opformat = OPFORMAT_NORMAL;
                    }
                }
            }
            else
            {
                /*
                 * A name already exists at dataAddr. If the address falls into
                 * .rodata and the name starts with the string prefix, show the
                 * replacement operand as a string; otherwise as a dereference.
                 */
                if (dataAddr >= roStart && dataAddr < roEnd && strPrefix != "" && strstr(opstr, strPrefix) == 0)
                {
                    opformat = OPFORMAT_STRING;
                }
                else
                {
                    opformat = OPFORMAT_DEREF;
                }
            }

            /*
             * Try to demangle the name that was found or created above and print it as
             * a status message to show that the script is still doing work, as this can
             * take a while. Demangle expects the demangling mask itself, so the
             * long-name mask is fetched with GetLongPrm.
             */
            tempstr = Demangle(opstr, GetLongPrm(INF_LONG_DN));
            if (tempstr != "")
            {
                Message("%8.8Xh: %s\n", addr, tempstr);
            }
            else
            {
                Message("%8.8Xh: %s\n", addr, opstr);
            }

            /*
             * The operand that was found to have the PIC register will now be replaced
             * with more descriptive text. The format of this text depends upon the value
             * of opformat.
             */
            OpAlt(addr, whichop, DoOperandFormat(opformat, opstr));
            count++;
        }
        addr++;
    }

    Message("Scan for PIC code is complete! Found %d data items referenced via PIC register.\n", count);
    Message("Please re-open the database so that newly found strings will appear in the Strings window\n");
    SetStatus(IDA_STATUS_READY);
}
/*
 * Tries to determine the current PIC register given the current address being
 * processed and the previous PIC register.
 *
 * addr     - address of the instruction to inspect
 * previous - PIC register (REG_*) in effect before this instruction
 *
 * Returns REG_EAX/REG_EBX/REG_ECX/REG_EDX when the instruction at 'addr' is a
 * call whose target name contains "__i686_get_pc_thunk_" followed by ax, bx,
 * cx or dx; otherwise returns 'previous' unchanged.
 *
 * The instruction is matched through its mnemonic and operand text rather than
 * through fixed character offsets into the GetDisasm() line, so the result no
 * longer depends on how the disassembly text is padded.
 */
static GetPICRegister(addr, previous)
{
    auto target, start, suffix;

    suffix = "";
    if (GetMnem(addr) == "call")
    {
        target = GetOpnd(addr, 0);
        start = strstr(target, "__i686_get_pc_thunk_");
        if (start != -1)
        {
            /* The two-letter register suffix follows directly after the thunk prefix */
            start = start + strlen("__i686_get_pc_thunk_");
            if (strlen(target) >= start + 2)
            {
                suffix = substr(target, start, start + 2);
            }
        }
    }

    if (suffix == "ax")
    {
        return REG_EAX;
    }
    else if (suffix == "bx")
    {
        return REG_EBX;
    }
    else if (suffix == "cx")
    {
        return REG_ECX;
    }
    else if (suffix == "dx")
    {
        return REG_EDX;
    }
    return previous;
}
/*
 * Builds the substring used to spot a PIC-relative operand.
 *
 * reg      - one of REG_EAX, REG_EBX, REG_ECX, REG_EDX
 * operator - OP_ADD or OP_SUB
 *
 * Returns "[<register>+" or "[<register>-". For any other register or
 * operator value no string is returned.
 */
static GetPICSearchString(reg, operator)
{
    auto prefix;

    /* Opening bracket plus register name; stays empty for an unknown register */
    prefix = "";
    if (reg == REG_EAX)
    {
        prefix = "[eax";
    }
    else if (reg == REG_EBX)
    {
        prefix = "[ebx";
    }
    else if (reg == REG_ECX)
    {
        prefix = "[ecx";
    }
    else if (reg == REG_EDX)
    {
        prefix = "[edx";
    }

    if (prefix != "")
    {
        if (operator == OP_ADD)
        {
            return prefix + "+";
        }
        if (operator == OP_SUB)
        {
            return prefix + "-";
        }
    }
}
/*
 * Produces the replacement text for an operand that used the PIC register.
 *
 * format - one of the OPFORMAT_* constants
 * str    - name of the referenced data
 *
 * OPFORMAT_STRING: returns "offset <str>" (the caller uses this for strings
 *                  in the .rodata section).
 * OPFORMAT_DEREF:  returns "[ds:<str>]", i.e. with dereference brackets (the
 *                  caller uses this when the referenced address itself
 *                  carries the name).
 * OPFORMAT_NORMAL: returns "ds:<str>", without brackets (the caller uses this
 *                  when the name was found by reading the address stored at
 *                  the referenced location).
 * Any other value: returns str unchanged.
 */
static DoOperandFormat(format, str)
{
    auto pattern;

    if (format == OPFORMAT_STRING)
    {
        pattern = "offset %s";
    }
    else if (format == OPFORMAT_DEREF)
    {
        pattern = "[ds:%s]";
    }
    else if (format == OPFORMAT_NORMAL)
    {
        pattern = "ds:%s";
    }
    else
    {
        /* Unknown format: hand the name back untouched */
        return str;
    }
    return form(pattern, str);
}
// -----------------------------------------------------------------------------
// - IDA Pro Script -
// Name: gcc_fpic.idc
// By: Damaged Soul
// Desc: Add references for strings, variables, and other data that seem mangled
// due to GCC's -fPIC option and the .got section of an x86 ELF binary.
//
// Version History
// 1.0 [2007-11-22]
// - Initial Version
// 1.1 [2008-05-02]
// - Now works with GCC 4.x compiled binaries
// 1.2 [2008-11-06]
// - Now works with GCC 4.3 compiled binaries
// - Fixed: Redefining alignment blocks as data caused IDA to pop up
// an annoying warning
// -----------------------------------------------------------------------------
#include <idc.idc>
#define REG_NONE 0
#define REG_EAX 1
#define REG_EBX 2
#define REG_ECX 3
#define REG_EDX 4
#define OP_ADD 1
#define OP_SUB 2
#define OPFORMAT_STRING 1
#define OPFORMAT_DEREF 2
#define OPFORMAT_NORMAL 3
/*
 * Script entry point.
 *
 * Verifies that the database is an ELF file compiled with GNU tools, locates
 * the .text and .rodata segments and the address stored for
 * _GLOBAL_OFFSET_TABLE_, then walks .text byte by byte. Every operand of the
 * form [e?x+N] / [e?x-N] that uses the current PIC register is resolved to
 * gotStart + N and replaced (via OpAlt) by a symbolic operand naming the data
 * found there.
 */
static main()
{
    auto filetype, compiler, demang, strPrefix;
    auto seg, segName, codeStart, codeEnd, roStart, roEnd, gotStart, gotEnd, opformat;
    auto addr, funcend, curFuncEnd, whichop, reg, tempstr;
    auto operand1, operand2, opval, opstr, dataAddr, count;

    SetStatus(IDA_STATUS_WORK);
    Message("Starting scan for -fPIC code...\n");

    /* Check file type and compiler */
    filetype = GetShortPrm(INF_FILETYPE);
    if (filetype != FT_ELF)
    {
        Message("Scan aborted. Input file must be using ELF binary format!\n");
        SetStatus(IDA_STATUS_READY);
        return;
    }
    compiler = GetCharPrm(INF_COMPILER);
    if (compiler != COMP_GNU)
    {
        Message("Scan aborted. Input file must have been compiled with GNU GCC/G++!\n");
        SetStatus(IDA_STATUS_READY);
        return;
    }

    /*
     * If the GCC v3.x names option is not set, then set it first.
     *
     * :TODO: Need to change this if GCC 2.95 support is to be added.
     */
    demang = GetCharPrm(INF_DEMNAMES);
    if ((demang & DEMNAM_GCC3) == 0)
    {
        SetCharPrm(INF_DEMNAMES, demang | DEMNAM_GCC3);
    }

    /*
     * Get string prefix.
     * NOTE(review): the value comes from GetCharPrm but is used below as a
     * string argument to strstr(); confirm this yields the full name prefix.
     */
    strPrefix = GetCharPrm(INF_ASCIIPREF);

    /*
     * Start from "not found" values so that a missing segment is detected
     * instead of silently using unassigned variables. An empty .rodata range
     * (0..0) simply makes every .rodata test below fail.
     */
    codeStart = BADADDR;
    codeEnd = BADADDR;
    roStart = 0;
    roEnd = 0;
    gotStart = BADADDR;
    gotEnd = BADADDR;

    /* Iterate through all sections and get address of .text, .rodata, and .got */
    seg = FirstSeg();
    while (seg != BADADDR)
    {
        segName = SegName(seg);
        if (segName == ".text")
        {
            codeStart = seg;
            /* SegEnd gives the real (exclusive) end even for the last segment */
            codeEnd = SegEnd(seg);
            Message("%08.8Xh - %08.8Xh: .text\n", codeStart, codeEnd);
        }
        else if (segName == ".rodata")
        {
            roStart = seg;
            roEnd = SegEnd(seg);
            Message("%08.8Xh - %08.8Xh: .rodata\n", roStart, roEnd);
        }
        else if (segName == "abs")
        {
            addr = FindText(seg, SEARCH_DOWN|SEARCH_CASE|SEARCH_NOSHOW, 0, 0, "_GLOBAL_OFFSET_TABLE_");
            /* Only trust the value when the symbol was actually found */
            if (addr != BADADDR)
            {
                gotStart = Dword(addr);
                gotEnd = SegEnd(gotStart);
                Message("%08.8Xh - %08.8Xh: .got\n", gotStart, gotEnd);
            }
        }
        seg = NextSeg(seg);
    }

    if (codeStart == BADADDR)
    {
        Message("Scan aborted. Could not locate the .text section!\n");
        SetStatus(IDA_STATUS_READY);
        return;
    }
    if (gotStart == BADADDR)
    {
        Message("Scan aborted. Could not locate _GLOBAL_OFFSET_TABLE_!\n");
        SetStatus(IDA_STATUS_READY);
        return;
    }

    addr = codeStart;
    funcend = -1;
    /* Defined value in case the first address does not trigger the reset below */
    reg = REG_NONE;
    count = 0;

    /**
     * Go through .text section while looking for anything like [e?x+blah] or [e?x-blah].
     *
     * The eax, ebx, ecx, or edx registers are used for storing the address of the .got
     * section with -fPIC code. An offset, either negative or positive is added to e?x.
     * This results in the address of a string, variable, or other type of data.
     *
     * In order to determine which register .got will be stored in, one can look at which
     * __i686.get_pc_thunk.? function is called near the beginning of each function. The
     * suffix on this function is either ax, bx, cx, or dx and corresponds to eax, ebx,
     * ecx, or edx.
     */
    while (addr < codeEnd)
    {
        operand1 = GetOpnd(addr, 0);
        operand2 = GetOpnd(addr, 1);
        whichop = -1;

        /* A change of function end means a new function: forget the old PIC register */
        curFuncEnd = FindFuncEnd(addr);
        if (curFuncEnd != funcend)
        {
            reg = REG_NONE;
            funcend = curFuncEnd;
        }

        /* Get current PIC register */
        reg = GetPICRegister(addr, reg, funcend);
        if (reg != REG_NONE)
        {
            /* Search first operand for substring containing PIC register and either a plus or minus sign */
            if (strstr(operand1, GetPICSearchString(reg, OP_ADD)) != -1 || strstr(operand1, GetPICSearchString(reg, OP_SUB)) != -1)
            {
                whichop = 0;
            }
            /* Same search on the second operand; it takes precedence when both match */
            if (strstr(operand2, GetPICSearchString(reg, OP_ADD)) != -1 || strstr(operand2, GetPICSearchString(reg, OP_SUB)) != -1)
            {
                whichop = 1;
            }
        }

        if (whichop != -1)
        {
            /* Get .got offset */
            opval = GetOperandValue(addr, whichop);
            /* Get address inside .got */
            dataAddr = gotStart + opval;
            /* Get name at address if it exists */
            opstr = Name(dataAddr);

            if (opstr == "")
            {
                /*
                 * No name yet. If the address falls in .rodata, try to make it a
                 * string, which automatically gives it a name.
                 */
                if (dataAddr >= roStart && dataAddr < roEnd && MakeStr(dataAddr, BADADDR))
                {
                    /* Get automatically created name */
                    opstr = Name(dataAddr);
                    opformat = OPFORMAT_STRING;
                    /*
                     * Sometimes IDA creates a string successfully but not exactly in the right place.
                     * Uncertain as to why this is (perhaps an IDA bug?), but usually the string in
                     * question is a bunch of garbage.
                     */
                    if (opstr == "")
                    {
                        /* Create a name based on the address */
                        opstr = form("unk_%X", dataAddr);
                        /* Undefine an alignment item first, before the address is named */
                        if (strstr(GetDisasm(dataAddr), "align") != -1)
                        {
                            MakeUnkn(dataAddr, DOUNK_SIMPLE);
                        }
                        MakeNameEx(dataAddr, opstr, SN_NOCHECK|SN_NOLIST|SN_NOWARN);
                        opformat = OPFORMAT_DEREF;
                    }
                }
                else
                {
                    /*
                     * The address is outside .rodata or the string creation failed:
                     * read the dword stored at dataAddr and use the name of the
                     * address it points to.
                     */
                    opstr = Name(Dword(dataAddr));
                    if (opstr == "")
                    {
                        /* If name doesn't exist for that, then create name based on address */
                        opstr = form("unk_%X", dataAddr);
                        /* Undefine an alignment item first, before the address is named */
                        if (strstr(GetDisasm(dataAddr), "align") != -1)
                        {
                            MakeUnkn(dataAddr, DOUNK_SIMPLE);
                        }
                        MakeNameEx(dataAddr, opstr, SN_NOCHECK|SN_NOLIST|SN_NOWARN);
                        opformat = OPFORMAT_DEREF;
                    }
                    else
                    {
                        /* If the name did exist at this point, then use it */
                        opformat = OPFORMAT_NORMAL;
                    }
                }
            }
            else
            {
                /*
                 * A name already exists at dataAddr. If the address falls into
                 * .rodata and the name starts with the string prefix, show the
                 * replacement operand as a string; otherwise as a dereference.
                 */
                if (dataAddr >= roStart && dataAddr < roEnd && strPrefix != "" && strstr(opstr, strPrefix) == 0)
                {
                    opformat = OPFORMAT_STRING;
                }
                else
                {
                    opformat = OPFORMAT_DEREF;
                }
            }

            /*
             * Try to demangle the name that was found or created above and print it as
             * a status message to show that the script is still doing work, as this can
             * take a while. Demangle expects the demangling mask itself, so the
             * long-name mask is fetched with GetLongPrm.
             */
            tempstr = Demangle(opstr, GetLongPrm(INF_LONG_DN));
            if (tempstr != "")
            {
                Message("%8.8Xh: %s\n", addr, tempstr);
            }
            else
            {
                Message("%8.8Xh: %s\n", addr, opstr);
            }

            /*
             * The operand that was found to have the PIC register will now be replaced
             * with more descriptive text. The format of this text depends upon the value
             * of opformat.
             */
            OpAlt(addr, whichop, DoOperandFormat(opformat, opstr));
            count++;
        }
        addr++;
    }

    Message("Scan for PIC code is complete! Found %d data items referenced via PIC register.\n", count);
    Message("Please re-open the database so that newly found strings will appear in the Strings window\n");
    SetStatus(IDA_STATUS_READY);
}
/*
 * Tries to determine the current PIC register given the current address being
 * processed and the previous PIC register.
 *
 * addr     - address of the instruction to inspect
 * previous - PIC register (REG_*) in effect before this instruction
 * funcend  - upper bound passed to NextHead() when looking at the instruction
 *            that follows a "call $+5"
 *
 * Two patterns are recognised:
 *   1. a call whose target name contains "__i686_get_pc_thunk_" followed by
 *      ax, bx, cx or dx;
 *   2. a call whose operand contains "$+5", immediately followed by a pop of
 *      eax, ebx, ecx or edx.
 * Returns the matching REG_* constant, or 'previous' when neither pattern
 * applies.
 *
 * Instructions are matched through their mnemonic and operand text rather
 * than through fixed character offsets into the GetDisasm() line, so the
 * result no longer depends on how the disassembly text is padded.
 */
static GetPICRegister(addr, previous, funcend)
{
    auto target, start, next, popOperand, suffix;

    /* Must be a defined string: it is compared below even when nothing matches */
    suffix = "";
    if (GetMnem(addr) == "call")
    {
        target = GetOpnd(addr, 0);
        start = strstr(target, "__i686_get_pc_thunk_");
        if (start != -1)
        {
            /* The two-letter register suffix follows directly after the thunk prefix */
            start = start + strlen("__i686_get_pc_thunk_");
            if (strlen(target) >= start + 2)
            {
                suffix = substr(target, start, start + 2);
            }
        }
        else if (strstr(target, "$+5") != -1)
        {
            /* Inline thunk: the following pop names the register receiving the PC */
            next = NextHead(addr, funcend);
            if (next != BADADDR && GetMnem(next) == "pop")
            {
                popOperand = GetOpnd(next, 0);
                if (strlen(popOperand) == 3 && substr(popOperand, 0, 1) == "e")
                {
                    suffix = substr(popOperand, 1, 3);
                }
            }
        }
    }

    if (suffix == "ax")
    {
        return REG_EAX;
    }
    else if (suffix == "bx")
    {
        return REG_EBX;
    }
    else if (suffix == "cx")
    {
        return REG_ECX;
    }
    else if (suffix == "dx")
    {
        return REG_EDX;
    }
    return previous;
}
/*
 * Builds the substring used to spot a PIC-relative operand.
 *
 * reg      - one of REG_EAX, REG_EBX, REG_ECX, REG_EDX
 * operator - OP_ADD or OP_SUB
 *
 * Returns "[<register>+" or "[<register>-". For any other register or
 * operator value no string is returned.
 */
static GetPICSearchString(reg, operator)
{
    auto prefix;

    /* Opening bracket plus register name; stays empty for an unknown register */
    prefix = "";
    if (reg == REG_EAX)
    {
        prefix = "[eax";
    }
    else if (reg == REG_EBX)
    {
        prefix = "[ebx";
    }
    else if (reg == REG_ECX)
    {
        prefix = "[ecx";
    }
    else if (reg == REG_EDX)
    {
        prefix = "[edx";
    }

    if (prefix != "")
    {
        if (operator == OP_ADD)
        {
            return prefix + "+";
        }
        if (operator == OP_SUB)
        {
            return prefix + "-";
        }
    }
}
/*
 * Produces the replacement text for an operand that used the PIC register.
 *
 * format - one of the OPFORMAT_* constants
 * str    - name of the referenced data
 *
 * OPFORMAT_STRING: returns "offset <str>" (the caller uses this for strings
 *                  in the .rodata section).
 * OPFORMAT_DEREF:  returns "[ds:<str>]", i.e. with dereference brackets (the
 *                  caller uses this when the referenced address itself
 *                  carries the name).
 * OPFORMAT_NORMAL: returns "ds:<str>", without brackets (the caller uses this
 *                  when the name was found by reading the address stored at
 *                  the referenced location).
 * Any other value: returns str unchanged.
 */
static DoOperandFormat(format, str)
{
    auto pattern;

    if (format == OPFORMAT_STRING)
    {
        pattern = "offset %s";
    }
    else if (format == OPFORMAT_DEREF)
    {
        pattern = "[ds:%s]";
    }
    else if (format == OPFORMAT_NORMAL)
    {
        pattern = "ds:%s";
    }
    else
    {
        /* Unknown format: hand the name back untouched */
        return str;
    }
    return form(pattern, str);
}