// -----------------------------------------------------------------------------
// - IDA Pro Script -
// Name: gcc_fpic.idc
// By: Damaged Soul
// Desc: Add references for strings, variables, and other data that seem mangled
//       due to GCC's -fPIC option and the .got section of an x86 ELF binary.
// 
// Version History
// 1.0 [2007-11-22]
//     - Initial Version
// 1.1 [2008-05-02]
//     - Now works with GCC 4.x compiled binaries
// 1.2 [2008-11-06]
//     - Now works with GCC 4.3 compiled binaries
//     - Fixed: Redefining alignment blocks as data caused IDA to pop up
//       an annoying warning
// -----------------------------------------------------------------------------

#include <idc.idc>

#define REG_NONE 0
#define REG_EAX 1
#define REG_EBX 2
#define REG_ECX 3
#define REG_EDX 4

#define OP_ADD 1
#define OP_SUB 2

#define OPFORMAT_STRING 1
#define OPFORMAT_DEREF  2
#define OPFORMAT_NORMAL 3

/*
 * Returns non-zero when 'ea' lies inside the half-open range [start, secEnd).
 * A 'start' of BADADDR means the section was never located, so nothing matches.
 */
static IsInSection(ea, start, secEnd)
{
	return start != BADADDR && ea >= start && ea < secEnd;
}

/*
 * Gives the item at 'dataAddr' a name of the form "unk_<hex address>" and
 * returns that name.
 *
 * If the disassembly line at that address contains "align", the item is
 * undefined first; per the 1.2 changelog entry, redefining alignment blocks
 * otherwise makes IDA pop up a warning.
 */
static NameUnknownData(dataAddr)
{
	auto name;
	
	name = form("unk_%X", dataAddr);
	if (strstr(GetDisasm(dataAddr), "align") != -1)
	{
		MakeUnkn(dataAddr, DOUNK_SIMPLE);
	}
	MakeNameEx(dataAddr, name, SN_NOCHECK|SN_NOLIST|SN_NOWARN);
	
	return name;
}

/*
 * Script entry point.
 *
 * 1. Verifies the input is an ELF file compiled with GNU GCC/G++.
 * 2. Locates the .text and .rodata sections and reads the address stored at
 *    the "_GLOBAL_OFFSET_TABLE_" text found in the "abs" segment.
 * 3. Walks every item head in .text. For each operand of the form
 *    [<PIC register>+x] or [<PIC register>-x], resolves GOT base + x to a
 *    name (creating a string or an "unk_" name when none exists) and replaces
 *    the operand text with that name via OpAlt().
 */
static main()
{
	auto filetype, compiler, demang, strPrefix;
	auto seg, segName, codeStart, codeEnd, roStart, roEnd, gotStart, gotEnd, opformat;
	auto addr, funcend, curFuncEnd, whichop, reg, tempstr;
	auto operand1, operand2, opval, opstr, dataAddr, count;
	
	SetStatus(IDA_STATUS_WORK);
	Message("Starting scan for -fPIC code...\n");
	
	/* Check file type and compiler */
	filetype = GetShortPrm(INF_FILETYPE);
	if (filetype != FT_ELF)
	{
		Message("Scan aborted. Input file must be using ELF binary format!\n");
		SetStatus(IDA_STATUS_READY);
		return;
	}
	compiler = GetCharPrm(INF_COMPILER);
	if (compiler != COMP_GNU)
	{
		Message("Scan aborted. Input file must have been compiled with GNU GCC/G++!\n");
		SetStatus(IDA_STATUS_READY);
		return;
	}

	/* 
	 * If the GCC v3.x names option is not set, then set it first.
	 *
	 * :TODO: Need to change this if GCC 2.95 support is to be added.
	 */
	demang = GetCharPrm(INF_DEMNAMES);
	if ((demang & DEMNAM_GCC3) == 0)
	{
		SetCharPrm(INF_DEMNAMES, demang | DEMNAM_GCC3);
	}
	
	/*
	 * Get string prefix.
	 *
	 * NOTE(review): INF_ASCIIPREF appears to be a multi-character field while
	 * GetCharPrm() reads a single byte - confirm that this yields the prefix
	 * string that the strstr() comparison further down expects.
	 */
	strPrefix = GetCharPrm(INF_ASCIIPREF);

	/* Nothing has been located yet; BADADDR marks "not found" */
	codeStart = BADADDR;
	codeEnd = BADADDR;
	roStart = BADADDR;
	roEnd = BADADDR;
	gotStart = BADADDR;
	gotEnd = BADADDR;
	
	/* Iterate through all sections and get address of .text, .rodata, and .got */
	seg = FirstSeg();
	while (seg != BADADDR)
	{
		segName = SegName(seg);
		
		if (segName == ".text")
		{
			/* SegEnd() is the real end of this segment even if a gap follows it */
			codeStart = seg;
			codeEnd = SegEnd(seg);
			Message("%08.8Xh - %08.8Xh: .text\n", codeStart, codeEnd);
		}
		else if (segName == ".rodata")
		{
			roStart = seg;
			roEnd = SegEnd(seg);
			Message("%08.8Xh - %08.8Xh: .rodata\n", roStart, roEnd);
		}
		else if (segName == "abs")
		{
			addr = FindText(seg, SEARCH_DOWN|SEARCH_CASE|SEARCH_NOSHOW, 0, 0, "_GLOBAL_OFFSET_TABLE_");
			
			/* Only read the GOT address when the symbol was actually found */
			if (addr != BADADDR)
			{
				gotStart = Dword(addr);
				gotEnd = SegEnd(gotStart);
				Message("%08.8Xh - %08.8Xh: .got\n", gotStart, gotEnd);
			}
		}
		
		seg = NextSeg(seg);
	}
	
	/* Without code to scan or a GOT base there is nothing meaningful to do */
	if (codeStart == BADADDR || gotStart == BADADDR)
	{
		Message("Scan aborted. Could not locate the .text section and/or _GLOBAL_OFFSET_TABLE_!\n");
		SetStatus(IDA_STATUS_READY);
		return;
	}
	
	addr = codeStart;
	funcend = -1;
	reg = REG_NONE;
	count = 0;
	
	/**
	 * Go through .text section while looking for anything like [e?x+blah] or [e?x-blah].
	 *
	 * The eax, ebx, ecx, or edx registers are used for storing the address of the .got
	 * section with -fPIC code. An offset, either negative or positive is added to e?x.
	 * This results in the address of a string, variable, or other type of data.
	 *
	 * In order to determine which register .got will be stored in, one can look at which
	 * __i686.get_pc_thunk.? function is called near the beginning of each function. The
	 * suffix on this function is either ax, bx, cx, or dx and corresponds to eax, ebx, 
	 * ecx, or edx.
	 *
	 * Only item heads are visited: operands exist only at the first byte of an
	 * instruction, so stepping byte by byte would just repeat empty lookups.
	 */
	while (addr != BADADDR && addr < codeEnd)
	{
		whichop = -1;
		
		/* A change of function end means a new function: forget the old PIC register */
		curFuncEnd = FindFuncEnd(addr);
		if (curFuncEnd != funcend)
		{
			reg = REG_NONE;
			funcend = curFuncEnd;
		}
		
		/* Get current PIC register */
		reg = GetPICRegister(addr, reg, funcend);
		
		if (reg != REG_NONE)
		{
			operand1 = GetOpnd(addr, 0);
			operand2 = GetOpnd(addr, 1);
			
			/* Search first operand for substring containing PIC register and either a plus or minus sign */
			if (strstr(operand1, GetPICSearchString(reg, OP_ADD)) != -1 || strstr(operand1, GetPICSearchString(reg, OP_SUB)) != -1)
			{
				whichop = 0;
			}
		
			/* Same search on the second operand; it takes precedence when both match */
			if (strstr(operand2, GetPICSearchString(reg, OP_ADD)) != -1 || strstr(operand2, GetPICSearchString(reg, OP_SUB)) != -1)
			{
				whichop = 1;
			}
		}
		
		if (whichop != -1)
		{
			/* Get .got offset */
			opval = GetOperandValue(addr, whichop);
			
			/* Get address relative to .got */
			dataAddr = gotStart + opval;
			
			/* Get name at address if it exists */
			opstr = Name(dataAddr);
			
			/* If name doesn't exist then... */
			if (opstr == "")
			{	
				/* 
				 * Check address to see if it falls in .rodata section.
				 * If it does, then try to make it a string which will automatically give it a name.
				 */
				if (IsInSection(dataAddr, roStart, roEnd) && MakeStr(dataAddr, BADADDR))
				{
					/* Get automatically created name */
					opstr = Name(dataAddr);
					opformat = OPFORMAT_STRING;
					
					/*
					 * Sometimes IDA creates a string successfully but not exactly in the right place.
					 * Uncertain as to why this is (perhaps an IDA bug?), but usually the string in 
					 * question is a bunch of garbage.
					 */
					if (opstr == "")
					{
						/* Create a name based on the address */
						opstr = NameUnknownData(dataAddr);
						opformat = OPFORMAT_DEREF;
					}
				}
				else
				{
					/* 
					 * The address is outside .rodata or the string creation failed:
					 * read the dword stored at 'dataAddr' and use the name of the
					 * address it holds.
					 */
					opstr = Name(Dword(dataAddr));
					if (opstr == "")
					{
						/* If name doesn't exist for that, then create name based on address */
						opstr = NameUnknownData(dataAddr);
						opformat = OPFORMAT_DEREF;
					}
					else
					{
						/* If the name did exist at this point, then use it */
						opformat = OPFORMAT_NORMAL;
					}
				}
			}
			else
			{
				/* 
				 * The name at the original address exists. If the address falls into
				 * the .rodata section and the name starts with the string prefix, the
				 * replacement operand needs to be shown as a string.
				 */ 
				if (IsInSection(dataAddr, roStart, roEnd) && strPrefix != "" && strstr(opstr, strPrefix) == 0)
				{
					opformat = OPFORMAT_STRING;
				}
				else
				{
					opformat = OPFORMAT_DEREF;
				}
			}

			/*
			 * Try to demangle the name that was found or created above and print it as sort of
			 * a status message to show that the script is still doing work as this usually
			 * can take a while. Demangle() takes a mask value, so the current long-name
			 * setting is read with GetLongPrm() rather than passing the INF_LONG_DN
			 * constant itself.
			 */
			tempstr = Demangle(opstr, GetLongPrm(INF_LONG_DN));
			if (tempstr != "")
			{
				Message("%8.8Xh: %s\n", addr, tempstr);
			}
			else
			{
				Message("%8.8Xh: %s\n", addr, opstr);
			}
			
			/*
			 * The operand that was found to have the PIC register will now be replaced
			 * with more descriptive text. The format of this text depends upon the value
			 * of opformat.
			 */
			OpAlt(addr, whichop, DoOperandFormat(opformat, opstr));
			
			count++;
		}
	
		/* Advance to the next defined item; BADADDR ends the loop */
		addr = NextHead(addr, codeEnd);
	}
	
	Message("Scan for PIC code is complete! Found %d data items referenced via PIC register.\n", count);
	Message("Please re-open the database so that newly found strings will appear in the Strings window\n");
	SetStatus(IDA_STATUS_READY);
}

/*
 * Tries to determine the current PIC register given the current address being processed
 * and the previous PIC register.
 *
 * addr:     address of the item to inspect
 * previous: REG_* value to return when this item does not load a PIC register
 * funcend:  upper bound used when looking up the instruction that follows 'addr'
 *
 * Two patterns are recognised:
 *   - a call whose target name contains "_get_pc_thunk_" followed by the
 *     16-bit register suffix (e.g. __i686_get_pc_thunk_bx)
 *   - "call $+5" immediately followed by "pop e?x"
 *
 * Returns REG_EAX, REG_EBX, REG_ECX or REG_EDX when one of those registers is
 * identified; otherwise returns 'previous' unchanged.
 */
static GetPICRegister(addr, previous, funcend)
{
	auto assemblyStr, idx, reg, next, popOperand;
	
	/* Start from an empty suffix so the comparisons below are always string vs. string */
	reg = "";
	assemblyStr = GetDisasm(addr);
	
	if (strstr(assemblyStr, "call") != -1 && (idx = strstr(assemblyStr, "_get_pc_thunk_")) != -1)
	{
		/*
		 * 14 is the length of "_get_pc_thunk_"; the two characters after it are
		 * the register suffix. Both bounds are relative to idx, and the length
		 * check keeps substr() inside the string.
		 */
		if (strlen(assemblyStr) >= idx + 16)
		{
			reg = substr(assemblyStr, idx + 14, idx + 16);
		}
	}
	else if (strstr(assemblyStr, "call    $+5") != -1)
	{
		/* The register is the one popped by the instruction right after the call */
		next = NextHead(addr, funcend);
		if (next != BADADDR && GetMnem(next) == "pop")
		{
			popOperand = GetOpnd(next, 0);
			
			/* Expect a three-letter 32-bit register such as "ebx"; keep the "bx" part */
			if (strlen(popOperand) == 3 && substr(popOperand, 0, 1) == "e")
			{
				reg = substr(popOperand, 1, 3);
			}
		}
	}
	
	if (reg == "ax")
	{
		return REG_EAX;
	}
	else if (reg == "bx")
	{
		return REG_EBX;
	}
	else if (reg == "cx")
	{
		return REG_ECX;
	}
	else if (reg == "dx")
	{
		return REG_EDX;
	}
	
	/* Not a PIC register load (or an unsupported register): keep what we had */
	return previous;
}

/*
 * Builds the substring used to spot a PIC-relative memory operand.
 *
 * reg:      one of REG_EAX, REG_EBX, REG_ECX, REG_EDX
 * operator: OP_ADD or OP_SUB
 *
 * Returns "[<register><sign>", e.g. "[ebx+" for (REG_EBX, OP_ADD) or "[ecx-"
 * for (REG_ECX, OP_SUB). For any other register or operator the function
 * ends without an explicit return statement.
 */
static GetPICSearchString(reg, operator)
{
	auto regName, sign;
	
	/* Pick the register name; stays empty for an unknown register */
	regName = "";
	if (reg == REG_EAX)
	{
		regName = "eax";
	}
	else if (reg == REG_EBX)
	{
		regName = "ebx";
	}
	else if (reg == REG_ECX)
	{
		regName = "ecx";
	}
	else if (reg == REG_EDX)
	{
		regName = "edx";
	}
	
	/* Pick the sign character; stays empty for an unknown operator */
	sign = "";
	if (operator == OP_ADD)
	{
		sign = "+";
	}
	else if (operator == OP_SUB)
	{
		sign = "-";
	}
	
	if (regName != "" && sign != "")
	{
		return form("[%s%s", regName, sign);
	}
}

/*
 * Produces the replacement text for the operand that contained the PIC register.
 *
 * format: one of the OPFORMAT_* constants
 * str:    name of the referenced data
 *
 * OPFORMAT_STRING: The referenced data address is a string in the .rodata section.
 *                  Result: "offset <str>".
 * OPFORMAT_DEREF:  The referenced data address has a name. The PIC register operand is
 *                  dereferenced, so the dereference brackets are kept. Result: "[ds:<str>]".
 * OPFORMAT_NORMAL: The referenced data address does not have a name, but the address
 *                  stored at it does. There are no dereference brackets because the
 *                  referenced data address had to be read in order to discover a name.
 *                  Result: "ds:<str>".
 *
 * Any other format value returns 'str' unchanged.
 */
static DoOperandFormat(format, str)
{
	auto pattern;
	
	if (format == OPFORMAT_STRING)
	{
		pattern = "offset %s";
	}
	else if (format == OPFORMAT_DEREF)
	{
		pattern = "[ds:%s]";
	}
	else if (format == OPFORMAT_NORMAL)
	{
		pattern = "ds:%s";
	}
	else
	{
		/* Unrecognised format: hand the name back untouched */
		return str;
	}
	
	return form(pattern, str);
}