1099 lines
		
	
	
		
			25 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			1099 lines
		
	
	
		
			25 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/**
 * vim: set ts=4 sw=4 tw=99 noet :
 * =============================================================================
 * SourceMod
 * Copyright (C) 2004-2008 AlliedModders LLC.  All rights reserved.
 * =============================================================================
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License, version 3.0, as published by the
 * Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * As a special exception, AlliedModders LLC gives you permission to link the
 * code of this program (as well as its derivative works) to "Half-Life 2," the
 * "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
 * by the Valve Corporation.  You must obey the GNU General Public License in
 * all respects for all other code used.  Additionally, AlliedModders LLC grants
 * this exception to all derivative works.  AlliedModders LLC defines further
 * exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
 * or <http://www.sourcemod.net/license.php>.
 *
 * Version: $Id$
 */
 | 
						|
 | 
						|
#include <stdio.h>
 | 
						|
#include <ctype.h>
 | 
						|
#include <wctype.h>
 | 
						|
#include <string.h>
 | 
						|
#include <stdlib.h>
 | 
						|
#include <assert.h>
 | 
						|
#include "TextParsers.h"
 | 
						|
#include <ILibrarySys.h>
 | 
						|
#include <am-string.h>
 | 
						|
 | 
						|
TextParsers g_TextParser;
 | 
						|
ITextParsers *textparsers = &g_TextParser;
 | 
						|
 | 
						|
/* Character lookup tables, indexed by a byte value converted to unsigned char.
 * An unsigned char can be as large as 255, so each table needs 256 slots;
 * with only 255 slots a 0xFF byte would index one element past the end.
 * All entries start at zero; the TextParsers constructor sets the marked ones to 1.
 */
static int g_ini_chartable1[256] = {0};
static int g_ws_chartable[256] = {0};
 | 
						|
 | 
						|
bool TextParsers::IsWhitespace(const char *stream)
 | 
						|
{
 | 
						|
	return g_ws_chartable[(unsigned char)*stream] == 1;
 | 
						|
}
 | 
						|
 | 
						|
/**
 * Fills in the global character lookup tables.
 */
TextParsers::TextParsers()
{
	/* Characters marked in g_ini_chartable1 */
	static const char ini_marked[] = "_-,+.$?/";
	/* Characters marked in g_ws_chartable (whitespace) */
	static const char ws_marked[] = "\n\v\r\t\f ";

	for (const char *p = ini_marked; *p != '\0'; p++)
	{
		g_ini_chartable1[(unsigned char)*p] = 1;
	}

	for (const char *p = ws_marked; *p != '\0'; p++)
	{
		g_ws_chartable[(unsigned char)*p] = 1;
	}
}
 | 
						|
 | 
						|
void TextParsers::OnSourceModAllInitialized()
 | 
						|
{
 | 
						|
	sharesys->AddInterface(NULL, this);
 | 
						|
}
 | 
						|
 | 
						|
unsigned int TextParsers::GetUTF8CharBytes(const char *stream)
 | 
						|
{
 | 
						|
	return _GetUTF8CharBytes(stream);
 | 
						|
}
 | 
						|
 | 
						|
/**
 * File streams
 */
 | 
						|
 | 
						|
/**
 * Stream reader callback backed by a C FILE handle.
 *
 * @param stream	A FILE pointer, passed as an opaque handle.
 * @param buffer	Destination for the bytes read.
 * @param maxlength	Maximum number of bytes to read.
 * @param read		Receives the number of bytes actually read.
 * @return			True when nothing was read because the end of file was reached,
 *					otherwise true only if the file's error indicator is clear.
 */
bool FileStreamReader(void *stream, char *buffer, size_t maxlength, unsigned int *read)
{
	FILE *fp = static_cast<FILE *>(stream);
	const size_t got = fread(buffer, 1, maxlength, fp);

	*read = static_cast<unsigned int>(got);

	/* An empty read at end-of-file is a normal finish, not an error */
	const bool clean_eof = (got == 0) && (feof(fp) != 0);

	return clean_eof || (ferror(fp) == 0);
}
 | 
						|
 | 
						|
/**
 * Opens a file in text mode and parses it as an SMC stream.
 *
 * @param file		Path handed to fopen.
 * @param smc		Listener forwarded to ParseStream_SMC.
 * @param states	Optional state output; zeroed (line and col) if the file cannot
 *					be opened, otherwise forwarded to ParseStream_SMC.
 * @return			SMCError_StreamOpen if fopen fails, else the result of
 *					ParseStream_SMC.
 */
SMCError TextParsers::ParseFile_SMC(const char *file, ITextListener_SMC *smc, SMCStates *states)
{
	FILE *fp = fopen(file, "rt");

	if (fp != NULL)
	{
		const SMCError result = ParseStream_SMC(fp, FileStreamReader, smc, states);

		fclose(fp);

		return result;
	}

	/* Could not open the file: report position 0,0 if the caller wants states */
	if (states != NULL)
	{
		states->line = 0;
		states->col = 0;
	}

	return SMCError_StreamOpen;
}
 | 
						|
 | 
						|
/**
 * Opens a file in text mode, parses it as an SMC stream and writes a message
 * describing the outcome into the caller's buffer.
 *
 * @param file			Path handed to fopen.
 * @param smc_listener	Listener forwarded to ParseStream_SMC.
 * @param states		Optional state output; zeroed (line and col) if the file
 *						cannot be opened, otherwise forwarded to ParseStream_SMC.
 * @param buffer		Receives the formatted message.
 * @param maxsize		Size passed to ke::SafeSprintf for the buffer.
 * @return				SMCError_StreamOpen if fopen fails, else the result of
 *						ParseStream_SMC.
 */
SMCError TextParsers::ParseSMCFile(const char *file,
								   ITextListener_SMC *smc_listener,
								   SMCStates *states,
								   char *buffer,
								   size_t maxsize)
{
	FILE *fp = fopen(file, "rt");

	if (fp == NULL)
	{
		if (states != NULL)
		{
			states->line = 0;
			states->col = 0;
		}

		/* Starts as "unknown"; GetPlatformError is given the buffer to fill in */
		char error[256] = "unknown";
		libsys->GetPlatformError(error, sizeof(error));
		ke::SafeSprintf(buffer, maxsize, "File could not be opened: %s", error);

		return SMCError_StreamOpen;
	}

	const SMCError result = ParseStream_SMC(fp, FileStreamReader, smc_listener, states);

	fclose(fp);

	/* Fall back to a generic message when no string is available for the result */
	const char *errstr = GetSMCErrorString(result);
	if (errstr == NULL)
	{
		errstr = "Unknown error";
	}
	ke::SafeSprintf(buffer, maxsize, "%s", errstr);

	return result;
}
 | 
						|
 | 
						|
/**
 * Cursor over an in-memory character buffer, consumed by RawStreamReader.
 */
struct RawStream
{
	const char *stream;		/* start of the data */
	size_t length;			/* total number of bytes in the data */
	size_t pos;				/* offset of the next byte to hand out */
};
 | 
						|
 | 
						|
bool RawStreamReader(void *stream, char *buffer, size_t maxlength, unsigned int *read)
 | 
						|
{
 | 
						|
	RawStream *rs = (RawStream *)stream;
 | 
						|
 | 
						|
	if (rs->pos >= rs->length)
 | 
						|
	{
 | 
						|
		return false;
 | 
						|
	}
 | 
						|
 | 
						|
	size_t remaining = rs->length - rs->pos;
 | 
						|
 | 
						|
	/* Use the smaller of the two */
 | 
						|
	size_t copy = (remaining > maxlength) ? maxlength : remaining;
 | 
						|
 | 
						|
	memcpy(buffer, &rs->stream[rs->pos], copy);
 | 
						|
	rs->pos += copy;
 | 
						|
	*read = copy;
 | 
						|
	assert(rs->pos <= rs->length);
 | 
						|
 | 
						|
	return true;
 | 
						|
}
 | 
						|
 | 
						|
/**
 * Parses an in-memory buffer as an SMC stream and writes a message describing
 * the outcome into the caller's buffer.
 *
 * @param stream		Start of the data to parse.
 * @param length		Number of bytes of data.
 * @param smc_listener	Listener forwarded to ParseStream_SMC.
 * @param states		Optional state output forwarded to ParseStream_SMC.
 * @param buffer		Receives the formatted message.
 * @param maxsize		Size passed to ke::SafeSprintf for the buffer.
 * @return				The result of ParseStream_SMC.
 */
SMCError TextParsers::ParseSMCStream(const char *stream,
									 size_t length,
									 ITextListener_SMC *smc_listener,
									 SMCStates *states,
									 char *buffer,
									 size_t maxsize)
{
	/* Cursor starting at offset 0 of the caller's data */
	RawStream rs = { stream, length, 0 };

	const SMCError result = ParseStream_SMC(&rs, RawStreamReader, smc_listener, states);

	/* Fall back to a generic message when no string is available for the result */
	const char *errstr = GetSMCErrorString(result);
	if (errstr == NULL)
	{
		errstr = "Unknown error";
	}
	ke::SafeSprintf(buffer, maxsize, "%s", errstr);

	return result;
}
 | 
						|
 | 
						|
/**
 * Raw parsing of streams with helper functions
 */
 | 
						|
 | 
						|
/**
 * Bookkeeping for one string staged by the SMC parser.  All pointers refer into
 * the parser's input buffer.
 */
struct StringInfo
{
	/* Starts out empty: no pointers, not quoted, no escape sequences */
	StringInfo()
		: quoted(false),
		  ptr(NULL),
		  end(NULL),
		  special(false)
	{
	}

	bool quoted;	/* string began with a quote mark */
	char *ptr;		/* first character (the opening quote when quoted) */
	char *end;		/* position where the terminator gets written */
	bool special;	/* a backslash was seen inside the string */
};
 | 
						|
 | 
						|
/**
 * Finalizes a staged string in place and returns a pointer to its text.
 *
 * For a quoted string the start pointer is moved past the opening quote.  If the
 * string was flagged as containing backslashes, the escapes \n, \t, \r, \\ and \"
 * are collapsed in place.  Finally a terminator is written at the end pointer.
 *
 * @param data		String record to finalize; its ptr may be advanced.
 * @return			The (possibly advanced) start pointer, or NULL if the record
 *					holds no string.
 */
const char *FixupString(StringInfo &data)
{
	if (data.ptr == NULL)
	{
		return NULL;
	}

	/* A string will never have beginning whitespace because we ignore it in the stream.
	 * Furthermore, if there is trailing whitespace, the end ptr will point to it, so it is
	 * valid to overwrite!  Only a quoted string needs its first character skipped.
	 */
	if (data.quoted)
	{
		++data.ptr;
	}

	/* Collapse escape sequences, writing the result over the original text */
	if (data.special)
	{
		const size_t length = data.end - data.ptr;
		if (length >= 2)
		{
			char *src = data.ptr;
			char *dst = data.ptr;

			for (size_t idx = 0; idx < length; idx++)
			{
				/* A backslash in the last position has nothing to escape */
				if (src[idx] == '\\' && idx < length - 1)
				{
					switch (src[idx + 1])
					{
					case 'n':
						src[++idx] = '\n';
						break;
					case 't':
						src[++idx] = '\t';
						break;
					case 'r':
						src[++idx] = '\r';
						break;
					case '\\':
					case '"':
						/* Keep the escaped character, drop the backslash */
						++idx;
						break;
					default:
						/* Not a recognized escape: the backslash is kept as-is */
						break;
					}
				}
				*dst++ = src[idx];
			}
			*dst = '\0';
		}
	}

	if (data.end != NULL)
	{
		*data.end = '\0';
	}

	return data.ptr;
}
 | 
						|
 | 
						|
/**
 * Shifts the staged strings up by one slot: slot 1 moves to slot 2, slot 0 moves
 * to slot 1, and slot 0 is reset.
 *
 * @param info		The three staging slots.
 * @return			The string in slot 2 if that slot is already occupied (nothing
 *					is shifted in that case), otherwise NULL.  Nothing is shifted
 *					either when slot 0 is empty.
 */
const char *rotate(StringInfo info[3])
{
	/* No room left to rotate into */
	if (info[2].ptr != NULL)
	{
		return info[2].ptr;
	}

	/* Nothing staged, nothing to move */
	if (info[0].ptr == NULL)
	{
		return NULL;
	}

	info[2] = info[1];
	info[1] = info[0];
	info[0] = StringInfo();

	return NULL;
}
 | 
						|
 | 
						|
/**
 * Resets all three staging slots to empty strings.
 *
 * @param info		The three staging slots.
 */
void scrap(StringInfo info[3])
{
	const StringInfo blank;

	for (int slot = 2; slot >= 0; slot--)
	{
		info[slot] = blank;
	}
}
 | 
						|
 | 
						|
/**
 * Moves a string record's pointers toward the start of the buffer.
 *
 * @param data		Record whose non-NULL pointers are adjusted.
 * @param bytes		Distance to subtract from each non-NULL pointer.
 */
void reloc(StringInfo &data, unsigned int bytes)
{
	/* NULL pointers mean "not set" and must stay NULL */
	if (data.ptr != NULL)
	{
		data.ptr = data.ptr - bytes;
	}

	if (data.end != NULL)
	{
		data.end = data.end - bytes;
	}
}
 | 
						|
 | 
						|
/**
 * Finds the start pointer of the first occupied staging slot, checking slot 2,
 * then slot 1, then slot 0.
 *
 * @param info		The three staging slots.
 * @return			The start pointer of the first occupied slot in that order, or
 *					NULL if every slot is empty.
 */
char *lowstring(StringInfo info[3])
{
	int slot = 2;

	while (slot >= 0 && info[slot].ptr == NULL)
	{
		slot--;
	}

	return (slot >= 0) ? info[slot].ptr : NULL;
}
 | 
						|
 | 
						|
/**
 * Parses an SMC text stream, reporting raw lines, sections and key/value pairs to
 * the listener as they are recognized.
 *
 * @param stream	Opaque handle passed unchanged to the reader callback.
 * @param srdr		Reader callback; fills the supplied buffer and stores the number
 *					of bytes it produced.  Parsing stops reading when it returns
 *					false or produces zero bytes.
 * @param smc		Listener receiving the ReadSMC_* callbacks.
 * @param pStates	Optional; receives the final line/column state when non-NULL.
 * @return			SMCError_Okay on success or when the listener stops parsing
 *					without failure, SMCError_Custom when the listener returns
 *					SMCResult_HaltFail, otherwise the error that stopped parsing.
 */
SMCError TextParsers::ParseStream_SMC(void *stream, 
								   STREAMREADER srdr, 
								   ITextListener_SMC *smc, 
								   SMCStates *pStates)
{
	char *reparse_point = NULL;
	char in_buf[4096];
	char *parse_point = in_buf;
	char *line_begin = in_buf;
	unsigned int read;
	unsigned int curlevel = 0;
	bool in_quote = false;
	bool ignoring = false;
	bool eol_comment = false;
	bool ml_comment = false;
	unsigned int i;
	SMCError err = SMCError_Okay;
	SMCResult res;
	SMCStates states;
	char c;

	/* strings[0] is the string currently being read; rotate() moves finished
	 * strings up into slots 1 and 2.
	 */
	StringInfo strings[3];
	StringInfo emptystring;

	states.line = 1;
	states.col = 0;

	smc->ReadSMC_ParseStart();

	/**
	 * The stream reader reads in as much as it can fill the buffer with.
	 * It then processes the buffer.  If the buffer cannot be fully processed, for example, 
	 * a line is left hanging with no newline, then the contents of the buffer is shifted 
	 * down, and the buffer is filled from the stream reader again.
	 *
	 * What makes this particularly annoying is that we cache pointers everywhere, so when 
	 * the shifting process takes place, all those pointers must be shifted as well.
	 */
	while (srdr(stream, parse_point, sizeof(in_buf) - (parse_point - in_buf) - 1, &read))
	{
		if (!read)
		{
			break;
		}

		/* Check for BOM markings, which is only relevant on the first line.
		 * Not worth it, but it could be moved out of the loop.
		 *
		 * The length check comes first: with fewer than three bytes read, the
		 * second and third buffer bytes are not valid data, and "read - 3" would
		 * wrap around because read is unsigned.
		 */
		if (states.line == 1 && 
			read >= 3 && 
			in_buf[0] == (char)0xEF && 
			in_buf[1] == (char)0xBB && 
			in_buf[2] == (char)0xBF)
		{
			/* Move EVERYTHING down :\ */
			memmove(in_buf, &in_buf[3], read - 3);
			read -= 3;
		}

		if (reparse_point)
		{
			read += (parse_point - reparse_point);
			parse_point = reparse_point;
			reparse_point = NULL;
		}

		for (i=0; i<read; i++)
		{
			c = parse_point[i];
			if (c == '\n')
			{
				/* If we got a newline, there's a lot of things that could have happened in the interim.
				 * First, let's make sure the staged strings are rotated.
				 */
				if (strings[0].ptr)
				{
					strings[0].end = &parse_point[i];
					if (rotate(strings) != NULL)
					{
						err = SMCError_InvalidTokens;
						goto failed;
					}
				}

				/* Next, let's clear some line-based values that may no longer have meaning */
				eol_comment = false;
				in_quote = false;
				if (ignoring && !ml_comment)
				{
					ignoring = false;
				}

				/* Pass the raw line onto the listener.  We terminate the line so the receiver 
				 * doesn't get tons of useless info.  We restore the newline after.
				 */
				parse_point[i] = '\0';
				if ((res=smc->ReadSMC_RawLine(&states, line_begin)) != SMCResult_Continue)
				{
					err = (res == SMCResult_HaltFail) ? SMCError_Custom : SMCError_Okay;
					goto failed;
				}
				parse_point[i] = '\n';

				/* Now we check the sanity of our staged strings! */
				if (strings[2].ptr)
				{
					if (!curlevel)
					{
						err = SMCError_InvalidProperty1;
						goto failed;
					}
					/* Assume the next string is a property and pass the info on. */
					if ((res=smc->ReadSMC_KeyValue(
						&states,
						FixupString(strings[2]),
						FixupString(strings[1]))) != SMCResult_Continue)
					{
						err = (res == SMCResult_HaltFail) ? SMCError_Custom : SMCError_Okay;
						goto failed;
					}
					scrap(strings);
				}

				/* Change the states for the next line */
				states.col = 0;
				states.line++;
				line_begin = &parse_point[i+1];		//Note: safe because this gets relocated later
			} 
			else if (ignoring) 
			{
				if (in_quote)
				{
					/* If i was 0, we could have reparsed, so make sure there's no buffer underrun */
					if ((&parse_point[i] != in_buf) && c == '"' && parse_point[i-1] != '\\')
					{
						/* If we reached a quote in an ignore phase,
						 * we're staging a string and we must rotate it out.
						 */
						in_quote = false;
						ignoring = false;
						/* Set our info */
						strings[0].end = &parse_point[i];
						strings[0].quoted = true;
						if (rotate(strings) != NULL)
						{
							/* If we rotated too many strings, there was too much crap on one line */
							err = SMCError_InvalidTokens;
							goto failed;
						}
					} 
					else if (c == '\\') 
					{
						strings[0].special = true;
						if (i == (read - 1))
						{
							reparse_point = &parse_point[i];
							break;
						}
					}
				} 
				else if (ml_comment) 
				{
					if (c == '*')
					{
						/* Check if we need to get more input first */
						if (i == read - 1)
						{
							reparse_point = &parse_point[i];
							break;
						}
						if (parse_point[i+1] == '/')
						{
							ml_comment = false;
							ignoring = false;
							/* We should not be staging anything right now. */
							assert(strings[0].ptr == NULL);
							/* Advance the input stream so we don't choke on this token */
							i++;
							states.col++;
						}
					}
				}
			} 
			else 
			{
				/* Check if we're whitespace or not */
				if (!g_ws_chartable[(unsigned char)c])
				{
					bool restage = false;
					/* Check various special tokens:
					 * ;
					 * //
					 * / *
					 * {
					 * }
					 */
					if (c == ';' || c == '/')
					{
						/* If it's a line-based comment (that is, ; or //)
						 * we will need to scrap everything until the end of the line.
						 */
						if (c == '/')
						{
							if (i == read - 1)
							{
								/* If we reached the end of the look-ahead, we need to re-check our input.
								 * Breaking out will force this to be the new reparse point!
								 */
								reparse_point = &parse_point[i];
								break;
							}
							if (parse_point[i + 1] == '/')
							{
								/* standard comment */
								ignoring = true;
								eol_comment = true;
								restage = true;
							} 
							else if (parse_point[i+1] == '*') 
							{
								/* inline comment - start ignoring */
								ignoring = true;
								ml_comment = true;
								/* yes, we restage, meaning that:
								 * STR/ *stuff* /ING  (space because ml comments don't nest in C++)
								 * will not generate 'STRING', but rather 'STR' and 'ING'.
								 * This should be a rare occurrence and is done here for convenience.
								 */
								restage = true;
							}
						} 
						else 
						{
							ignoring = true;
							eol_comment = true;
							restage = true;
						}
					} 
					else if (c == '{') 
					{
						/* If we are staging a string, we must rotate here */
						if (strings[0].ptr)
						{
							/* The staged token stops at this brace.  Record that so
							 * FixupString() terminates it here instead of handing the
							 * listener a string that runs on past the token.
							 */
							strings[0].end = &parse_point[i];
							/* We have unacceptable tokens on this line */
							if (rotate(strings) != NULL)
							{
								err = SMCError_InvalidSection1;
								goto failed;
							}
						}
						/* Sections must always be alone */
						if (strings[2].ptr != NULL)
						{
							err = SMCError_InvalidSection1;
							goto failed;
						} 
						else if (strings[1].ptr == NULL)
						{
							err = SMCError_InvalidSection2;
							goto failed;
						}
						if ((res=smc->ReadSMC_NewSection(&states, FixupString(strings[1])))
							!= SMCResult_Continue)
						{
							err = (res == SMCResult_HaltFail) ? SMCError_Custom : SMCError_Okay;
							goto failed;
						}
						strings[1] = emptystring;
						curlevel++;
					} 
					else if (c == '}') 
					{
						/* As with the opening brace, a token still being staged ends
						 * here and needs its end recorded before it is rotated.
						 */
						if (strings[0].ptr)
						{
							strings[0].end = &parse_point[i];
						}
						/* Unlike our matching friend, this can be on the same line as something prior */
						if (rotate(strings) != NULL)
						{
							err = SMCError_InvalidSection3;
							goto failed;
						}
						if (strings[2].ptr)
						{
							if (!curlevel)
							{
								err = SMCError_InvalidProperty1;
								goto failed;
							}
							if ((res=smc->ReadSMC_KeyValue(
											&states,
											FixupString(strings[2]),
											FixupString(strings[1])))
								!= SMCResult_Continue)
							{
								err = (res == SMCResult_HaltFail) ? SMCError_Custom : SMCError_Okay;
								goto failed;
							}
						} 
						else if (strings[1].ptr) 
						{
							err = SMCError_InvalidSection3;
							goto failed;
						} 
						else if (!curlevel) 
						{
							err = SMCError_InvalidSection4;
							goto failed;
						}
						/* Now it's safe to leave the section */
						scrap(strings);
						if ((res=smc->ReadSMC_LeavingSection(&states)) != SMCResult_Continue)
						{
							err = (res == SMCResult_HaltFail) ? SMCError_Custom : SMCError_Okay;
							goto failed;
						}
						curlevel--;
					} 
					else if (c == '"') 
					{
						/* If we get a quote mark, we always restage, but we need to do it beforehand */
						if (strings[0].ptr)
						{
							strings[0].end = &parse_point[i];
							if (rotate(strings) != NULL)
							{
								err = SMCError_InvalidTokens;
								goto failed;
							}
						}
						strings[0].ptr = &parse_point[i];
						in_quote = true;
						ignoring = true;
					} 
					else if (!strings[0].ptr) 
					{
						/* If we have no string, we must start one */
						strings[0].ptr = &parse_point[i];
					}
					if (restage && strings[0].ptr)
					{
						strings[0].end = &parse_point[i];
						if (rotate(strings) != NULL)
						{
							err = SMCError_InvalidTokens;
							goto failed;
						}
					}
				} 
				else 
				{
					/* If we're eating a string and get whitespace, we need to restage.
					 * (Note that if we are quoted, this is being ignored)
					 */
					if (strings[0].ptr)
					{
						/*
						 * The specification says the second string in a pair does not need to be quoted.
						 * Thus, we check if there's already a string on the stack.
						 * If there's a newline, we always rotate so the newline has an empty starter.
						 */
						if (!strings[1].ptr)
						{
							/* There's no string, so we must move this one down and eat up another */
							strings[0].end = &parse_point[i];
							rotate(strings);
						} 
						else if (!strings[1].quoted) 
						{
							err = SMCError_InvalidTokens;
							goto failed;
						}
					}
				}
			}

			/* Advance which token we're on */
			states.col++;
		}

		if (line_begin != in_buf)
		{
			/* The line buffer has advanced, so it's safe to copy N bytes back to the beginning.
			 * What's N?  N is the lowest point we're currently relying on.
			 */
			char *stage = lowstring(strings);
			if (!stage || stage > line_begin)
			{
				stage = line_begin;
			}
			unsigned int bytes = read - (stage - parse_point);

			/* It is now safe to delete everything before the staged point */
			memmove(in_buf, stage, bytes);

			/* Calculate the number of bytes in the new buffer */
			bytes = stage - in_buf;
			/* Relocate all the cached pointers to our new base */
			line_begin -= bytes;
			reloc(strings[0], bytes);
			reloc(strings[1], bytes);
			reloc(strings[2], bytes);
			if (reparse_point)
			{
				reparse_point -= bytes;
			}
			if (parse_point)
			{
				parse_point = &parse_point[read];
				parse_point -= bytes;
			}
		} 
		else if (read == sizeof(in_buf) - 1) 
		{
			/* The buffer is full and does not contain a single complete line */
			err = SMCError_TokenOverflow;
			goto failed;
		}
	}

	/* If we're done parsing and there are tokens left over... */
	if (curlevel)
	{
		err = SMCError_InvalidSection5;
		goto failed;
	} 
	else if (strings[0].ptr || strings[1].ptr) 
	{
		err = SMCError_InvalidTokens;
		goto failed;
	}
	
	smc->ReadSMC_ParseEnd(false, false);
	
	if (pStates != NULL)
	{
		*pStates = states;
	}

	return SMCError_Okay;

failed:
	if (pStates != NULL)
	{
		*pStates = states;
	}

	smc->ReadSMC_ParseEnd(true, (err == SMCError_Custom));

	return err;
}
 | 
						|
 | 
						|
 | 
						|
/**
 * INI parser 
 */
 | 
						|
 | 
						|
bool TextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listener, unsigned int *line, unsigned int *col)
 | 
						|
{
 | 
						|
	FILE *fp = fopen(file, "rt");
 | 
						|
	unsigned int curline = 0;
 | 
						|
	unsigned int curtok;
 | 
						|
	size_t len;
 | 
						|
 | 
						|
	if (!fp)
 | 
						|
	{
 | 
						|
		if (line)
 | 
						|
		{
 | 
						|
			*line = 0;
 | 
						|
		}
 | 
						|
 | 
						|
		return false;
 | 
						|
	}
 | 
						|
 | 
						|
	char buffer[2048];
 | 
						|
	char *ptr, *save_ptr;
 | 
						|
	bool in_quote;
 | 
						|
 | 
						|
	while (!feof(fp))
 | 
						|
	{
 | 
						|
		curline++;
 | 
						|
		curtok = 0;
 | 
						|
		buffer[0] = '\0';
 | 
						|
		if (fgets(buffer, sizeof(buffer), fp) == NULL)
 | 
						|
		{
 | 
						|
			break;
 | 
						|
		}
 | 
						|
 | 
						|
		//:TODO: this will only run once, so find a nice way to move it out of the while loop
 | 
						|
		/* If this is the first line, check the first three bytes for BOM */
 | 
						|
		if (curline == 1 && 
 | 
						|
			buffer[0] == (char)0xEF && 
 | 
						|
			buffer[1] == (char)0xBB && 
 | 
						|
			buffer[2] == (char)0xBF)
 | 
						|
		{
 | 
						|
			/* We have a UTF-8 marked file... skip these bytes */
 | 
						|
			ptr = &buffer[3];
 | 
						|
		} else {
 | 
						|
			ptr = buffer;
 | 
						|
		}
 | 
						|
 | 
						|
		/***************************************************
 | 
						|
		 * We preprocess the string before parsing tokens! *
 | 
						|
		 ***************************************************/
 | 
						|
 | 
						|
		/* First strip beginning whitespace */
 | 
						|
		while (*ptr != '\0' && g_ws_chartable[(unsigned char)*ptr] != 0)
 | 
						|
		{
 | 
						|
			ptr++;
 | 
						|
		}
 | 
						|
 | 
						|
		len = strlen(ptr);
 | 
						|
 | 
						|
		if (!len)
 | 
						|
		{
 | 
						|
			continue;
 | 
						|
		}
 | 
						|
 | 
						|
		/* Now search for comment characters */
 | 
						|
		in_quote = false;
 | 
						|
		save_ptr = ptr;
 | 
						|
		for (size_t i=0; i<len; i++,ptr++)
 | 
						|
		{
 | 
						|
			if (!in_quote)
 | 
						|
			{
 | 
						|
				switch (*ptr)
 | 
						|
				{
 | 
						|
				case '"':
 | 
						|
					{
 | 
						|
						in_quote = true;
 | 
						|
						break;
 | 
						|
					}
 | 
						|
				case ';':
 | 
						|
					{
 | 
						|
						/* Stop the loop */
 | 
						|
						len = i;
 | 
						|
						/* Terminate the string here */
 | 
						|
						*ptr = '\0';
 | 
						|
						break;
 | 
						|
					}
 | 
						|
				}
 | 
						|
			} else {
 | 
						|
				if (*ptr == '"')
 | 
						|
				{
 | 
						|
					in_quote = false;
 | 
						|
				}
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		if (!len)
 | 
						|
		{
 | 
						|
			continue;
 | 
						|
		}
 | 
						|
 | 
						|
		ptr = save_ptr;
 | 
						|
 | 
						|
		/* Lastly, strip ending whitespace off */
 | 
						|
		for (size_t i=len-1; i<len; i--)
 | 
						|
		{
 | 
						|
			if (g_ws_chartable[(unsigned char)ptr[i]])
 | 
						|
			{
 | 
						|
				ptr[i] = '\0';
 | 
						|
				len--;
 | 
						|
			} else {
 | 
						|
				break;
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		if (!len)
 | 
						|
		{
 | 
						|
			continue;
 | 
						|
		}
 | 
						|
 | 
						|
		if (!ini_listener->ReadINI_RawLine(ptr, &curtok))
 | 
						|
		{
 | 
						|
			goto event_failed;
 | 
						|
		}
 | 
						|
 | 
						|
		if (*ptr == '[')
 | 
						|
		{
 | 
						|
			bool invalid_tokens = false;
 | 
						|
			bool got_bracket = false;
 | 
						|
			bool extra_tokens = false;
 | 
						|
			char c;
 | 
						|
			bool alnum;
 | 
						|
			wchar_t wc;
 | 
						|
 | 
						|
			for (size_t i=1; i<len; i++)
 | 
						|
			{
 | 
						|
				c = ptr[i];
 | 
						|
				alnum = false;
 | 
						|
 | 
						|
				if (c & (1<<7))
 | 
						|
				{
 | 
						|
					if (mbtowc(&wc, &ptr[i], len-i) != -1)
 | 
						|
					{
 | 
						|
						alnum = (iswalnum(wc) != 0);
 | 
						|
						i += _GetUTF8CharBytes(&ptr[i]) - 1;
 | 
						|
					}
 | 
						|
				} else {
 | 
						|
					alnum = (isalnum(c) != 0) || (g_ini_chartable1[(unsigned char)c] != 0);
 | 
						|
				}
 | 
						|
				if (!alnum)
 | 
						|
				{
 | 
						|
					/* First check - is this a bracket? */
 | 
						|
					if (c == ']')
 | 
						|
					{
 | 
						|
						/* Yes! */
 | 
						|
						got_bracket = true;
 | 
						|
						/* If this isn't the last character... */
 | 
						|
						if (i != len - 1)
 | 
						|
						{
 | 
						|
							extra_tokens = true;
 | 
						|
						}
 | 
						|
						/* terminate */
 | 
						|
						ptr[i] = '\0';
 | 
						|
						break;
 | 
						|
					} else {
 | 
						|
						/* n...No! Continue copying. */
 | 
						|
						invalid_tokens = true;
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
 | 
						|
			/* Tell the handler */
 | 
						|
			if (!ini_listener->ReadINI_NewSection(&ptr[1], invalid_tokens, got_bracket, extra_tokens, &curtok))
 | 
						|
			{
 | 
						|
				goto event_failed;
 | 
						|
			}
 | 
						|
		} else {
 | 
						|
			char *key_ptr = ptr;
 | 
						|
			char *val_ptr = NULL;
 | 
						|
			char c;
 | 
						|
			size_t first_space = 0;
 | 
						|
			bool invalid_tokens = false;
 | 
						|
			bool equal_token = false;
 | 
						|
			bool quotes = false;
 | 
						|
			bool alnum;
 | 
						|
			wchar_t wc;
 | 
						|
 | 
						|
			for (size_t i=0; i<len; i++)
 | 
						|
			{
 | 
						|
				c = ptr[i];
 | 
						|
				alnum = false;
 | 
						|
				/* is this an invalid char? */
 | 
						|
				if (c & (1<<7))
 | 
						|
				{
 | 
						|
					if (mbtowc(&wc, &ptr[i], len-i) != -1)
 | 
						|
					{
 | 
						|
						alnum = (iswalnum(wc) != 0);
 | 
						|
						i += _GetUTF8CharBytes(&ptr[i]) - 1;
 | 
						|
					}
 | 
						|
				} else {
 | 
						|
					alnum = (isalnum(c) != 0) || (g_ini_chartable1[(unsigned char)c] != 0);
 | 
						|
				}
 | 
						|
 | 
						|
				if (!alnum)
 | 
						|
				{
 | 
						|
					if (g_ws_chartable[(unsigned char)c])
 | 
						|
					{
 | 
						|
						/* if it's a space, keep track of the first occurring space */
 | 
						|
						if (!first_space)
 | 
						|
						{
 | 
						|
							first_space = i;
 | 
						|
						}
 | 
						|
					} else {
 | 
						|
						if (c == '=')
 | 
						|
						{
 | 
						|
							/* if it's an equal sign, we're done with the key */
 | 
						|
							if (first_space)
 | 
						|
							{
 | 
						|
								/* remove excess whitespace */
 | 
						|
								key_ptr[first_space] = '\0';
 | 
						|
							} else {
 | 
						|
								/* remove the equal sign */
 | 
						|
								key_ptr[i] = '\0';
 | 
						|
							}
 | 
						|
							if (ptr[++i] != '\0')
 | 
						|
							{
 | 
						|
								/* If this isn't the end, set next pointer */
 | 
						|
								val_ptr = &ptr[i];
 | 
						|
							}
 | 
						|
							equal_token = true;
 | 
						|
							break;
 | 
						|
						} else {
 | 
						|
							/* Mark that we got something invalid! */
 | 
						|
							invalid_tokens = true;
 | 
						|
							first_space = 0;
 | 
						|
						}
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
 | 
						|
			/* Now we need to parse the value, if any */
 | 
						|
			if (val_ptr)
 | 
						|
			{
 | 
						|
				/* eat up spaces! there shouldn't be any h*/
 | 
						|
				while ((*val_ptr != '\0') && g_ws_chartable[(unsigned char)*val_ptr] != 0)
 | 
						|
				{
 | 
						|
					val_ptr++;
 | 
						|
				}
 | 
						|
				if (*val_ptr == '\0')
 | 
						|
				{
 | 
						|
					val_ptr = NULL;
 | 
						|
					goto skip_value;
 | 
						|
				}
 | 
						|
				/* Do we have an initial quote? If so, the parsing rules change! */
 | 
						|
				if (*val_ptr == '"' && *val_ptr != '\0')
 | 
						|
				{
 | 
						|
					len = strlen(val_ptr);
 | 
						|
					if (val_ptr[len-1] == '"')
 | 
						|
					{
 | 
						|
						/* Strip quotes! */
 | 
						|
						val_ptr[--len] = '\0';
 | 
						|
						val_ptr++;
 | 
						|
						quotes = true;
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
skip_value:
 | 
						|
			/* We're done! */
 | 
						|
			curtok = val_ptr - buffer;
 | 
						|
			if (!ini_listener->ReadINI_KeyValue(key_ptr, val_ptr, invalid_tokens, equal_token, quotes, &curtok))
 | 
						|
			{
 | 
						|
				curtok = 0;
 | 
						|
				goto event_failed;
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	if (line)
 | 
						|
	{
 | 
						|
		*line = curline;
 | 
						|
	}
 | 
						|
 | 
						|
	fclose(fp);
 | 
						|
 | 
						|
	return true;
 | 
						|
 | 
						|
event_failed:
 | 
						|
	if (line)
 | 
						|
	{
 | 
						|
		*line = curline;
 | 
						|
	}
 | 
						|
 | 
						|
	if (col)
 | 
						|
	{
 | 
						|
		*col = curtok;
 | 
						|
	}
 | 
						|
 | 
						|
	fclose(fp);
 | 
						|
 | 
						|
	return false;
 | 
						|
}
 | 
						|
 | 
						|
/**
 * Looks up the human-readable message for an SMC parse error code.
 *
 * @param err		Error code to describe.
 * @return			The message stored for err, or NULL when err lies outside
 *					the range SMCError_Okay..SMCError_InvalidProperty1.  The
 *					first table entry is itself NULL.
 */
const char *TextParsers::GetSMCErrorString(SMCError err)
{
	/* Message table, indexed directly by the numeric value of the error code */
	static const char *s_errors[] = 
	{
		NULL,
		"Stream failed to open",
		"Stream returned read error",
		"Callback error",
		"Un-quoted section has invalid tokens",
		"Section declared without header",
		"Section declared with unknown tokens",
		"Section ending without a matching section beginning",
		"Section beginning without a matching ending",
		"Line contained too many invalid tokens",
		"Token buffer overflowed",
		"A property was declared outside of a section",
	};

	/* Only codes between the first and last known error may index the table */
	const bool known_code = (err >= SMCError_Okay && err <= SMCError_InvalidProperty1);

	return known_code ? s_errors[err] : NULL;
}
 |