1098 lines
		
	
	
		
			25 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			1098 lines
		
	
	
		
			25 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /**
 | |
|  * vim: set ts=4 :
 | |
|  * =============================================================================
 | |
|  * SourceMod
 | |
|  * Copyright (C) 2004-2008 AlliedModders LLC.  All rights reserved.
 | |
|  * =============================================================================
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or modify it under
 | |
|  * the terms of the GNU General Public License, version 3.0, as published by the
 | |
|  * Free Software Foundation.
 | |
|  * 
 | |
|  * This program is distributed in the hope that it will be useful, but WITHOUT
 | |
|  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | |
|  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 | |
|  * details.
 | |
|  *
 | |
|  * You should have received a copy of the GNU General Public License along with
 | |
|  * this program.  If not, see <http://www.gnu.org/licenses/>.
 | |
|  *
 | |
|  * As a special exception, AlliedModders LLC gives you permission to link the
 | |
|  * code of this program (as well as its derivative works) to "Half-Life 2," the
 | |
|  * "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
 | |
|  * by the Valve Corporation.  You must obey the GNU General Public License in
 | |
|  * all respects for all other code used.  Additionally, AlliedModders LLC grants
 | |
|  * this exception to all derivative works.  AlliedModders LLC defines further
 | |
|  * exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
 | |
|  * or <http://www.sourcemod.net/license.php>.
 | |
|  *
 | |
|  * Version: $Id$
 | |
|  */
 | |
| 
 | |
| #include <stdio.h>
 | |
| #include <ctype.h>
 | |
| #include <wctype.h>
 | |
| #include <string.h>
 | |
| #include <stdlib.h>
 | |
| #include <assert.h>
 | |
| #include "TextParsers.h"
 | |
| #include <ILibrarySys.h>
 | |
| 
 | |
| TextParsers g_TextParser;
 | |
| ITextParsers *textparsers = &g_TextParser;
 | |
| 
 | |
/* Character-class lookup tables.  They are indexed with a character cast to
 * unsigned char, so each needs one slot for every possible byte value
 * (0 through 255 inclusive).  A non-zero entry means the byte belongs to the
 * class; the entries are filled in by the TextParsers constructor.
 *
 * g_ini_chartable1: punctuation accepted in INI section and key names.
 * g_ws_chartable:   bytes treated as whitespace.
 */
static int g_ini_chartable1[256] = {0};
static int g_ws_chartable[256] = {0};
 | |
| 
 | |
| bool TextParsers::IsWhitespace(const char *stream)
 | |
| {
 | |
| 	return g_ws_chartable[(unsigned char)*stream] == 1;
 | |
| }
 | |
| 
 | |
| TextParsers::TextParsers()
 | |
| {
 | |
| 	g_ini_chartable1[(unsigned)'_'] = 1;
 | |
| 	g_ini_chartable1[(unsigned)'-'] = 1;
 | |
| 	g_ini_chartable1[(unsigned)','] = 1;
 | |
| 	g_ini_chartable1[(unsigned)'+'] = 1;
 | |
| 	g_ini_chartable1[(unsigned)'.'] = 1;
 | |
| 	g_ini_chartable1[(unsigned)'$'] = 1;
 | |
| 	g_ini_chartable1[(unsigned)'?'] = 1;
 | |
| 	g_ini_chartable1[(unsigned)'/'] = 1;
 | |
| 	g_ws_chartable[(unsigned)'\n'] = 1;
 | |
| 	g_ws_chartable[(unsigned)'\v'] = 1;
 | |
| 	g_ws_chartable[(unsigned)'\r'] = 1;
 | |
| 	g_ws_chartable[(unsigned)'\t'] = 1;
 | |
| 	g_ws_chartable[(unsigned)'\f'] = 1;
 | |
| 	g_ws_chartable[(unsigned)' '] = 1;
 | |
| }
 | |
| 
 | |
| void TextParsers::OnSourceModAllInitialized()
 | |
| {
 | |
| 	sharesys->AddInterface(NULL, this);
 | |
| }
 | |
| 
 | |
| unsigned int TextParsers::GetUTF8CharBytes(const char *stream)
 | |
| {
 | |
| 	return _GetUTF8CharBytes(stream);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * File streams
 | |
|  */
 | |
| 
 | |
/**
 * Stream reader callback backed by a C stdio FILE.
 *
 * @param stream		The FILE pointer to read from, passed as void *.
 * @param buffer		Destination for the bytes read.
 * @param maxlength		Maximum number of bytes to read.
 * @param read			Receives the number of bytes actually read.
 * @return				True when nothing was read because end-of-file was
 *						reached, otherwise true only if the stream has no
 *						error flagged.
 */
bool FileStreamReader(void *stream, char *buffer, size_t maxlength, unsigned int *read)
{
	FILE *fp = static_cast<FILE *>(stream);
	const size_t got = fread(buffer, 1, maxlength, fp);

	*read = static_cast<unsigned int>(got);

	/* An empty read at end-of-file is a clean finish, not a failure */
	const bool clean_eof = (got == 0) && (feof(fp) != 0);

	return clean_eof || (ferror(fp) == 0);
}
 | |
| 
 | |
| SMCError TextParsers::ParseFile_SMC(const char *file, ITextListener_SMC *smc, SMCStates *states)
 | |
| {
 | |
| 	FILE *fp = fopen(file, "rt");
 | |
| 
 | |
| 	if (!fp)
 | |
| 	{
 | |
| 		if (states != NULL)
 | |
| 		{
 | |
| 			states->line = 0;
 | |
| 			states->col = 0;
 | |
| 		}
 | |
| 		return SMCError_StreamOpen;
 | |
| 	}
 | |
| 
 | |
| 	SMCError result = ParseStream_SMC(fp, FileStreamReader, smc, states);
 | |
| 
 | |
| 	fclose(fp);
 | |
| 
 | |
| 	return result;
 | |
| }
 | |
| 
 | |
| SMCError TextParsers::ParseSMCFile(const char *file,
 | |
| 								   ITextListener_SMC *smc_listener,
 | |
| 								   SMCStates *states,
 | |
| 								   char *buffer,
 | |
| 								   size_t maxsize)
 | |
| {
 | |
| 	const char *errstr;
 | |
| 	FILE *fp = fopen(file, "rt");
 | |
| 
 | |
| 	if (fp == NULL)
 | |
| 	{
 | |
| 		char error[256] = "unknown";
 | |
| 		if (states != NULL)
 | |
| 		{
 | |
| 			states->line = 0;
 | |
| 			states->col = 0;
 | |
| 		}
 | |
| 		libsys->GetPlatformError(error, sizeof(error));
 | |
| 		smcore.Format(buffer, maxsize, "File could not be opened: %s", error);
 | |
| 		return SMCError_StreamOpen;
 | |
| 	}
 | |
| 
 | |
| 	SMCError result = ParseStream_SMC(fp, FileStreamReader, smc_listener, states);
 | |
| 
 | |
| 	fclose(fp);
 | |
| 
 | |
| 	errstr = GetSMCErrorString(result);
 | |
| 	smcore.Format(buffer, maxsize, "%s", errstr != NULL ? errstr : "Unknown error");
 | |
| 
 | |
| 	return result;
 | |
| }
 | |
| 
 | |
/**
 * Cursor over an in-memory block of text.
 */
struct RawStream
{
	const char *stream;		/**< First byte of the text */
	size_t length;			/**< Number of bytes available at stream */
	size_t pos;				/**< Offset of the next byte to hand out */
};

/**
 * Stream reader callback backed by a RawStream.
 *
 * @param stream		The RawStream to read from, passed as void *.
 * @param buffer		Destination for the copied bytes.
 * @param maxlength		Maximum number of bytes to copy.
 * @param read			Receives the number of bytes copied; left untouched
 *						when the function returns false.
 * @return				False once the cursor has reached the end of the
 *						text, true otherwise.
 */
bool RawStreamReader(void *stream, char *buffer, size_t maxlength, unsigned int *read)
{
	RawStream *src = static_cast<RawStream *>(stream);

	if (src->pos >= src->length)
	{
		return false;
	}

	/* Hand out whatever is left, capped at the caller's limit */
	size_t chunk = src->length - src->pos;
	if (chunk > maxlength)
	{
		chunk = maxlength;
	}

	memcpy(buffer, src->stream + src->pos, chunk);
	src->pos += chunk;
	*read = static_cast<unsigned int>(chunk);
	assert(src->pos <= src->length);

	return true;
}
 | |
| 
 | |
| SMCError TextParsers::ParseSMCStream(const char *stream,
 | |
| 									 size_t length,
 | |
| 									 ITextListener_SMC *smc_listener,
 | |
| 									 SMCStates *states,
 | |
| 									 char *buffer,
 | |
| 									 size_t maxsize)
 | |
| {
 | |
| 	RawStream rs;
 | |
| 	SMCError result;
 | |
| 
 | |
| 	rs.stream = stream;
 | |
| 	rs.length = length;
 | |
| 	rs.pos = 0;
 | |
| 
 | |
| 	result = ParseStream_SMC(&rs, RawStreamReader, smc_listener, states);
 | |
| 
 | |
| 	const char *errstr = GetSMCErrorString(result);
 | |
| 	smcore.Format(buffer, maxsize, "%s", errstr != NULL ? errstr : "Unknown error");
 | |
| 
 | |
| 	return result;
 | |
| }
 | |
| 
 | |
| /** 
 | |
|  * Raw parsing of streams with helper functions
 | |
|  */
 | |
| 
 | |
/**
 * Describes one token that has been located inside the parse buffer but not
 * yet handed to a listener.
 */
struct StringInfo
{
	StringInfo()
		: quoted(false),
		  ptr(NULL),
		  end(NULL),
		  special(false)
	{
	}

	bool quoted;	/**< Token was wrapped in double quotes; ptr is on the opening quote */
	char *ptr;		/**< First byte of the token, or NULL if the slot is empty */
	char *end;		/**< Byte that gets overwritten with the terminator, or NULL */
	bool special;	/**< Token contains at least one backslash to resolve */
};

/**
 * Turns a staged token into a NUL-terminated C string, in place.
 *
 * For a quoted token the opening quote is skipped.  When the token is
 * flagged as special, the escape sequences \n, \t, \r, \\ and \" are
 * collapsed into the single character they stand for; a backslash followed
 * by anything else is kept literally.  Finally the byte at data.end, if
 * set, is overwritten with a terminator.
 *
 * @param data			Token to finalise; data.ptr is advanced for quoted
 *						tokens and the underlying buffer is modified.
 * @return				Pointer to the finished string, or NULL if the slot
 *						is empty.
 */
const char *FixupString(StringInfo &data)
{
	if (data.ptr == NULL)
	{
		return NULL;
	}

	if (data.quoted)
	{
		++data.ptr;
	}

	if (data.special)
	{
		const size_t span = data.end - data.ptr;

		/* An escape needs at least two characters */
		if (span >= 2)
		{
			char *write = data.ptr;

			for (size_t rd = 0; rd < span; ++rd)
			{
				if (data.ptr[rd] == '\\' && rd + 1 < span)
				{
					char &escaped = data.ptr[rd + 1];

					switch (escaped)
					{
					case 'n':
						escaped = '\n';
						++rd;
						break;
					case 't':
						escaped = '\t';
						++rd;
						break;
					case 'r':
						escaped = '\r';
						++rd;
						break;
					case '\\':
					case '"':
						/* Keep the escaped character, drop the backslash */
						++rd;
						break;
					default:
						/* Not a known escape: the backslash stays as-is */
						break;
					}
				}

				/* The write cursor never overtakes the read cursor */
				*write++ = data.ptr[rd];
			}

			*write = '\0';
		}
	}

	if (data.end != NULL)
	{
		*data.end = '\0';
	}

	return data.ptr;
}
 | |
| 
 | |
| const char *rotate(StringInfo info[3])
 | |
| {
 | |
| 	if (info[2].ptr != NULL)
 | |
| 	{
 | |
| 		return info[2].ptr;
 | |
| 	}
 | |
| 
 | |
| 	if (info[0].ptr != NULL)
 | |
| 	{
 | |
| 		info[2] = info[1];
 | |
| 		info[1] = info[0];
 | |
| 		info[0] = StringInfo();
 | |
| 	}
 | |
| 	
 | |
| 	return NULL;
 | |
| }
 | |
| 
 | |
| void scrap(StringInfo info[3])
 | |
| {
 | |
| 	info[2] = StringInfo();
 | |
| 	info[1] = StringInfo();
 | |
| 	info[0] = StringInfo();
 | |
| }
 | |
| 
 | |
| void reloc(StringInfo &data, unsigned int bytes)
 | |
| {
 | |
| 	if (data.ptr)
 | |
| 	{
 | |
| 		data.ptr -= bytes;
 | |
| 	}
 | |
| 	if (data.end)
 | |
| 	{
 | |
| 		data.end -= bytes;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| char *lowstring(StringInfo info[3])
 | |
| {
 | |
| 	for (int i=2; i>=0; i--)
 | |
| 	{
 | |
| 		if (info[i].ptr)
 | |
| 		{
 | |
| 			return info[i].ptr;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return NULL;
 | |
| }
 | |
| 
 | |
| SMCError TextParsers::ParseStream_SMC(void *stream, 
 | |
| 								   STREAMREADER srdr, 
 | |
| 								   ITextListener_SMC *smc, 
 | |
| 								   SMCStates *pStates)
 | |
| {
 | |
| 	char *reparse_point = NULL;
 | |
| 	char in_buf[4096];
 | |
| 	char *parse_point = in_buf;
 | |
| 	char *line_begin = in_buf;
 | |
| 	unsigned int read;
 | |
| 	unsigned int curlevel = 0;
 | |
| 	bool in_quote = false;
 | |
| 	bool ignoring = false;
 | |
| 	bool eol_comment = false;
 | |
| 	bool ml_comment = false;
 | |
| 	unsigned int i;
 | |
| 	SMCError err = SMCError_Okay;
 | |
| 	SMCResult res;
 | |
| 	SMCStates states;
 | |
| 	char c;
 | |
| 
 | |
| 	StringInfo strings[3];
 | |
| 	StringInfo emptystring;
 | |
| 
 | |
| 	states.line = 1;
 | |
| 	states.col = 0;
 | |
| 
 | |
| 	smc->ReadSMC_ParseStart();
 | |
| 
 | |
| 	/**
 | |
| 	 * The stream reader reads in as much as it can fill the buffer with.
 | |
| 	 * It then processes the buffer.  If the buffer cannot be fully processed, for example, 
 | |
| 	 * a line is left hanging with no newline, then the contents of the buffer is shifted 
 | |
| 	 * down, and the buffer is filled from the stream reader again.
 | |
| 	 *
 | |
| 	 * What makes this particularly annoying is that we cache pointers everywhere, so when 
 | |
| 	 * the shifting process takes place, all those pointers must be shifted as well.
 | |
| 	 */
 | |
| 	while (srdr(stream, parse_point, sizeof(in_buf) - (parse_point - in_buf) - 1, &read))
 | |
| 	{
 | |
| 		if (!read)
 | |
| 		{
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		/* Check for BOM markings, which is only relevant on the first line.
 | |
| 		 * Not worth it, but it could be moved out of the loop.
 | |
| 		 */
 | |
| 		if (states.line == 1 && 
 | |
| 			in_buf[0] == (char)0xEF && 
 | |
| 			in_buf[1] == (char)0xBB && 
 | |
| 			in_buf[2] == (char)0xBF)
 | |
| 		{
 | |
| 			/* Move EVERYTHING down :\ */
 | |
| 			memmove(in_buf, &in_buf[3], read - 3);
 | |
| 			read -= 3;
 | |
| 		}
 | |
| 
 | |
| 		if (reparse_point)
 | |
| 		{
 | |
| 			read += (parse_point - reparse_point);
 | |
| 			parse_point = reparse_point;
 | |
| 			reparse_point = NULL;
 | |
| 		}
 | |
| 
 | |
| 		for (i=0; i<read; i++)
 | |
| 		{
 | |
| 			c = parse_point[i];
 | |
| 			if (c == '\n')
 | |
| 			{
 | |
| 				/* If we got a newline, there's a lot of things that could have happened in the interim.
 | |
| 				 * First, let's make sure the staged strings are rotated.
 | |
| 				 */
 | |
| 				if (strings[0].ptr)
 | |
| 				{
 | |
| 					strings[0].end = &parse_point[i];
 | |
| 					if (rotate(strings) != NULL)
 | |
| 					{
 | |
| 						err = SMCError_InvalidTokens;
 | |
| 						goto failed;
 | |
| 					}
 | |
| 				}
 | |
| 
 | |
| 				/* Next, let's clear some line-based values that may no longer have meaning */
 | |
| 				eol_comment = false;
 | |
| 				in_quote = false;
 | |
| 				if (ignoring && !ml_comment)
 | |
| 				{
 | |
| 					ignoring = false;
 | |
| 				}
 | |
| 
 | |
| 				/* Pass the raw line onto the listener.  We terminate the line so the receiver 
 | |
| 				 * doesn't get tons of useless info.  We restore the newline after.
 | |
| 				 */
 | |
| 				parse_point[i] = '\0';
 | |
| 				if ((res=smc->ReadSMC_RawLine(&states, line_begin)) != SMCResult_Continue)
 | |
| 				{
 | |
| 					err = (res == SMCResult_HaltFail) ? SMCError_Custom : SMCError_Okay;
 | |
| 					goto failed;
 | |
| 				}
 | |
| 				parse_point[i] = '\n';
 | |
| 
 | |
| 				/* Now we check the sanity of our staged strings! */
 | |
| 				if (strings[2].ptr)
 | |
| 				{
 | |
| 					if (!curlevel)
 | |
| 					{
 | |
| 						err = SMCError_InvalidProperty1;
 | |
| 						goto failed;
 | |
| 					}
 | |
| 					/* Assume the next string is a property and pass the info on. */
 | |
| 					if ((res=smc->ReadSMC_KeyValue(
 | |
| 						&states,
 | |
| 						FixupString(strings[2]),
 | |
| 						FixupString(strings[1]))) != SMCResult_Continue)
 | |
| 					{
 | |
| 						err = (res == SMCResult_HaltFail) ? SMCError_Custom : SMCError_Okay;
 | |
| 						goto failed;
 | |
| 					}
 | |
| 					scrap(strings);
 | |
| 				}
 | |
| 
 | |
| 				/* Change the states for the next line */
 | |
| 				states.col = 0;
 | |
| 				states.line++;
 | |
| 				line_begin = &parse_point[i+1];		//Note: safe because this gets relocated later
 | |
| 			} 
 | |
| 			else if (ignoring) 
 | |
| 			{
 | |
| 				if (in_quote)
 | |
| 				{
 | |
| 					/* If i was 0, we could have reparsed, so make sure there's no buffer underrun */
 | |
| 					if ((&parse_point[i] != in_buf) && c == '"' && parse_point[i-1] != '\\')
 | |
| 					{
 | |
| 						/* If we reached a quote in an ignore phase,
 | |
| 						 * we're staging a string and we must rotate it out.
 | |
| 						 */
 | |
| 						in_quote = false;
 | |
| 						ignoring = false;
 | |
| 						/* Set our info */
 | |
| 						strings[0].end = &parse_point[i];
 | |
| 						strings[0].quoted = true;
 | |
| 						if (rotate(strings) != NULL)
 | |
| 						{
 | |
| 							/* If we rotated too many strings, there was too much crap on one line */
 | |
| 							err = SMCError_InvalidTokens;
 | |
| 							goto failed;
 | |
| 						}
 | |
| 					} 
 | |
| 					else if (c == '\\') 
 | |
| 					{
 | |
| 						strings[0].special = true;
 | |
| 						if (i == (read - 1))
 | |
| 						{
 | |
| 							reparse_point = &parse_point[i];
 | |
| 							break;
 | |
| 						}
 | |
| 					}
 | |
| 				} 
 | |
| 				else if (ml_comment) 
 | |
| 				{
 | |
| 					if (c == '*')
 | |
| 					{
 | |
| 						/* Check if we need to get more input first */
 | |
| 						if (i == read - 1)
 | |
| 						{
 | |
| 							reparse_point = &parse_point[i];
 | |
| 							break;
 | |
| 						}
 | |
| 						if (parse_point[i+1] == '/')
 | |
| 						{
 | |
| 							ml_comment = false;
 | |
| 							ignoring = false;
 | |
| 							/* We should not be staging anything right now. */
 | |
| 							assert(strings[0].ptr == NULL);
 | |
| 							/* Advance the input stream so we don't choke on this token */
 | |
| 							i++;
 | |
| 							states.col++;
 | |
| 						}
 | |
| 					}
 | |
| 				}
 | |
| 			} 
 | |
| 			else 
 | |
| 			{
 | |
| 				/* Check if we're whitespace or not */
 | |
| 				if (!g_ws_chartable[(unsigned char)c])
 | |
| 				{
 | |
| 					bool restage = false;
 | |
| 					/* Check various special tokens:
 | |
| 					 * ;
 | |
| 					 * //
 | |
| 					 * / *
 | |
| 					 * {
 | |
| 					 * }
 | |
| 					 */
 | |
| 					if (c == ';' || c == '/')
 | |
| 					{
 | |
| 						/* If it's a line-based comment (that is, ; or //)
 | |
| 						 * we will need to scrap everything until the end of the line.
 | |
| 						 */
 | |
| 						if (c == '/')
 | |
| 						{
 | |
| 							if (i == read - 1)
 | |
| 							{
 | |
| 								/* If we reached the end of the look-ahead, we need to re-check our input.
 | |
| 								 * Breaking out will force this to be the new reparse point!
 | |
| 								 */
 | |
| 								reparse_point = &parse_point[i];
 | |
| 								break;
 | |
| 							}
 | |
| 							if (parse_point[i + 1] == '/')
 | |
| 							{
 | |
| 								/* standard comment */
 | |
| 								ignoring = true;
 | |
| 								eol_comment = true;
 | |
| 								restage = true;
 | |
| 							} 
 | |
| 							else if (parse_point[i+1] == '*') 
 | |
| 							{
 | |
| 								/* inline comment - start ignoring */
 | |
| 								ignoring = true;
 | |
| 								ml_comment = true;
 | |
| 								/* yes, we restage, meaning that:
 | |
| 								 * STR/ *stuff* /ING  (space because ml comments don't nest in C++)
 | |
| 								 * will not generate 'STRING', but rather 'STR' and 'ING'.
 | |
| 								 * This should be a rare occurrence and is done here for convenience.
 | |
| 								 */
 | |
| 								restage = true;
 | |
| 							}
 | |
| 						} 
 | |
| 						else 
 | |
| 						{
 | |
| 							ignoring = true;
 | |
| 							eol_comment = true;
 | |
| 							restage = true;
 | |
| 						}
 | |
| 					} 
 | |
| 					else if (c == '{') 
 | |
| 					{
 | |
| 						/* If we are staging a string, we must rotate here */
 | |
| 						if (strings[0].ptr)
 | |
| 						{
 | |
| 							/* We have unacceptable tokens on this line */
 | |
| 							if (rotate(strings) != NULL)
 | |
| 							{
 | |
| 								err = SMCError_InvalidSection1;
 | |
| 								goto failed;
 | |
| 							}
 | |
| 						}
 | |
| 						/* Sections must always be alone */
 | |
| 						if (strings[2].ptr != NULL)
 | |
| 						{
 | |
| 							err = SMCError_InvalidSection1;
 | |
| 							goto failed;
 | |
| 						} 
 | |
| 						else if (strings[1].ptr == NULL)
 | |
| 						{
 | |
| 							err = SMCError_InvalidSection2;
 | |
| 							goto failed;
 | |
| 						}
 | |
| 						if ((res=smc->ReadSMC_NewSection(&states, FixupString(strings[1])))
 | |
| 							!= SMCResult_Continue)
 | |
| 						{
 | |
| 							err = (res == SMCResult_HaltFail) ? SMCError_Custom : SMCError_Okay;
 | |
| 							goto failed;
 | |
| 						}
 | |
| 						strings[1] = emptystring;
 | |
| 						curlevel++;
 | |
| 					} 
 | |
| 					else if (c == '}') 
 | |
| 					{
 | |
| 						/* Unlike our matching friend, this can be on the same line as something prior */
 | |
| 						if (rotate(strings) != NULL)
 | |
| 						{
 | |
| 							err = SMCError_InvalidSection3;
 | |
| 							goto failed;
 | |
| 						}
 | |
| 						if (strings[2].ptr)
 | |
| 						{
 | |
| 							if (!curlevel)
 | |
| 							{
 | |
| 								err = SMCError_InvalidProperty1;
 | |
| 								goto failed;
 | |
| 							}
 | |
| 							if ((res=smc->ReadSMC_KeyValue(
 | |
| 											&states,
 | |
| 											FixupString(strings[2]),
 | |
| 											FixupString(strings[1])))
 | |
| 								!= SMCResult_Continue)
 | |
| 							{
 | |
| 								err = (res == SMCResult_HaltFail) ? SMCError_Custom : SMCError_Okay;
 | |
| 								goto failed;
 | |
| 							}
 | |
| 						} 
 | |
| 						else if (strings[1].ptr) 
 | |
| 						{
 | |
| 							err = SMCError_InvalidSection3;
 | |
| 							goto failed;
 | |
| 						} 
 | |
| 						else if (!curlevel) 
 | |
| 						{
 | |
| 							err = SMCError_InvalidSection4;
 | |
| 							goto failed;
 | |
| 						}
 | |
| 						/* Now it's safe to leave the section */
 | |
| 						scrap(strings);
 | |
| 						if ((res=smc->ReadSMC_LeavingSection(&states)) != SMCResult_Continue)
 | |
| 						{
 | |
| 							err = (res == SMCResult_HaltFail) ? SMCError_Custom : SMCError_Okay;
 | |
| 							goto failed;
 | |
| 						}
 | |
| 						curlevel--;
 | |
| 					} 
 | |
| 					else if (c == '"') 
 | |
| 					{
 | |
| 						/* If we get a quote mark, we always restage, but we need to do it beforehand */
 | |
| 						if (strings[0].ptr)
 | |
| 						{
 | |
| 							strings[0].end = &parse_point[i];
 | |
| 							if (rotate(strings) != NULL)
 | |
| 							{
 | |
| 								err = SMCError_InvalidTokens;
 | |
| 								goto failed;
 | |
| 							}
 | |
| 						}
 | |
| 						strings[0].ptr = &parse_point[i];
 | |
| 						in_quote = true;
 | |
| 						ignoring = true;
 | |
| 					} 
 | |
| 					else if (!strings[0].ptr) 
 | |
| 					{
 | |
| 						/* If we have no string, we must start one */
 | |
| 						strings[0].ptr = &parse_point[i];
 | |
| 					}
 | |
| 					if (restage && strings[0].ptr)
 | |
| 					{
 | |
| 						strings[0].end = &parse_point[i];
 | |
| 						if (rotate(strings) != NULL)
 | |
| 						{
 | |
| 							err = SMCError_InvalidTokens;
 | |
| 							goto failed;
 | |
| 						}
 | |
| 					}
 | |
| 				} 
 | |
| 				else 
 | |
| 				{
 | |
| 					/* If we're eating a string and get whitespace, we need to restage.
 | |
| 					 * (Note that if we are quoted, this is being ignored)
 | |
| 					 */
 | |
| 					if (strings[0].ptr)
 | |
| 					{
 | |
| 						/*
 | |
| 						 * The specification says the second string in a pair does not need to be quoted.
 | |
| 						 * Thus, we check if there's already a string on the stack.
 | |
| 						 * If there's a newline, we always rotate so the newline has an empty starter.
 | |
| 						 */
 | |
| 						if (!strings[1].ptr)
 | |
| 						{
 | |
| 							/* There's no string, so we must move this one down and eat up another */
 | |
| 							strings[0].end = &parse_point[i];
 | |
| 							rotate(strings);
 | |
| 						} 
 | |
| 						else if (!strings[1].quoted) 
 | |
| 						{
 | |
| 							err = SMCError_InvalidTokens;
 | |
| 							goto failed;
 | |
| 						}
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			/* Advance which token we're on */
 | |
| 			states.col++;
 | |
| 		}
 | |
| 
 | |
| 		if (line_begin != in_buf)
 | |
| 		{
 | |
| 			/* The line buffer has advanced, so it's safe to copy N bytes back to the beginning.
 | |
| 			 * What's N?  N is the lowest point we're currently relying on.
 | |
| 			 */
 | |
| 			char *stage = lowstring(strings);
 | |
| 			if (!stage || stage > line_begin)
 | |
| 			{
 | |
| 				stage = line_begin;
 | |
| 			}
 | |
| 			unsigned int bytes = read - (stage - parse_point);
 | |
| 
 | |
| 			/* It is now safe to delete everything before the staged point */
 | |
| 			memmove(in_buf, stage, bytes);
 | |
| 
 | |
| 			/* Calculate the number of bytes in the new buffer */
 | |
| 			bytes = stage - in_buf;
 | |
| 			/* Relocate all the cached pointers to our new base */
 | |
| 			line_begin -= bytes;
 | |
| 			reloc(strings[0], bytes);
 | |
| 			reloc(strings[1], bytes);
 | |
| 			reloc(strings[2], bytes);
 | |
| 			if (reparse_point)
 | |
| 			{
 | |
| 				reparse_point -= bytes;
 | |
| 			}
 | |
| 			if (parse_point)
 | |
| 			{
 | |
| 				parse_point = &parse_point[read];
 | |
| 				parse_point -= bytes;
 | |
| 			}
 | |
| 		} 
 | |
| 		else if (read == sizeof(in_buf) - 1) 
 | |
| 		{
 | |
| 			err = SMCError_TokenOverflow;
 | |
| 			goto failed;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* If we're done parsing and there are tokens left over... */
 | |
| 	if (curlevel)
 | |
| 	{
 | |
| 		err = SMCError_InvalidSection5;
 | |
| 		goto failed;
 | |
| 	} 
 | |
| 	else if (strings[0].ptr || strings[1].ptr) 
 | |
| 	{
 | |
| 		err = SMCError_InvalidTokens;
 | |
| 		goto failed;
 | |
| 	}
 | |
| 	
 | |
| 	smc->ReadSMC_ParseEnd(false, false);
 | |
| 	
 | |
| 	if (pStates != NULL)
 | |
| 	{
 | |
| 		*pStates = states;
 | |
| 	}
 | |
| 
 | |
| 	return SMCError_Okay;
 | |
| 
 | |
| failed:
 | |
| 	if (pStates != NULL)
 | |
| 	{
 | |
| 		*pStates = states;
 | |
| 	}
 | |
| 
 | |
| 	smc->ReadSMC_ParseEnd(true, (err == SMCError_Custom));
 | |
| 
 | |
| 	return err;
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|  * INI parser 
 | |
|  */
 | |
| 
 | |
| bool TextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listener, unsigned int *line, unsigned int *col)
 | |
| {
 | |
| 	FILE *fp = fopen(file, "rt");
 | |
| 	unsigned int curline = 0;
 | |
| 	unsigned int curtok;
 | |
| 	size_t len;
 | |
| 
 | |
| 	if (!fp)
 | |
| 	{
 | |
| 		if (line)
 | |
| 		{
 | |
| 			*line = 0;
 | |
| 		}
 | |
| 
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	char buffer[2048];
 | |
| 	char *ptr, *save_ptr;
 | |
| 	bool in_quote;
 | |
| 
 | |
| 	while (!feof(fp))
 | |
| 	{
 | |
| 		curline++;
 | |
| 		curtok = 0;
 | |
| 		buffer[0] = '\0';
 | |
| 		if (fgets(buffer, sizeof(buffer), fp) == NULL)
 | |
| 		{
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		//:TODO: this will only run once, so find a nice way to move it out of the while loop
 | |
| 		/* If this is the first line, check the first three bytes for BOM */
 | |
| 		if (curline == 1 && 
 | |
| 			buffer[0] == (char)0xEF && 
 | |
| 			buffer[1] == (char)0xBB && 
 | |
| 			buffer[2] == (char)0xBF)
 | |
| 		{
 | |
| 			/* We have a UTF-8 marked file... skip these bytes */
 | |
| 			ptr = &buffer[3];
 | |
| 		} else {
 | |
| 			ptr = buffer;
 | |
| 		}
 | |
| 
 | |
| 		/***************************************************
 | |
| 		 * We preprocess the string before parsing tokens! *
 | |
| 		 ***************************************************/
 | |
| 
 | |
| 		/* First strip beginning whitespace */
 | |
| 		while (*ptr != '\0' && g_ws_chartable[(unsigned char)*ptr] != 0)
 | |
| 		{
 | |
| 			ptr++;
 | |
| 		}
 | |
| 
 | |
| 		len = strlen(ptr);
 | |
| 
 | |
| 		if (!len)
 | |
| 		{
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		/* Now search for comment characters */
 | |
| 		in_quote = false;
 | |
| 		save_ptr = ptr;
 | |
| 		for (size_t i=0; i<len; i++,ptr++)
 | |
| 		{
 | |
| 			if (!in_quote)
 | |
| 			{
 | |
| 				switch (*ptr)
 | |
| 				{
 | |
| 				case '"':
 | |
| 					{
 | |
| 						in_quote = true;
 | |
| 						break;
 | |
| 					}
 | |
| 				case ';':
 | |
| 					{
 | |
| 						/* Stop the loop */
 | |
| 						len = i;
 | |
| 						/* Terminate the string here */
 | |
| 						*ptr = '\0';
 | |
| 						break;
 | |
| 					}
 | |
| 				}
 | |
| 			} else {
 | |
| 				if (*ptr == '"')
 | |
| 				{
 | |
| 					in_quote = false;
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if (!len)
 | |
| 		{
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		ptr = save_ptr;
 | |
| 
 | |
| 		/* Lastly, strip ending whitespace off */
 | |
| 		for (size_t i=len-1; i<len; i--)
 | |
| 		{
 | |
| 			if (g_ws_chartable[(unsigned char)ptr[i]])
 | |
| 			{
 | |
| 				ptr[i] = '\0';
 | |
| 				len--;
 | |
| 			} else {
 | |
| 				break;
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if (!len)
 | |
| 		{
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		if (!ini_listener->ReadINI_RawLine(ptr, &curtok))
 | |
| 		{
 | |
| 			goto event_failed;
 | |
| 		}
 | |
| 
 | |
| 		if (*ptr == '[')
 | |
| 		{
 | |
| 			bool invalid_tokens = false;
 | |
| 			bool got_bracket = false;
 | |
| 			bool extra_tokens = false;
 | |
| 			char c;
 | |
| 			bool alnum;
 | |
| 			wchar_t wc;
 | |
| 
 | |
| 			for (size_t i=1; i<len; i++)
 | |
| 			{
 | |
| 				c = ptr[i];
 | |
| 				alnum = false;
 | |
| 
 | |
| 				if (c & (1<<7))
 | |
| 				{
 | |
| 					if (mbtowc(&wc, &ptr[i], len-i) != -1)
 | |
| 					{
 | |
| 						alnum = (iswalnum(wc) != 0);
 | |
| 						i += _GetUTF8CharBytes(&ptr[i]) - 1;
 | |
| 					}
 | |
| 				} else {
 | |
| 					alnum = (isalnum(c) != 0) || (g_ini_chartable1[(unsigned char)c] != 0);
 | |
| 				}
 | |
| 				if (!alnum)
 | |
| 				{
 | |
| 					/* First check - is this a bracket? */
 | |
| 					if (c == ']')
 | |
| 					{
 | |
| 						/* Yes! */
 | |
| 						got_bracket = true;
 | |
| 						/* If this isn't the last character... */
 | |
| 						if (i != len - 1)
 | |
| 						{
 | |
| 							extra_tokens = true;
 | |
| 						}
 | |
| 						/* terminate */
 | |
| 						ptr[i] = '\0';
 | |
| 						break;
 | |
| 					} else {
 | |
| 						/* n...No! Continue copying. */
 | |
| 						invalid_tokens = true;
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			/* Tell the handler */
 | |
| 			if (!ini_listener->ReadINI_NewSection(&ptr[1], invalid_tokens, got_bracket, extra_tokens, &curtok))
 | |
| 			{
 | |
| 				goto event_failed;
 | |
| 			}
 | |
| 		} else {
 | |
| 			char *key_ptr = ptr;
 | |
| 			char *val_ptr = NULL;
 | |
| 			char c;
 | |
| 			size_t first_space = 0;
 | |
| 			bool invalid_tokens = false;
 | |
| 			bool equal_token = false;
 | |
| 			bool quotes = false;
 | |
| 			bool alnum;
 | |
| 			wchar_t wc;
 | |
| 
 | |
| 			for (size_t i=0; i<len; i++)
 | |
| 			{
 | |
| 				c = ptr[i];
 | |
| 				alnum = false;
 | |
| 				/* is this an invalid char? */
 | |
| 				if (c & (1<<7))
 | |
| 				{
 | |
| 					if (mbtowc(&wc, &ptr[i], len-i) != -1)
 | |
| 					{
 | |
| 						alnum = (iswalnum(wc) != 0);
 | |
| 						i += _GetUTF8CharBytes(&ptr[i]) - 1;
 | |
| 					}
 | |
| 				} else {
 | |
| 					alnum = (isalnum(c) != 0) || (g_ini_chartable1[(unsigned char)c] != 0);
 | |
| 				}
 | |
| 
 | |
| 				if (!alnum)
 | |
| 				{
 | |
| 					if (g_ws_chartable[(unsigned char)c])
 | |
| 					{
 | |
| 						/* if it's a space, keep track of the first occurring space */
 | |
| 						if (!first_space)
 | |
| 						{
 | |
| 							first_space = i;
 | |
| 						}
 | |
| 					} else {
 | |
| 						if (c == '=')
 | |
| 						{
 | |
| 							/* if it's an equal sign, we're done with the key */
 | |
| 							if (first_space)
 | |
| 							{
 | |
| 								/* remove excess whitespace */
 | |
| 								key_ptr[first_space] = '\0';
 | |
| 							} else {
 | |
| 								/* remove the equal sign */
 | |
| 								key_ptr[i] = '\0';
 | |
| 							}
 | |
| 							if (ptr[++i] != '\0')
 | |
| 							{
 | |
| 								/* If this isn't the end, set next pointer */
 | |
| 								val_ptr = &ptr[i];
 | |
| 							}
 | |
| 							equal_token = true;
 | |
| 							break;
 | |
| 						} else {
 | |
| 							/* Mark that we got something invalid! */
 | |
| 							invalid_tokens = true;
 | |
| 							first_space = 0;
 | |
| 						}
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			/* Now we need to parse the value, if any */
 | |
| 			if (val_ptr)
 | |
| 			{
 | |
| 				/* eat up spaces! there shouldn't be any h*/
 | |
| 				while ((*val_ptr != '\0') && g_ws_chartable[(unsigned char)*val_ptr] != 0)
 | |
| 				{
 | |
| 					val_ptr++;
 | |
| 				}
 | |
| 				if (*val_ptr == '\0')
 | |
| 				{
 | |
| 					val_ptr = NULL;
 | |
| 					goto skip_value;
 | |
| 				}
 | |
| 				/* Do we have an initial quote? If so, the parsing rules change! */
 | |
| 				if (*val_ptr == '"' && *val_ptr != '\0')
 | |
| 				{
 | |
| 					len = strlen(val_ptr);
 | |
| 					if (val_ptr[len-1] == '"')
 | |
| 					{
 | |
| 						/* Strip quotes! */
 | |
| 						val_ptr[--len] = '\0';
 | |
| 						val_ptr++;
 | |
| 						quotes = true;
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| skip_value:
 | |
| 			/* We're done! */
 | |
| 			curtok = val_ptr - buffer;
 | |
| 			if (!ini_listener->ReadINI_KeyValue(key_ptr, val_ptr, invalid_tokens, equal_token, quotes, &curtok))
 | |
| 			{
 | |
| 				curtok = 0;
 | |
| 				goto event_failed;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (line)
 | |
| 	{
 | |
| 		*line = curline;
 | |
| 	}
 | |
| 
 | |
| 	fclose(fp);
 | |
| 
 | |
| 	return true;
 | |
| 
 | |
| event_failed:
 | |
| 	if (line)
 | |
| 	{
 | |
| 		*line = curline;
 | |
| 	}
 | |
| 
 | |
| 	if (col)
 | |
| 	{
 | |
| 		*col = curtok;
 | |
| 	}
 | |
| 
 | |
| 	fclose(fp);
 | |
| 
 | |
| 	return false;
 | |
| }
 | |
| 
 | |
| const char *TextParsers::GetSMCErrorString(SMCError err)
 | |
| {
 | |
| 	static const char *s_errors[] = 
 | |
| 	{
 | |
| 		NULL,
 | |
| 		"Stream failed to open",
 | |
| 		"Stream returned read error",
 | |
| 		NULL,
 | |
| 		"Un-quoted section has invalid tokens",
 | |
| 		"Section declared without header",
 | |
| 		"Section declared with unknown tokens",
 | |
| 		"Section ending without a matching section beginning",
 | |
| 		"Section beginning without a matching ending",
 | |
| 		"Line contained too many invalid tokens",
 | |
| 		"Token buffer overflowed",
 | |
| 		"A property was declared outside of a section",
 | |
| 	};
 | |
| 
 | |
| 	if (err < SMCError_Okay || err > SMCError_InvalidProperty1)
 | |
| 	{
 | |
| 		return NULL;
 | |
| 	}
 | |
| 
 | |
| 	return s_errors[err];
 | |
| }
 |