added and tested UTF-8 support for ini files
--HG-- extra : convert_revision : svn%3A39bc706e-5318-0410-9160-8a85361fbb7c/trunk%40155
This commit is contained in:
		
							parent
							
								
									a5f4929c60
								
							
						
					
					
						commit
						89b125f6c1
					
				@ -1,11 +1,14 @@
 | 
				
			|||||||
#include <stdio.h>
 | 
					#include <stdio.h>
 | 
				
			||||||
#include <ctype.h>
 | 
					#include <ctype.h>
 | 
				
			||||||
 | 
					#include <wctype.h>
 | 
				
			||||||
#include <string.h>
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
#include "CTextParsers.h"
 | 
					#include "CTextParsers.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CTextParsers g_TextParse;
 | 
					CTextParsers g_TextParse;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int g_ini_chartable1[255] = {0};
 | 
					static int g_ini_chartable1[255] = {0};
 | 
				
			||||||
 | 
					static int g_ws_chartable[255] = {0};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CTextParsers::CTextParsers()
 | 
					CTextParsers::CTextParsers()
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@ -17,6 +20,17 @@ CTextParsers::CTextParsers()
 | 
				
			|||||||
	g_ini_chartable1['$'] = 1;
 | 
						g_ini_chartable1['$'] = 1;
 | 
				
			||||||
	g_ini_chartable1['?'] = 1;
 | 
						g_ini_chartable1['?'] = 1;
 | 
				
			||||||
	g_ini_chartable1['/'] = 1;
 | 
						g_ini_chartable1['/'] = 1;
 | 
				
			||||||
 | 
						g_ws_chartable['\n'] = 1;
 | 
				
			||||||
 | 
						g_ws_chartable['\v'] = 1;
 | 
				
			||||||
 | 
						g_ws_chartable['\r'] = 1;
 | 
				
			||||||
 | 
						g_ws_chartable['\t'] = 1;
 | 
				
			||||||
 | 
						g_ws_chartable['\f'] = 1;
 | 
				
			||||||
 | 
						g_ws_chartable[' '] = 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					unsigned int CTextParsers::GetUTF8CharBytes(const char *stream)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return _GetUTF8CharBytes(stream);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bool CTextParsers::ParseFile_SMC(const char *file, ITextListener_SMC *smc_listener, unsigned int *line, unsigned int *col)
 | 
					bool CTextParsers::ParseFile_SMC(const char *file, ITextListener_SMC *smc_listener, unsigned int *line, unsigned int *col)
 | 
				
			||||||
@ -50,6 +64,7 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
 | 
				
			|||||||
	char buffer[2048];
 | 
						char buffer[2048];
 | 
				
			||||||
	char *ptr, *save_ptr;
 | 
						char *ptr, *save_ptr;
 | 
				
			||||||
	bool in_quote;
 | 
						bool in_quote;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	while (!feof(fp))
 | 
						while (!feof(fp))
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		curline++;
 | 
							curline++;
 | 
				
			||||||
@ -60,11 +75,25 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
 | 
				
			|||||||
			break;
 | 
								break;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/* Preprocess the string before anything */
 | 
							//:TODO: this will only run once, so find a nice way to move it out of the while loop
 | 
				
			||||||
		ptr = buffer;
 | 
							/* If this is the first line, check the first three bytes for BOM */
 | 
				
			||||||
 | 
							if (curline == 1 && 
 | 
				
			||||||
 | 
								buffer[0] == (char)0xEF && 
 | 
				
			||||||
 | 
								buffer[1] == (char)0xBB && 
 | 
				
			||||||
 | 
								buffer[2] == (char)0xBF)
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								/* We have a UTF-8 marked file... skip these bytes */
 | 
				
			||||||
 | 
								ptr = &buffer[3];
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								ptr = buffer;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/***************************************************
 | 
				
			||||||
 | 
							 * We preprocess the string before parsing tokens! *
 | 
				
			||||||
 | 
							 ***************************************************/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/* First strip beginning whitespace */
 | 
							/* First strip beginning whitespace */
 | 
				
			||||||
		while ((*ptr != '\0') && isspace(*ptr))
 | 
							while (*ptr != '\0' && g_ws_chartable[*ptr] != 0)
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			ptr++;
 | 
								ptr++;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
@ -117,7 +146,7 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
 | 
				
			|||||||
		/* Lastly, strip ending whitespace off */
 | 
							/* Lastly, strip ending whitespace off */
 | 
				
			||||||
		for (size_t i=len-1; i>=0 && i<len; i--)
 | 
							for (size_t i=len-1; i>=0 && i<len; i--)
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			if (isspace(ptr[i]))
 | 
								if (g_ws_chartable[ptr[i]])
 | 
				
			||||||
			{
 | 
								{
 | 
				
			||||||
				ptr[i] = '\0';
 | 
									ptr[i] = '\0';
 | 
				
			||||||
				len--;
 | 
									len--;
 | 
				
			||||||
@ -142,11 +171,25 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
 | 
				
			|||||||
			bool got_bracket = false;
 | 
								bool got_bracket = false;
 | 
				
			||||||
			bool extra_tokens = false;
 | 
								bool extra_tokens = false;
 | 
				
			||||||
			char c;
 | 
								char c;
 | 
				
			||||||
 | 
								bool alnum;
 | 
				
			||||||
 | 
								wchar_t wc;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			for (size_t i=1; i<len; i++)
 | 
								for (size_t i=1; i<len; i++)
 | 
				
			||||||
			{
 | 
								{
 | 
				
			||||||
				c = ptr[i];
 | 
									c = ptr[i];
 | 
				
			||||||
				if (!isalnum(c) && !g_ini_chartable1[c])
 | 
									alnum = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									if (c & (1<<7))
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										if (mbtowc(&wc, &ptr[i], len-i) != -1)
 | 
				
			||||||
 | 
										{
 | 
				
			||||||
 | 
											alnum = (iswalnum(wc) != 0);
 | 
				
			||||||
 | 
											i += _GetUTF8CharBytes(&ptr[i]) - 1;
 | 
				
			||||||
 | 
										}
 | 
				
			||||||
 | 
									} else {
 | 
				
			||||||
 | 
										alnum = (isalnum(c) != 0) || (g_ini_chartable1[c] != 0);
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
									if (!alnum)
 | 
				
			||||||
				{
 | 
									{
 | 
				
			||||||
					/* First check - is this a bracket? */
 | 
										/* First check - is this a bracket? */
 | 
				
			||||||
					if (c == ']')
 | 
										if (c == ']')
 | 
				
			||||||
@ -181,14 +224,28 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
 | 
				
			|||||||
			bool invalid_tokens = false;
 | 
								bool invalid_tokens = false;
 | 
				
			||||||
			bool equal_token = false;
 | 
								bool equal_token = false;
 | 
				
			||||||
			bool quotes = false;
 | 
								bool quotes = false;
 | 
				
			||||||
 | 
								bool alnum;
 | 
				
			||||||
 | 
								wchar_t wc;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			for (size_t i=0; i<len; i++)
 | 
								for (size_t i=0; i<len; i++)
 | 
				
			||||||
			{
 | 
								{
 | 
				
			||||||
				c = ptr[i];
 | 
									c = ptr[i];
 | 
				
			||||||
 | 
									alnum = false;
 | 
				
			||||||
				/* is this an invalid char? */
 | 
									/* is this an invalid char? */
 | 
				
			||||||
				if (!isalnum(c) && !g_ini_chartable1[c])
 | 
									if (c & (1<<7))
 | 
				
			||||||
				{
 | 
									{
 | 
				
			||||||
					if (isspace(c))
 | 
										if (mbtowc(&wc, &ptr[i], len-i) != -1)
 | 
				
			||||||
 | 
										{
 | 
				
			||||||
 | 
											alnum = (iswalnum(wc) != 0);
 | 
				
			||||||
 | 
											i += _GetUTF8CharBytes(&ptr[i]) - 1;
 | 
				
			||||||
 | 
										}
 | 
				
			||||||
 | 
									} else {
 | 
				
			||||||
 | 
										alnum = (isalnum(c) != 0) || (g_ini_chartable1[c] != 0);
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									if (!alnum)
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										if (g_ws_chartable[c])
 | 
				
			||||||
					{
 | 
										{
 | 
				
			||||||
						/* if it's a space, keep track of the last space */
 | 
											/* if it's a space, keep track of the last space */
 | 
				
			||||||
						if (!first_space)
 | 
											if (!first_space)
 | 
				
			||||||
@ -227,7 +284,7 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
 | 
				
			|||||||
			if (val_ptr)
 | 
								if (val_ptr)
 | 
				
			||||||
			{
 | 
								{
 | 
				
			||||||
				/* eat up spaces! there shouldn't be any h*/
 | 
									/* eat up spaces! there shouldn't be any h*/
 | 
				
			||||||
				while ((*val_ptr != '\0') && isspace(*val_ptr))
 | 
									while ((*val_ptr != '\0') && g_ws_chartable[*val_ptr] != 0)
 | 
				
			||||||
				{
 | 
									{
 | 
				
			||||||
					val_ptr++;
 | 
										val_ptr++;
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
 | 
				
			|||||||
@ -5,9 +5,26 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
using namespace SourceMod;
 | 
					using namespace SourceMod;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * @brief Returns the byte length of the UTF-8 sequence that starts at the given byte.
					 *
					 * The length is derived from the lead byte only; trailing bytes are neither
					 * read nor validated.  Any byte that cannot start a multi-byte sequence
					 * (plain ASCII, a continuation byte of the form 10xxxxxx, or one of the
					 * invalid lead bytes 0xF8-0xFF) yields 1, so a caller stepping through a
					 * buffer always advances by at least one byte and never jumps over data
					 * because of a malformed sequence.
					 *
					 * @param stream		Pointer to the byte to classify.  It is dereferenced
					 *						unconditionally, so it must not be NULL.
					 * @return				1, 2, 3 or 4.
					 */
					inline unsigned int _GetUTF8CharBytes(const char *stream)
					{
						/* Work on an unsigned value so the high bit never sign-extends. */
						const unsigned char c = static_cast<unsigned char>(*stream);

						/* 110xxxxx - lead byte of a two-byte sequence */
						if ((c & 0xE0) == 0xC0)
						{
							return 2;
						}

						/* 1110xxxx - lead byte of a three-byte sequence */
						if ((c & 0xF0) == 0xE0)
						{
							return 3;
						}

						/* 11110xxx - lead byte of a four-byte sequence */
						if ((c & 0xF8) == 0xF0)
						{
							return 4;
						}

						/* 0xxxxxxx (ASCII), 10xxxxxx (continuation) or 11111xxx (invalid) */
						return 1;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class CTextParsers : public ITextParsers
 | 
					class CTextParsers : public ITextParsers
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
	CTextParsers();
 | 
						CTextParsers();
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
@ -20,6 +37,8 @@ public:
 | 
				
			|||||||
		ITextListener_SMC *smc_listener, 
 | 
							ITextListener_SMC *smc_listener, 
 | 
				
			||||||
		unsigned int *line, 
 | 
							unsigned int *line, 
 | 
				
			||||||
		unsigned int *col);
 | 
							unsigned int *col);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						virtual unsigned int GetUTF8CharBytes(const char *stream);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern CTextParsers g_TextParse;
 | 
					extern CTextParsers g_TextParse;
 | 
				
			||||||
 | 
				
			|||||||
@ -238,6 +238,15 @@ namespace SourceMod
 | 
				
			|||||||
									ITextListener_SMC *smc_listener, 
 | 
														ITextListener_SMC *smc_listener, 
 | 
				
			||||||
									unsigned int *line, 
 | 
														unsigned int *line, 
 | 
				
			||||||
									unsigned int *col) =0;
 | 
														unsigned int *col) =0;
 | 
				
			||||||
 | 
						public:
 | 
				
			||||||
 | 
							/**
 | 
				
			||||||
 | 
							 * @brief Returns the number of bytes that a multi-byte character contains in a UTF-8 stream.
 | 
				
			||||||
 | 
							 * If the current character is not multi-byte, the function returns 1.
 | 
				
			||||||
 | 
							 *
 | 
				
			||||||
 | 
							 * @param stream		Pointer to a UTF-8 encoded character string.
 | 
				
			||||||
 | 
							 * @return				Number of bytes in current character.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							virtual unsigned int GetUTF8CharBytes(const char *stream) =0;
 | 
				
			||||||
	};
 | 
						};
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -1,6 +1,7 @@
 | 
				
			|||||||
#include <oslink.h>
 | 
					#include <oslink.h>
 | 
				
			||||||
#include "sourcemm_api.h"
 | 
					#include "sourcemm_api.h"
 | 
				
			||||||
#include "sm_version.h"
 | 
					#include "sm_version.h"
 | 
				
			||||||
 | 
					#include "CTextParsers.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SourceMod_Core g_SourceMod;
 | 
					SourceMod_Core g_SourceMod;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user