added and tested UTF-8 support for ini files

--HG--
extra : convert_revision : svn%3A39bc706e-5318-0410-9160-8a85361fbb7c/trunk%40155
This commit is contained in:
David Anderson 2006-11-07 00:53:06 +00:00
parent a5f4929c60
commit 89b125f6c1
4 changed files with 98 additions and 12 deletions

View File

@ -1,11 +1,14 @@
#include <stdio.h>
#include <ctype.h>
#include <wctype.h>
#include <string.h>
#include <stdlib.h>
#include "CTextParsers.h"
CTextParsers g_TextParse;
/*
 * Per-byte lookup tables. Each table has one slot for every possible byte
 * value (0..255); with only 255 entries the byte 0xFF would index one past
 * the end of the array. Look entries up with the byte converted to
 * unsigned char so that high-bit bytes do not become negative indices.
 */
static int g_ini_chartable1[256] = {0};	/* non-zero: extra non-alphanumeric character accepted in INI tokens */
static int g_ws_chartable[256] = {0};	/* non-zero: byte is treated as whitespace */
CTextParsers::CTextParsers()
{
@ -17,6 +20,17 @@ CTextParsers::CTextParsers()
g_ini_chartable1['$'] = 1;
g_ini_chartable1['?'] = 1;
g_ini_chartable1['/'] = 1;
g_ws_chartable['\n'] = 1;
g_ws_chartable['\v'] = 1;
g_ws_chartable['\r'] = 1;
g_ws_chartable['\t'] = 1;
g_ws_chartable['\f'] = 1;
g_ws_chartable[' '] = 1;
}
/**
 * Reports how many bytes the character at the start of a UTF-8 stream occupies.
 * This is the virtual entry point; the work is done by the inline helper
 * _GetUTF8CharBytes().
 *
 * @param stream	Pointer to the first byte of the character to measure.
 * @return			Byte count reported by _GetUTF8CharBytes() for that character.
 */
unsigned int CTextParsers::GetUTF8CharBytes(const char *stream)
{
	const unsigned int numBytes = _GetUTF8CharBytes(stream);

	return numBytes;
}
bool CTextParsers::ParseFile_SMC(const char *file, ITextListener_SMC *smc_listener, unsigned int *line, unsigned int *col)
@ -50,6 +64,7 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
char buffer[2048];
char *ptr, *save_ptr;
bool in_quote;
while (!feof(fp))
{
curline++;
@ -59,12 +74,26 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
{
break;
}
/* Preprocess the string before anything */
ptr = buffer;
//:TODO: this will only run once, so find a nice way to move it out of the while loop
/* If this is the first line, check the first three bytes for BOM */
if (curline == 1 &&
buffer[0] == (char)0xEF &&
buffer[1] == (char)0xBB &&
buffer[2] == (char)0xBF)
{
/* We have a UTF-8 marked file... skip these bytes */
ptr = &buffer[3];
} else {
ptr = buffer;
}
/***************************************************
* We preprocess the string before parsing tokens! *
***************************************************/
/* First strip beginning whitespace */
while ((*ptr != '\0') && isspace(*ptr))
while (*ptr != '\0' && g_ws_chartable[*ptr] != 0)
{
ptr++;
}
@ -117,7 +146,7 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
/* Lastly, strip ending whitespace off */
for (size_t i=len-1; i>=0 && i<len; i--)
{
if (isspace(ptr[i]))
if (g_ws_chartable[ptr[i]])
{
ptr[i] = '\0';
len--;
@ -142,11 +171,25 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
bool got_bracket = false;
bool extra_tokens = false;
char c;
bool alnum;
wchar_t wc;
for (size_t i=1; i<len; i++)
{
c = ptr[i];
if (!isalnum(c) && !g_ini_chartable1[c])
alnum = false;
if (c & (1<<7))
{
if (mbtowc(&wc, &ptr[i], len-i) != -1)
{
alnum = (iswalnum(wc) != 0);
i += _GetUTF8CharBytes(&ptr[i]) - 1;
}
} else {
alnum = (isalnum(c) != 0) || (g_ini_chartable1[c] != 0);
}
if (!alnum)
{
/* First check - is this a bracket? */
if (c == ']')
@ -181,14 +224,28 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
bool invalid_tokens = false;
bool equal_token = false;
bool quotes = false;
bool alnum;
wchar_t wc;
for (size_t i=0; i<len; i++)
{
c = ptr[i];
alnum = false;
/* is this an invalid char? */
if (!isalnum(c) && !g_ini_chartable1[c])
if (c & (1<<7))
{
if (isspace(c))
if (mbtowc(&wc, &ptr[i], len-i) != -1)
{
alnum = (iswalnum(wc) != 0);
i += _GetUTF8CharBytes(&ptr[i]) - 1;
}
} else {
alnum = (isalnum(c) != 0) || (g_ini_chartable1[c] != 0);
}
if (!alnum)
{
if (g_ws_chartable[c])
{
/* if it's a space, keep track of the last space */
if (!first_space)
@ -227,7 +284,7 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
if (val_ptr)
{
/* eat up spaces! there shouldn't be any */
while ((*val_ptr != '\0') && isspace(*val_ptr))
while ((*val_ptr != '\0') && g_ws_chartable[*val_ptr] != 0)
{
val_ptr++;
}

View File

@ -5,9 +5,26 @@
using namespace SourceMod;
/**
 * Returns the length in bytes of the UTF-8 character that starts at `stream`,
 * determined from the lead byte alone (the trailing bytes are not inspected).
 *
 *   0xxxxxxx -> 1 (ASCII)
 *   110xxxxx -> 2
 *   1110xxxx -> 3
 *   11110xxx -> 4
 *
 * Any other byte (a 10xxxxxx continuation byte or an 11111xxx byte) cannot
 * start a character. For those the function returns 1 so that a caller which
 * advances by the returned count steps over just the bad byte instead of
 * skipping up to three unrelated bytes that follow it.
 *
 * @param stream	Pointer to the byte to classify; must be readable.
 * @return			Number of bytes (1..4) to advance past this character.
 */
inline unsigned int _GetUTF8CharBytes(const char *stream)
{
	/* Read as unsigned so the bit masks behave the same whether plain char is signed or not */
	const unsigned char c = *reinterpret_cast<const unsigned char *>(stream);

	if ((c & 0x80) == 0x00)
	{
		return 1;
	}
	if ((c & 0xE0) == 0xC0)
	{
		return 2;
	}
	if ((c & 0xF0) == 0xE0)
	{
		return 3;
	}
	if ((c & 0xF8) == 0xF0)
	{
		return 4;
	}

	/* Continuation byte or invalid lead byte: consume it on its own */
	return 1;
}
class CTextParsers : public ITextParsers
{
public:
CTextParsers();
public:
@ -20,6 +37,8 @@ public:
ITextListener_SMC *smc_listener,
unsigned int *line,
unsigned int *col);
virtual unsigned int GetUTF8CharBytes(const char *stream);
};
extern CTextParsers g_TextParse;

View File

@ -238,6 +238,15 @@ namespace SourceMod
ITextListener_SMC *smc_listener,
unsigned int *line,
unsigned int *col) =0;
public:
/**
* @brief Returns the number of bytes that a multi-byte character contains in a UTF-8 stream.
* If the current character is not multi-byte, the function returns 1.
*
* @param stream Pointer to a UTF-8 encoded character string.
* @return Number of bytes in current character.
*/
virtual unsigned int GetUTF8CharBytes(const char *stream) =0;
};
};

View File

@ -1,6 +1,7 @@
#include <oslink.h>
#include "sourcemm_api.h"
#include "sm_version.h"
#include "CTextParsers.h"
SourceMod_Core g_SourceMod;