added and tested UTF-8 support for ini files
--HG-- extra : convert_revision : svn%3A39bc706e-5318-0410-9160-8a85361fbb7c/trunk%40155
This commit is contained in:
parent
a5f4929c60
commit
89b125f6c1
@ -1,11 +1,14 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <wctype.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include "CTextParsers.h"
|
#include "CTextParsers.h"
|
||||||
|
|
||||||
CTextParsers g_TextParse;
|
CTextParsers g_TextParse;
|
||||||
|
|
||||||
/**
 * Byte-indexed lookup tables, filled in by the CTextParsers constructor.
 *
 * g_ini_chartable1: non-zero for the extra punctuation characters that the
 *                   INI parser accepts in addition to alphanumerics.
 * g_ws_chartable:   non-zero for the whitespace characters the INI parser strips.
 *
 * Each table has 256 entries so that every possible byte value (0x00-0xFF)
 * has a slot; with only 255 entries the byte 0xFF indexed one past the end.
 * Index with the unsigned value of the byte.
 */
static int g_ini_chartable1[256] = {0};
static int g_ws_chartable[256] = {0};
|
||||||
|
|
||||||
CTextParsers::CTextParsers()
|
CTextParsers::CTextParsers()
|
||||||
{
|
{
|
||||||
@ -17,6 +20,17 @@ CTextParsers::CTextParsers()
|
|||||||
g_ini_chartable1['$'] = 1;
|
g_ini_chartable1['$'] = 1;
|
||||||
g_ini_chartable1['?'] = 1;
|
g_ini_chartable1['?'] = 1;
|
||||||
g_ini_chartable1['/'] = 1;
|
g_ini_chartable1['/'] = 1;
|
||||||
|
g_ws_chartable['\n'] = 1;
|
||||||
|
g_ws_chartable['\v'] = 1;
|
||||||
|
g_ws_chartable['\r'] = 1;
|
||||||
|
g_ws_chartable['\t'] = 1;
|
||||||
|
g_ws_chartable['\f'] = 1;
|
||||||
|
g_ws_chartable[' '] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int CTextParsers::GetUTF8CharBytes(const char *stream)
|
||||||
|
{
|
||||||
|
return _GetUTF8CharBytes(stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CTextParsers::ParseFile_SMC(const char *file, ITextListener_SMC *smc_listener, unsigned int *line, unsigned int *col)
|
bool CTextParsers::ParseFile_SMC(const char *file, ITextListener_SMC *smc_listener, unsigned int *line, unsigned int *col)
|
||||||
@ -50,6 +64,7 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
|
|||||||
char buffer[2048];
|
char buffer[2048];
|
||||||
char *ptr, *save_ptr;
|
char *ptr, *save_ptr;
|
||||||
bool in_quote;
|
bool in_quote;
|
||||||
|
|
||||||
while (!feof(fp))
|
while (!feof(fp))
|
||||||
{
|
{
|
||||||
curline++;
|
curline++;
|
||||||
@ -60,11 +75,25 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Preprocess the string before anything */
|
//:TODO: this will only run once, so find a nice way to move it out of the while loop
|
||||||
ptr = buffer;
|
/* If this is the first line, check the first three bytes for BOM */
|
||||||
|
if (curline == 1 &&
|
||||||
|
buffer[0] == (char)0xEF &&
|
||||||
|
buffer[1] == (char)0xBB &&
|
||||||
|
buffer[2] == (char)0xBF)
|
||||||
|
{
|
||||||
|
/* We have a UTF-8 marked file... skip these bytes */
|
||||||
|
ptr = &buffer[3];
|
||||||
|
} else {
|
||||||
|
ptr = buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/***************************************************
|
||||||
|
* We preprocess the string before parsing tokens! *
|
||||||
|
***************************************************/
|
||||||
|
|
||||||
/* First strip beginning whitespace */
|
/* First strip beginning whitespace */
|
||||||
while ((*ptr != '\0') && isspace(*ptr))
|
while (*ptr != '\0' && g_ws_chartable[*ptr] != 0)
|
||||||
{
|
{
|
||||||
ptr++;
|
ptr++;
|
||||||
}
|
}
|
||||||
@ -117,7 +146,7 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
|
|||||||
/* Lastly, strip ending whitespace off */
|
/* Lastly, strip ending whitespace off */
|
||||||
for (size_t i=len-1; i>=0 && i<len; i--)
|
for (size_t i=len-1; i>=0 && i<len; i--)
|
||||||
{
|
{
|
||||||
if (isspace(ptr[i]))
|
if (g_ws_chartable[ptr[i]])
|
||||||
{
|
{
|
||||||
ptr[i] = '\0';
|
ptr[i] = '\0';
|
||||||
len--;
|
len--;
|
||||||
@ -142,11 +171,25 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
|
|||||||
bool got_bracket = false;
|
bool got_bracket = false;
|
||||||
bool extra_tokens = false;
|
bool extra_tokens = false;
|
||||||
char c;
|
char c;
|
||||||
|
bool alnum;
|
||||||
|
wchar_t wc;
|
||||||
|
|
||||||
for (size_t i=1; i<len; i++)
|
for (size_t i=1; i<len; i++)
|
||||||
{
|
{
|
||||||
c = ptr[i];
|
c = ptr[i];
|
||||||
if (!isalnum(c) && !g_ini_chartable1[c])
|
alnum = false;
|
||||||
|
|
||||||
|
if (c & (1<<7))
|
||||||
|
{
|
||||||
|
if (mbtowc(&wc, &ptr[i], len-i) != -1)
|
||||||
|
{
|
||||||
|
alnum = (iswalnum(wc) != 0);
|
||||||
|
i += _GetUTF8CharBytes(&ptr[i]) - 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
alnum = (isalnum(c) != 0) || (g_ini_chartable1[c] != 0);
|
||||||
|
}
|
||||||
|
if (!alnum)
|
||||||
{
|
{
|
||||||
/* First check - is this a bracket? */
|
/* First check - is this a bracket? */
|
||||||
if (c == ']')
|
if (c == ']')
|
||||||
@ -181,14 +224,28 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
|
|||||||
bool invalid_tokens = false;
|
bool invalid_tokens = false;
|
||||||
bool equal_token = false;
|
bool equal_token = false;
|
||||||
bool quotes = false;
|
bool quotes = false;
|
||||||
|
bool alnum;
|
||||||
|
wchar_t wc;
|
||||||
|
|
||||||
for (size_t i=0; i<len; i++)
|
for (size_t i=0; i<len; i++)
|
||||||
{
|
{
|
||||||
c = ptr[i];
|
c = ptr[i];
|
||||||
|
alnum = false;
|
||||||
/* is this an invalid char? */
|
/* is this an invalid char? */
|
||||||
if (!isalnum(c) && !g_ini_chartable1[c])
|
if (c & (1<<7))
|
||||||
{
|
{
|
||||||
if (isspace(c))
|
if (mbtowc(&wc, &ptr[i], len-i) != -1)
|
||||||
|
{
|
||||||
|
alnum = (iswalnum(wc) != 0);
|
||||||
|
i += _GetUTF8CharBytes(&ptr[i]) - 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
alnum = (isalnum(c) != 0) || (g_ini_chartable1[c] != 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!alnum)
|
||||||
|
{
|
||||||
|
if (g_ws_chartable[c])
|
||||||
{
|
{
|
||||||
/* if it's a space, keep track of the last space */
|
/* if it's a space, keep track of the last space */
|
||||||
if (!first_space)
|
if (!first_space)
|
||||||
@ -227,7 +284,7 @@ bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listen
|
|||||||
if (val_ptr)
|
if (val_ptr)
|
||||||
{
|
{
|
||||||
/* eat up spaces! there shouldn't be any h*/
|
/* eat up spaces! there shouldn't be any h*/
|
||||||
while ((*val_ptr != '\0') && isspace(*val_ptr))
|
while ((*val_ptr != '\0') && g_ws_chartable[*val_ptr] != 0)
|
||||||
{
|
{
|
||||||
val_ptr++;
|
val_ptr++;
|
||||||
}
|
}
|
||||||
|
@ -5,9 +5,26 @@
|
|||||||
|
|
||||||
using namespace SourceMod;
|
using namespace SourceMod;
|
||||||
|
|
||||||
|
/**
 * @brief Returns the number of bytes occupied by the UTF-8 character that
 * starts at the given position.
 *
 * Only the first byte is inspected; the continuation bytes that follow are
 * not validated. A byte that cannot start a character (a continuation byte
 * 10xxxxxx, or an invalid lead byte 11111xxx) is reported as a single byte so
 * that callers advancing by the returned length never skip over valid data
 * and can resynchronise on the next byte.
 *
 * @param stream	Pointer to the byte to classify; must not be NULL.
 * @return			1 for ASCII and for bytes that are not a valid lead byte,
 *					otherwise 2, 3 or 4 according to the lead byte.
 */
inline unsigned int _GetUTF8CharBytes(const char *stream)
{
	/* Work on the unsigned value so the high-bit tests are well defined. */
	const unsigned char lead = static_cast<unsigned char>(*stream);

	if ((lead & 0x80) == 0x00)
	{
		/* 0xxxxxxx - plain ASCII */
		return 1;
	}

	if ((lead & 0xE0) == 0xC0)
	{
		/* 110xxxxx - two byte sequence */
		return 2;
	}

	if ((lead & 0xF0) == 0xE0)
	{
		/* 1110xxxx - three byte sequence */
		return 3;
	}

	if ((lead & 0xF8) == 0xF0)
	{
		/* 11110xxx - four byte sequence */
		return 4;
	}

	/* 10xxxxxx (stray continuation byte) or 11111xxx (never valid in UTF-8) */
	return 1;
}
|
||||||
|
|
||||||
class CTextParsers : public ITextParsers
|
class CTextParsers : public ITextParsers
|
||||||
{
|
{
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CTextParsers();
|
CTextParsers();
|
||||||
public:
|
public:
|
||||||
@ -20,6 +37,8 @@ public:
|
|||||||
ITextListener_SMC *smc_listener,
|
ITextListener_SMC *smc_listener,
|
||||||
unsigned int *line,
|
unsigned int *line,
|
||||||
unsigned int *col);
|
unsigned int *col);
|
||||||
|
|
||||||
|
virtual unsigned int GetUTF8CharBytes(const char *stream);
|
||||||
};
|
};
|
||||||
|
|
||||||
extern CTextParsers g_TextParse;
|
extern CTextParsers g_TextParse;
|
||||||
|
@ -238,6 +238,15 @@ namespace SourceMod
|
|||||||
ITextListener_SMC *smc_listener,
|
ITextListener_SMC *smc_listener,
|
||||||
unsigned int *line,
|
unsigned int *line,
|
||||||
unsigned int *col) =0;
|
unsigned int *col) =0;
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* @brief Returns the number of bytes that a multi-byte character contains in a UTF-8 stream.
|
||||||
|
* If the current character is not multi-byte, the function returns 1.
|
||||||
|
*
|
||||||
|
* @param stream Pointer to a UTF-8 encoded (possibly multi-byte) character string.
|
||||||
|
* @return Number of bytes in current character.
|
||||||
|
*/
|
||||||
|
virtual unsigned int GetUTF8CharBytes(const char *stream) =0;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#include <oslink.h>
|
#include <oslink.h>
|
||||||
#include "sourcemm_api.h"
|
#include "sourcemm_api.h"
|
||||||
#include "sm_version.h"
|
#include "sm_version.h"
|
||||||
|
#include "CTextParsers.h"
|
||||||
|
|
||||||
SourceMod_Core g_SourceMod;
|
SourceMod_Core g_SourceMod;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user