sourcemod/core/CTextParsers.cpp
David Anderson 65026ef57e finalized new structure and imported newly proposed plugin system API
added zlib to source tree
added VM API to source tree

--HG--
extra : convert_revision : svn%3A39bc706e-5318-0410-9160-8a85361fbb7c/trunk%40164
2006-11-08 06:30:20 +00:00

340 lines
6.6 KiB
C++

#include <stdio.h>
#include <ctype.h>
#include <wctype.h>
#include <string.h>
#include <stdlib.h>
#include "CTextParsers.h"
/* Global parser instance. Its constructor populates the lookup tables below. */
CTextParsers g_TextParse;

/**
 * Byte-indexed lookup tables (non-zero entry == byte belongs to the class):
 *   g_ini_chartable1 - punctuation accepted in INI names in addition to alphanumerics
 *   g_ws_chartable   - whitespace characters
 *
 * Both tables hold 256 entries so that every possible unsigned char value
 * (0..255) is a valid index; with 255 entries the byte 0xFF was out of range.
 */
static int g_ini_chartable1[256] = {0};
static int g_ws_chartable[256] = {0};
/**
 * Fills the two file-level character tables:
 *  - g_ini_chartable1 gets a 1 for each punctuation character listed in name_punct
 *  - g_ws_chartable gets a 1 for each whitespace character listed in blanks
 */
CTextParsers::CTextParsers()
{
    static const char name_punct[] = "_-,+.$?/";
    static const char blanks[] = "\n\v\r\t\f ";

    for (const char *p = name_punct; *p != '\0'; ++p)
    {
        g_ini_chartable1[static_cast<unsigned char>(*p)] = 1;
    }

    for (const char *p = blanks; *p != '\0'; ++p)
    {
        g_ws_chartable[static_cast<unsigned char>(*p)] = 1;
    }
}
/**
 * Public wrapper that forwards to _GetUTF8CharBytes() (declared outside this
 * file section) and returns its result unchanged.
 *
 * @param stream    Pointer handed straight to _GetUTF8CharBytes(); presumably
 *                  the first byte of a UTF-8 encoded character - confirm
 *                  against the helper's declaration.
 * @return          Whatever _GetUTF8CharBytes() reports for that pointer.
 */
unsigned int CTextParsers::GetUTF8CharBytes(const char *stream)
{
    const unsigned int width = _GetUTF8CharBytes(stream);

    return width;
}
/**
 * SMC file parser entry point. Not implemented yet: none of the arguments are
 * consulted and the call always reports failure.
 *
 * @return          Always false.
 */
bool CTextParsers::ParseFile_SMC(const char *file, ITextListener_SMC *smc_listener, unsigned int *line, unsigned int *col, bool strict)
{
    /* Placeholder: this parser is meant to be far stricter than the INI one. */
    (void)file;
    (void)smc_listener;
    (void)line;
    (void)col;
    (void)strict;

    return false;
}
/**
 * Tells whether a byte is flagged in g_ws_chartable.
 *
 * The byte is converted through unsigned char before it is used as an index:
 * plain char may be signed, in which case any byte >= 0x80 would otherwise
 * produce a negative index. The bounds check keeps the lookup valid whatever
 * the table's declared size is.
 */
static inline bool IniIsWhitespace(char c)
{
    const size_t idx = static_cast<unsigned char>(c);

    return idx < (sizeof(g_ws_chartable) / sizeof(g_ws_chartable[0]))
        && g_ws_chartable[idx] != 0;
}

/**
 * Classifies the character that starts at text[*pos] as "valid inside a
 * section or key name" (alphanumeric, or flagged in g_ini_chartable1).
 *
 * Bytes with the high bit set are decoded with mbtowc() (so the result
 * depends on the current C locale) and tested with iswalnum(). When decoding
 * succeeds, *pos is advanced to the last byte of the sequence as measured by
 * _GetUTF8CharBytes(), so the caller's loop increment lands on the next
 * character. When decoding fails, *pos is left alone and false is returned.
 *
 * @param text      Line being scanned.
 * @param len       Number of bytes in text.
 * @param pos       In/out index of the character to classify.
 * @return          True if the character is acceptable in a name.
 */
static bool IniIsNameChar(const char *text, size_t len, size_t *pos)
{
    const unsigned char byte = static_cast<unsigned char>(text[*pos]);

    if (byte & 0x80)
    {
        wchar_t wc;
        if (mbtowc(&wc, &text[*pos], len - *pos) == -1)
        {
            return false;
        }

        const bool alnum = (iswalnum(wc) != 0);
        *pos += _GetUTF8CharBytes(&text[*pos]) - 1;
        return alnum;
    }

    /* 7-bit ASCII: always a valid index into the table */
    return (isalnum(byte) != 0) || (g_ini_chartable1[byte] != 0);
}

/**
 * Parses an INI-style file line by line and reports what it finds to a listener.
 *
 * For every line read (at most sizeof(buffer)-1 bytes per fgets() call):
 *  1. A UTF-8 byte order mark at the very start of the file is skipped.
 *  2. Leading whitespace, anything after an unquoted ';', and trailing
 *     whitespace are removed. Lines left empty are ignored.
 *  3. ReadINI_RawLine() is called with the cleaned line.
 *  4. If the line starts with '[' it is reported via ReadINI_NewSection(),
 *     otherwise it is split at the first '=' and reported via ReadINI_KeyValue().
 *
 * Parsing stops as soon as any listener callback returns false.
 *
 * @param file          Path handed to fopen().
 * @param ini_listener  Receiver of the parse callbacks.
 * @param line          Optional. Set to 0 if the file cannot be opened, otherwise
 *                      to the line counter value when parsing stopped.
 * @param col           Optional. Written only when a callback aborted parsing;
 *                      receives the token position left in curtok by that callback
 *                      (forced to 0 for ReadINI_KeyValue failures).
 * @return              True if the whole file was processed, false if it could
 *                      not be opened or a callback returned false.
 */
bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listener, unsigned int *line, unsigned int *col)
{
    FILE *fp = fopen(file, "rt");
    if (!fp)
    {
        if (line)
        {
            *line = 0;
        }
        return false;
    }

    unsigned int curline = 0;
    unsigned int curtok = 0;
    bool success = true;
    char buffer[2048];

    while (!feof(fp))
    {
        curline++;
        curtok = 0;
        buffer[0] = '\0';
        if (fgets(buffer, sizeof(buffer), fp) == NULL)
        {
            break;
        }

        /* On the first line only, step over a UTF-8 byte order mark (EF BB BF) */
        char *ptr = buffer;
        if (curline == 1
            && buffer[0] == static_cast<char>(0xEF)
            && buffer[1] == static_cast<char>(0xBB)
            && buffer[2] == static_cast<char>(0xBF))
        {
            ptr = &buffer[3];
        }

        /***************************************************
         * We preprocess the string before parsing tokens! *
         ***************************************************/

        /* Strip leading whitespace */
        while (*ptr != '\0' && IniIsWhitespace(*ptr))
        {
            ptr++;
        }

        size_t len = strlen(ptr);
        if (!len)
        {
            continue;
        }

        /* Cut the line at the first ';' that is not inside double quotes */
        bool in_quote = false;
        for (size_t i = 0; i < len; i++)
        {
            const char ch = ptr[i];
            if (ch == '"')
            {
                in_quote = !in_quote;
            }
            else if (ch == ';' && !in_quote)
            {
                ptr[i] = '\0';
                len = i;
                break;
            }
        }
        if (!len)
        {
            continue;
        }

        /* Strip trailing whitespace (this also removes the newline kept by fgets) */
        while (len > 0 && IniIsWhitespace(ptr[len - 1]))
        {
            ptr[--len] = '\0';
        }
        if (!len)
        {
            continue;
        }

        if (!ini_listener->ReadINI_RawLine(ptr, &curtok))
        {
            success = false;
            break;
        }

        if (*ptr == '[')
        {
            /* Section header: scan the name up to the closing bracket */
            bool invalid_tokens = false;
            bool got_bracket = false;
            bool extra_tokens = false;

            for (size_t i = 1; i < len; i++)
            {
                /* Remember the lead byte: the helper may advance i past a multi-byte character */
                const char c = ptr[i];
                if (IniIsNameChar(ptr, len, &i))
                {
                    continue;
                }

                if (c == ']')
                {
                    got_bracket = true;
                    /* Anything following the bracket counts as extra tokens */
                    extra_tokens = (i != len - 1);
                    /* Terminate the section name */
                    ptr[i] = '\0';
                    break;
                }

                /* Not a name character and not the bracket: flag it and keep scanning */
                invalid_tokens = true;
            }

            if (!ini_listener->ReadINI_NewSection(&ptr[1], invalid_tokens, got_bracket, extra_tokens, &curtok))
            {
                success = false;
                break;
            }
        }
        else
        {
            char *key_ptr = ptr;
            char *val_ptr = NULL;
            size_t first_space = 0;     /* index of the first whitespace seen since the last invalid char, 0 == none */
            bool invalid_tokens = false;
            bool equal_token = false;
            bool quotes = false;

            for (size_t i = 0; i < len; i++)
            {
                /* Remember the lead byte: the helper may advance i past a multi-byte character */
                const char c = ptr[i];
                if (IniIsNameChar(ptr, len, &i))
                {
                    continue;
                }

                if (IniIsWhitespace(c))
                {
                    if (!first_space)
                    {
                        first_space = i;
                    }
                }
                else if (c == '=')
                {
                    /* End of the key: cut at the first space if there was one, else at the '=' */
                    key_ptr[first_space ? first_space : i] = '\0';
                    /* ptr[len] is the terminator, so looking one past '=' is always in bounds */
                    if (ptr[++i] != '\0')
                    {
                        val_ptr = &ptr[i];
                    }
                    equal_token = true;
                    break;
                }
                else
                {
                    /* Mark that we got something invalid! */
                    invalid_tokens = true;
                    first_space = 0;
                }
            }

            /* Now parse the value, if any */
            if (val_ptr)
            {
                /* Skip whitespace between '=' and the value */
                while (*val_ptr != '\0' && IniIsWhitespace(*val_ptr))
                {
                    val_ptr++;
                }

                if (*val_ptr == '\0')
                {
                    val_ptr = NULL;
                }
                else if (*val_ptr == '"')
                {
                    /* A quoted value needs two distinct quote characters; a lone '"'
                     * must not be treated as both the opening and the closing quote. */
                    const size_t val_len = strlen(val_ptr);
                    if (val_len > 1 && val_ptr[val_len - 1] == '"')
                    {
                        val_ptr[val_len - 1] = '\0';
                        val_ptr++;
                        quotes = true;
                    }
                }
            }

            /* Report the value's offset within the line buffer. Without a value there is
             * no pointer to measure from (NULL - buffer is undefined), so report 0. */
            curtok = val_ptr ? static_cast<unsigned int>(val_ptr - buffer) : 0;

            if (!ini_listener->ReadINI_KeyValue(key_ptr, val_ptr, invalid_tokens, equal_token, quotes, &curtok))
            {
                curtok = 0;
                success = false;
                break;
            }
        }
    }

    fclose(fp);

    if (line)
    {
        *line = curline;
    }
    if (!success && col)
    {
        *col = curtok;
    }

    return success;
}