committed shiny new SMC parser (really Valve XML or whatever)

--HG--
extra : convert_revision : svn%3A39bc706e-5318-0410-9160-8a85361fbb7c/trunk%40202
This commit is contained in:
David Anderson 2006-12-08 20:54:49 +00:00
parent d7c3c577ed
commit 89c75b1940
5 changed files with 648 additions and 32 deletions

View File

@ -3,6 +3,7 @@
#include <wctype.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "CTextParsers.h"
CTextParsers g_TextParse;
@ -33,13 +34,572 @@ unsigned int CTextParsers::GetUTF8CharBytes(const char *stream)
return _GetUTF8CharBytes(stream);
}
bool CTextParsers::ParseFile_SMC(const char *file, ITextListener_SMC *smc_listener, unsigned int *line, unsigned int *col, bool strict)
{
/* This beast is a lot more rigorous than our INI friend. */
/**
* Character streams
*/
return false;
/**
 * Cursor over a NUL-terminated, in-memory character string.
 */
struct CharStream
{
    const char *curpos;     /* Next character that has not been handed out yet */
};

/**
 * STREAMREADER implementation for CharStream.
 *
 * Copies characters from the stream's current position into buffer until
 * either maxlength characters were copied or the terminating NUL is reached.
 * The NUL itself is never copied and the output is not NUL-terminated.
 *
 * @param stream        Pointer to a CharStream; its cursor is advanced.
 * @param buffer        Destination for the copied characters.
 * @param maxlength     Maximum number of characters to copy.
 * @param read          Receives the number of characters copied.
 * @return              Always true.
 */
bool CharStreamReader(void *stream, char *buffer, size_t maxlength, unsigned int *read)
{
    CharStream *source = (CharStream *)stream;
    const char *cursor = source->curpos;
    size_t remaining = maxlength;

    while (remaining > 0 && *cursor != '\0')
    {
        *buffer = *cursor;
        buffer++;
        cursor++;
        remaining--;
    }

    *read = cursor - source->curpos;
    source->curpos = cursor;

    return true;
}
/**
 * Parses SMC text held in a NUL-terminated string by wrapping it in a
 * CharStream and handing it to ParseStream_SMC.
 *
 * @param stream    NUL-terminated text to parse.
 * @param smc       Listener forwarded to ParseStream_SMC.
 * @param line      Forwarded to ParseStream_SMC.
 * @param col       Forwarded to ParseStream_SMC.
 * @return          Whatever ParseStream_SMC returns.
 */
SMCParseError CTextParsers::ParseString_SMC(const char *stream,
                                            ITextListener_SMC *smc,
                                            unsigned int *line,
                                            unsigned int *col)
{
    CharStream source;
    source.curpos = stream;

    const SMCParseError outcome = ParseStream_SMC(&source, CharStreamReader, smc, line, col);

    return outcome;
}
/**
* File streams
*/
/**
 * STREAMREADER implementation backed by a C stdio FILE.
 *
 * @param stream        FILE pointer to read from.
 * @param buffer        Destination for the bytes read.
 * @param maxlength     Maximum number of bytes to read.
 * @param read          Receives the number of bytes actually read.
 * @return              True if the read hit a clean end-of-file or the stream
 *                      has no error flag set; false otherwise.
 */
bool FileStreamReader(void *stream, char *buffer, size_t maxlength, unsigned int *read)
{
    FILE *fp = (FILE *)stream;
    const size_t count = fread(buffer, 1, maxlength, fp);

    *read = static_cast<unsigned int>(count);

    /* A zero-byte read at end-of-file is a normal finish, not a failure. */
    const bool clean_eof = (count == 0) && (feof(fp) != 0);

    return clean_eof || (ferror(fp) == 0);
}
/**
 * Opens a file in text mode and parses it as SMC through ParseStream_SMC.
 *
 * @param file      Path handed to fopen().
 * @param smc       Listener forwarded to ParseStream_SMC.
 * @param line      Forwarded to ParseStream_SMC.
 * @param col       Forwarded to ParseStream_SMC.
 * @return          SMCParse_StreamOpen if the file could not be opened,
 *                  otherwise the result of ParseStream_SMC.
 */
SMCParseError CTextParsers::ParseFile_SMC(const char *file, ITextListener_SMC *smc, unsigned int *line, unsigned int *col)
{
    FILE *handle = fopen(file, "rt");

    if (handle == NULL)
    {
        return SMCParse_StreamOpen;
    }

    /* The handle is closed before returning, whatever the parser reported. */
    const SMCParseError outcome = ParseStream_SMC(handle, FileStreamReader, smc, line, col);
    fclose(handle);

    return outcome;
}
/**
* Raw parsing of streams with helper functions
*/
/**
 * Bookkeeping for one token staged by the stream parser. Both pointers refer
 * into the parser's input buffer; a default-constructed entry is empty.
 */
struct StringInfo
{
    /* Members are initialized in declaration order. */
    StringInfo()
        : ptr(NULL),
          end(NULL),
          special(false),
          quoted(false)
    {
    }

    char *ptr;      /* First character of the token (the opening quote if quoted), NULL if empty */
    char *end;      /* Position where the terminating NUL will be written */
    bool special;   /* Set when a backslash was seen inside the token */
    bool quoted;    /* Set when the token was enclosed in double quotes */
};
/**
 * Turns a staged token into a NUL-terminated C string, in place.
 *
 * For a quoted token the start pointer is moved past the opening quote. A NUL
 * is then written at data.end, which modifies the underlying buffer.
 *
 * An unquoted token never starts with whitespace because the stream parser
 * skips it; its end pointer refers to the character following the token
 * (whitespace or a comment/invalid character), so overwriting it is valid.
 *
 * @param data      Token to finalize. data.end is dereferenced unconditionally
 *                  whenever data.ptr is set, so it must be valid in that case.
 * @return          The start of the string, or NULL if the token is empty.
 */
const char *FixupString(StringInfo &data)
{
    char *text = data.ptr;

    if (text == NULL)
    {
        return NULL;
    }

    if (data.quoted)
    {
        /* Step over the opening quote */
        text++;
    }

    //:TODO: when data.special is set the string has special tokens in it,
    //like \, and we must resolve these before passing the string back to the app

    data.ptr = text;
    *(data.end) = '\0';

    return text;
}
/**
 * Shifts the staged tokens one slot towards the back: slot 0 -> 1 -> 2.
 *
 * @param info      The three staging slots; slot 0 is the token being built.
 * @return          The token in slot 2 if that slot is already occupied (in
 *                  which case nothing is moved), otherwise NULL.
 */
const char *rotate(StringInfo info[3])
{
    /* The last slot is still in use, so there is no room to shift into. */
    if (info[2].ptr)
    {
        return info[2].ptr;
    }

    /* Nothing is being staged, so there is nothing to move. */
    if (!info[0].ptr)
    {
        return NULL;
    }

    info[2] = info[1];
    info[1] = info[0];
    info[0] = StringInfo();

    return NULL;
}
/**
 * Resets all three staging slots to the empty state.
 *
 * @param info      The three staging slots.
 */
void scrap(StringInfo info[3])
{
    const StringInfo blank;

    for (int slot = 0; slot < 3; slot++)
    {
        info[slot] = blank;
    }
}
/**
 * Moves a token's pointers back by a number of bytes. NULL pointers are left
 * untouched.
 *
 * @param data      Token whose pointers are adjusted.
 * @param bytes     Distance to subtract from each non-NULL pointer.
 */
void reloc(StringInfo &data, unsigned int bytes)
{
    if (data.ptr != NULL)
    {
        data.ptr = data.ptr - bytes;
    }

    if (data.end != NULL)
    {
        data.end = data.end - bytes;
    }
}
/**
 * Finds the start of the first occupied staging slot, searching from slot 2
 * down to slot 0.
 *
 * @param info      The three staging slots.
 * @return          The start pointer of that slot, or NULL if all are empty.
 */
char *lowstring(StringInfo info[3])
{
    int slot = 2;

    while (slot >= 0 && info[slot].ptr == NULL)
    {
        slot--;
    }

    if (slot < 0)
    {
        return NULL;
    }

    return info[slot].ptr;
}
/**
 * Parses SMC text pulled from an abstract stream and reports what it finds to
 * a listener.
 *
 * The input is read in chunks into a fixed 4096-byte buffer. Up to three
 * tokens are staged per line in strings[0..2] (slot 0 is the token currently
 * being built); they are handed to the listener as section names or key/value
 * pairs when a newline, '{' or '}' is reached.
 *
 * Listener calls made here: ReadSMC_ParseStart() once at the beginning,
 * ReadSMC_RawLine() for each newline, ReadSMC_NewSection() for '{',
 * ReadSMC_KeyValue() for a staged pair, ReadSMC_LeavingSection() for '}', and
 * ReadSMC_ParseEnd(false, false) on success. Any listener result other than
 * SMCParse_Continue stops the parse: SMCParse_HaltFail yields SMCParse_Custom,
 * anything else yields SMCParse_Okay.
 *
 * @param stream    Opaque stream handle passed back to srdr.
 * @param srdr      Reader callback; a successful read of 0 bytes means end of
 *                  stream, a false return ends the read loop.
 * @param smc       Listener receiving the parse events; must not be NULL.
 * @param line      If non-NULL, receives the current line number when the
 *                  parse stops early. Not written on success.
 * @param col       If non-NULL, receives the running token counter when the
 *                  parse stops early. Not written on success.
 * @return          SMCParse_Okay on success, otherwise an error code.
 */
SMCParseError CTextParsers::ParseStream_SMC(void *stream,
                                            STREAMREADER srdr,
                                            ITextListener_SMC *smc,
                                            unsigned int *line,
                                            unsigned int *col)
{
    char in_buf[4096];
    char *reparse_point = NULL;
    char *parse_point = in_buf;
    char *line_begin = in_buf;
    unsigned int read;
    unsigned int curline = 1;
    unsigned int curtok = 0;
    unsigned int curlevel = 0;
    bool in_quote = false;
    bool ignoring = false;
    bool eol_comment = false;
    bool ml_comment = false;
    unsigned int i;
    SMCParseError err = SMCParse_Okay;
    SMCParseResult res;
    char c;

    StringInfo strings[3];
    StringInfo emptystring;

    smc->ReadSMC_ParseStart();

    /* The free space is measured from the start of the buffer, not from the
     * start of the current line, so a read can never run past in_buf.
     */
    while (srdr(stream, parse_point, sizeof(in_buf) - (parse_point - in_buf) - 1, &read))
    {
        if (!read)
        {
            /* A successful zero-byte read is the reader's end-of-stream signal. */
            break;
        }

        /* :TODO: do this outside of the main loop somehow
         * This checks for BOM markings. It only applies to data that was just
         * read to the very start of the buffer, and the three skipped bytes
         * must be taken off the count so the scan stays inside the valid data.
         */
        if (curline == 1
            && parse_point == in_buf
            && read >= 3
            && in_buf[0] == static_cast<char>(0xEF)
            && in_buf[1] == static_cast<char>(0xBB)
            && in_buf[2] == static_cast<char>(0xBF))
        {
            parse_point = &in_buf[3];
            read -= 3;
        }

        if (reparse_point)
        {
            /* Resume from the character that needed look-ahead last time */
            read += (parse_point - reparse_point);
            parse_point = reparse_point;
            reparse_point = NULL;
        }

        for (i = 0; i < read; i++)
        {
            c = parse_point[i];
            if (c == '\n')
            {
                /* If we got a newline, there's a lot of things that could have happened in the interim.
                 * First, let's make sure the staged strings are rotated.
                 */
                if (strings[0].ptr)
                {
                    strings[0].end = &parse_point[i];
                    if (rotate(strings) != NULL)
                    {
                        err = SMCParse_InvalidTokens;
                        goto failed;
                    }
                }

                /* Next, let's clear some line-based values that may no longer have meaning */
                eol_comment = false;
                in_quote = false;
                if (ignoring && !ml_comment)
                {
                    ignoring = false;
                }

                /* Pass the raw line onto the listener */
                if ((res = smc->ReadSMC_RawLine(line_begin, curline)) != SMCParse_Continue)
                {
                    err = (res == SMCParse_HaltFail) ? SMCParse_Custom : SMCParse_Okay;
                    goto failed;
                }

                /* Now we check the sanity of our staged strings! */
                if (strings[2].ptr)
                {
                    if (!curlevel)
                    {
                        err = SMCParse_InvalidProperty1;
                        goto failed;
                    }
                    /* Assume the next string is a property and pass the info on. */
                    if ((res = smc->ReadSMC_KeyValue(
                            FixupString(strings[2]),
                            FixupString(strings[1]),
                            strings[2].quoted,
                            strings[1].quoted)) != SMCParse_Continue)
                    {
                        err = (res == SMCParse_HaltFail) ? SMCParse_Custom : SMCParse_Okay;
                        goto failed;
                    }
                    scrap(strings);
                }

                /* Change the states for the next line */
                curtok = 0;
                curline++;
                line_begin = &parse_point[i + 1];   //Note: safe because this gets relocated later
            } else if (ignoring) {
                if (in_quote)
                {
                    /* If i was 0, this case is impossible due to reparsing */
                    if ((i != 0) && c == '"' && parse_point[i - 1] != '\\')
                    {
                        /* If we reached a quote in an ignore phase,
                         * we're staging a string and we must rotate it out.
                         */
                        in_quote = false;
                        ignoring = false;
                        /* Set our info */
                        strings[0].end = &parse_point[i];
                        strings[0].quoted = true;
                        if (rotate(strings) != NULL)
                        {
                            /* If we rotated too many strings, there was too much crap on one line */
                            err = SMCParse_InvalidTokens;
                            goto failed;
                        }
                    } else if (c == '\\' && i == (read - 1)) {
                        strings[0].special = true;
                        reparse_point = &parse_point[i];
                        break;
                    }
                } else if (ml_comment) {
                    if (c == '*')
                    {
                        /* Check if we need to get more input first */
                        if (i == read - 1)
                        {
                            reparse_point = &parse_point[i];
                            break;
                        }
                        if (parse_point[i + 1] == '/')
                        {
                            ml_comment = false;
                            ignoring = false;
                            /* We should not be staging anything right now. */
                            assert(strings[0].ptr == NULL);
                            /* Advance the input stream so we don't choke on this token */
                            i++;
                            curtok++;
                        }
                    }
                }
            } else {
                /* Check if we're whitespace or not. The index goes through
                 * unsigned char so bytes above 0x7F cannot become negative.
                 */
                if (!g_ws_chartable[static_cast<unsigned char>(c)])
                {
                    bool restage = false;
                    /* Check various special tokens:
                     *  - semicolon
                     *  - double slash
                     *  - slash-star
                     *  - opening brace
                     *  - closing brace
                     */
                    if (c == ';' || c == '/')
                    {
                        /* If it's a line-based comment (that is, ; or //)
                         * we will need to scrap everything until the end of the line.
                         */
                        if (c == '/')
                        {
                            if (i == read - 1)
                            {
                                /* If we reached the end of the look-ahead, we need to re-check our input.
                                 * Breaking out will force this to be the new reparse point!
                                 * (reparse_point is NULL here, so it must be derived from parse_point.)
                                 */
                                reparse_point = &parse_point[i];
                                break;
                            }
                            if (parse_point[i + 1] == '/')
                            {
                                /* standard comment */
                                ignoring = true;
                                eol_comment = true;
                                restage = true;
                            } else if (parse_point[i + 1] == '*') {
                                /* inline comment - start ignoring */
                                ignoring = true;
                                ml_comment = true;
                                /* yes, we restage, meaning that a comment placed in the
                                 * middle of a token (STR, comment, ING) will not generate
                                 * 'STRING', but rather 'STR' and 'ING'.
                                 * This should be a rare occurrence and is done here for convenience.
                                 */
                                restage = true;
                            }
                        } else {
                            ignoring = true;
                            eol_comment = true;
                            restage = true;
                        }
                    } else if (c == '{') {
                        /* If we are staging a string, we must rotate here */
                        if (strings[0].ptr)
                        {
                            /* The token ends at the brace; without an end pointer
                             * FixupString would write through NULL.
                             */
                            strings[0].end = &parse_point[i];
                            /* We have unacceptable tokens on this line */
                            if (rotate(strings) != NULL)
                            {
                                err = SMCParse_InvalidSection1;
                                goto failed;
                            }
                        }
                        /* Sections must always be alone */
                        if (strings[2].ptr != NULL)
                        {
                            err = SMCParse_InvalidSection1;
                            goto failed;
                        } else if (strings[1].ptr == NULL) {
                            err = SMCParse_InvalidSection2;
                            goto failed;
                        }
                        if ((res = smc->ReadSMC_NewSection(FixupString(strings[1]), strings[1].quoted))
                            != SMCParse_Continue)
                        {
                            err = (res == SMCParse_HaltFail) ? SMCParse_Custom : SMCParse_Okay;
                            goto failed;
                        }
                        strings[1] = emptystring;
                        curlevel++;
                    } else if (c == '}') {
                        /* Unlike our matching friend, this can be on the same line as something prior.
                         * A token still being staged ends at the brace.
                         */
                        if (strings[0].ptr)
                        {
                            strings[0].end = &parse_point[i];
                        }
                        if (rotate(strings) != NULL)
                        {
                            err = SMCParse_InvalidSection3;
                            goto failed;
                        }
                        if (strings[2].ptr)
                        {
                            if (!curlevel)
                            {
                                err = SMCParse_InvalidProperty1;
                                goto failed;
                            }
                            if ((res = smc->ReadSMC_KeyValue(
                                    FixupString(strings[2]),
                                    FixupString(strings[1]),
                                    strings[2].quoted,
                                    strings[1].quoted))
                                != SMCParse_Continue)
                            {
                                err = (res == SMCParse_HaltFail) ? SMCParse_Custom : SMCParse_Okay;
                                goto failed;
                            }
                        } else if (strings[1].ptr) {
                            err = SMCParse_InvalidSection3;
                            goto failed;
                        } else if (!curlevel) {
                            err = SMCParse_InvalidSection4;
                            goto failed;
                        }
                        /* Now it's safe to leave the section */
                        scrap(strings);
                        if ((res = smc->ReadSMC_LeavingSection()) != SMCParse_Continue)
                        {
                            err = (res == SMCParse_HaltFail) ? SMCParse_Custom : SMCParse_Okay;
                            goto failed;
                        }
                        curlevel--;
                    } else if (c == '"') {
                        /* If we get a quote mark, we always restage, but we need to do it beforehand */
                        if (strings[0].ptr)
                        {
                            strings[0].end = &parse_point[i];
                            if (rotate(strings) != NULL)
                            {
                                err = SMCParse_InvalidTokens;
                                goto failed;
                            }
                        }
                        strings[0].ptr = &parse_point[i];
                        in_quote = true;
                        ignoring = true;
                    } else if (!strings[0].ptr) {
                        /* If we have no string, we must start one */
                        strings[0].ptr = &parse_point[i];
                    }
                    if (restage && strings[0].ptr)
                    {
                        strings[0].end = &parse_point[i];
                        if (rotate(strings) != NULL)
                        {
                            err = SMCParse_InvalidTokens;
                            goto failed;
                        }
                    }
                } else {
                    /* If we're eating a string and get whitespace, we need to restage.
                     * (Note that if we are quoted, this is being ignored)
                     */
                    if (strings[0].ptr)
                    {
                        /*
                         * The specification says the second string in a pair does not need to be quoted.
                         * Thus, we check if there's already a string on the stack.
                         * If there's a newline, we always rotate so the newline has an empty starter.
                         */
                        if (!strings[1].ptr)
                        {
                            /* There's no string, so we must move this one down and eat up another */
                            strings[0].end = &parse_point[i];
                            rotate(strings);
                        } else if (!strings[1].quoted) {
                            err = SMCParse_InvalidTokens;
                            goto failed;
                        }
                    }
                }
            }

            /* Advance which token we're on */
            curtok++;
        }

        if (line_begin != in_buf)
        {
            /* The line buffer has advanced, so it's safe to copy N bytes back to the beginning.
             * What's N?  N is the lowest point we're currently relying on.
             */
            char *stage = lowstring(strings);
            if (!stage || stage > line_begin)
            {
                stage = line_begin;
            }
            unsigned int bytes = read - (stage - parse_point);

            /* It is now safe to delete everything before the staged point */
            memmove(in_buf, stage, bytes);

            /* Calculate the number of bytes in the new buffer */
            bytes = stage - in_buf;
            /* Relocate all the cached pointers to our new base */
            line_begin -= bytes;
            reloc(strings[0], bytes);
            reloc(strings[1], bytes);
            reloc(strings[2], bytes);
            if (reparse_point)
            {
                reparse_point -= bytes;
            }
            if (parse_point)
            {
                parse_point = &parse_point[read];
                parse_point -= bytes;
            }
        } else if (read == sizeof(in_buf) - 1) {
            err = SMCParse_TokenOverflow;
            goto failed;
        }
        /* NOTE(review): when no newline has been consumed (line_begin == in_buf)
         * and the buffer is not full, parse_point is not advanced here. Confirm
         * that a following read cannot overwrite data that is still staged.
         */
    }

    /* If we're done parsing and there are tokens left over... */
    if (curlevel)
    {
        err = SMCParse_InvalidSection5;
        goto failed;
    } else if (strings[0].ptr || strings[1].ptr) {
        err = SMCParse_InvalidTokens;
        goto failed;
    }

    smc->ReadSMC_ParseEnd(false, false);

    return SMCParse_Okay;

failed:
    if (line)
    {
        *line = curline;
    }

    if (col)
    {
        *col = curtok;
    }

    return err;
}
/**
* INI parser
*/
bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listener, unsigned int *line, unsigned int *col)
{
FILE *fp = fopen(file, "rt");

View File

@ -23,6 +23,15 @@ inline unsigned int _GetUTF8CharBytes(const char *stream)
return 1;
}
/**
 * @brief Callback that pulls the next chunk of raw input from a stream.
 *
 * @param stream        IN: Stream pointer
 * @param buffer        IN/OUT: Stream buffer that receives the data
 * @param maxlength     IN: Maximum size of buffer
 * @param read          OUT: Number of bytes read (0 = end of stream)
 * @return              True on success, false on failure
 */
typedef bool (*STREAMREADER)(void *stream, char *buffer, size_t maxlength, unsigned int *read);
class CTextParsers : public ITextParsers
{
public:
@ -33,13 +42,23 @@ public:
unsigned int *line,
unsigned int *col);
virtual bool ParseFile_SMC(const char *file,
virtual SMCParseError ParseFile_SMC(const char *file,
ITextListener_SMC *smc_listener,
unsigned int *line,
unsigned int *col,
bool strict);
unsigned int *col);
virtual unsigned int GetUTF8CharBytes(const char *stream);
private:
SMCParseError ParseString_SMC(const char *stream,
ITextListener_SMC *smc,
unsigned int *line,
unsigned int *col);
SMCParseError ParseStream_SMC(void *stream,
STREAMREADER srdr,
ITextListener_SMC *smc,
unsigned int *line,
unsigned int *col);
};
extern CTextParsers g_TextParse;

View File

@ -102,17 +102,18 @@ namespace SourceMod
};
/**
* :TODO: write this in CFG format so it makes sense
* :TODO: write this in CFG (context free grammar) format so it makes sense
*
* The SMC file format is defined as:
* WHITESPACE: 0x20, \n, \t, \r
* IDENTIFIER: Any ASCII character EXCLUDING ", ', :, WHITESPACE
* STRING: Any set of symbols
* IDENTIFIER: Any ASCII character EXCLUDING ", {, }, ;, //, /*, or WHITESPACE.
* STRING: Any set of symbols enclosed in quotes.
* Note: if a STRING does not have quotes, it is parsed as an IDENTIFIER.
*
* Basic syntax is comprised of SECTIONBLOCKs.
* A SECTIONBLOCK is defined as:
*
* SECTION: "SECTIONNAME"
* SECTIONNAME
* {
* OPTION
* }
@ -121,11 +122,14 @@ namespace SourceMod
* A new line will terminate an OPTION, but there can be more than one OPTION per line.
* OPTION is defined any of:
* "KEY" "VALUE"
* "SINGLEKEY"
* SECTIONBLOCK
*
* SECTION is an IDENTIFIER
* SECTIONNAME, KEY, VALUE, and SINGLEKEY are strings
* SECTIONNAME cannot have trailing characters if quoted, but the quotes can be optionally removed.
* If SECTIONNAME is not enclosed in quotes, the entire sectionname string is used (minus surrounding whitespace).
* If KEY is not enclosed in quotes, the key is terminated at first whitespace.
* If VALUE is not properly enclosed in quotes, the entire value string is used (minus surrounding whitespace).
* The VALUE may have inner quotes, but the key string may not.
*
* For an example, see configs/permissions.cfg
*
@ -135,17 +139,33 @@ namespace SourceMod
* ;<TEXT>
* //<TEXT>
* /*<TEXT> */
/**
 * @brief Directive returned by listener callbacks to steer the parser.
 */
enum SMCParseResult
{
    SMCParse_Continue,      /**< continue parsing */
    SMCParse_Halt,          /**< stop parsing here */
    SMCParse_HaltFail       /**< stop parsing and return failure */
};
/**
 * @brief Result codes reported by the SMC parser.
 */
enum SMCParseError
{
    SMCParse_Okay,              /**< no error */
    SMCParse_StreamOpen,        /**< stream failed to open */
    SMCParse_StreamError,       /**< the stream died... somehow */
    SMCParse_Custom,            /**< a custom handler threw an error */
    SMCParse_InvalidSection1,   /**< a section was declared without quotes, and had extra tokens */
    SMCParse_InvalidSection2,   /**< a section was declared without any header */
    SMCParse_InvalidSection3,   /**< a section ending was declared with too many unknown tokens */
    SMCParse_InvalidSection4,   /**< a section ending has no matching beginning */
    SMCParse_InvalidSection5,   /**< a section beginning has no matching ending */
    SMCParse_InvalidTokens,     /**< there were too many unidentifiable strings on one line */
    SMCParse_TokenOverflow,     /**< the token buffer overflowed */
    SMCParse_InvalidProperty1   /**< a property was declared outside of any section */
};
class ITextListener_SMC
{
public:
enum SMCParseResult
{
SMCParse_Continue, //continue parsing
SMCParse_SkipSection, //skip the rest of the current section
SMCParse_Halt, //stop parsing here
SMCParse_HaltFail //stop parsing and return failure
};
/**
* @brief Called when starting parsing.
*/
@ -163,17 +183,25 @@ namespace SourceMod
{
}
/**
* @brief Called when a warning occurs.
*
* The default implementation returns SMCParse_HaltFail.
*
* @param error By-reference SMCParseError code describing the warning.
* @param tokens Pointer to the token stream causing the error.
* @return SMCParseResult directive.
*/
virtual SMCParseResult ReadSMC_OnWarning(SMCParseError &error, const char *tokens)
{
return SMCParse_HaltFail;
}
/**
* @brief Called when entering a new section
*
* @param name Name of section, with the colon omitted.
* @param option Optional text after the colon, quotes removed. NULL if none.
* @param colon Whether or not the required ':' was encountered.
* @param opt_quotes Whether or not the option string was enclosed in quotes.
* @return SMCParseResult directive.
*/
virtual SMCParseResult ReadSMC_NewSection(const char *name,
const char *option,
bool colon)
virtual SMCParseResult ReadSMC_NewSection(const char *name, bool opt_quotes)
{
return SMCParse_Continue;
}
@ -198,7 +226,6 @@ namespace SourceMod
/**
* @brief Called when leaving the current section.
* Note: Skipping the section has no meaning here.
*
* @return SMCParseResult directive.
*/
@ -259,14 +286,12 @@ namespace SourceMod
* @param smc_listener Event handler for reading file.
* @param line If non-NULL, will contain last line parsed (0 if file could not be opened).
* @param col If non-NULL, will contain last column parsed (undefined if file could not be opened).
* @param strict If strict mode is enabled, the parsing rules are obeyed rigorously rather than loosely.
* @return True if parsing succeded, false if file couldn't be opened or there was a syntax error.
* @return An SMCParseError result code.
*/
virtual bool ParseFile_SMC(const char *file,
virtual SMCParseError ParseFile_SMC(const char *file,
ITextListener_SMC *smc_listener,
unsigned int *line,
unsigned int *col,
bool strict) =0;
unsigned int *col) =0;
public:
/**
* @brief Returns the number of bytes that a multi-byte character contains in a UTF-8 stream.

View File

@ -3,14 +3,25 @@
#include "sm_version.h"
#include "sourcemod.h"
#include "CTextParsers.h"
SourceMod_Core g_SourceMod_Core;
PLUGIN_EXPOSE(SourceMod, g_SourceMod_Core);
/**
 * Listener that overrides nothing, so every callback falls through to the
 * ITextListener_SMC defaults. It is only used by the parse call in
 * SourceMod_Core::Load below.
 */
class Parser : public ITextListener_SMC
{
public:
};
/**
 * Plugin load entry point: runs PLUGIN_SAVEVARS(), parses a hard-coded file
 * with a do-nothing listener, then hands off to InitializeSourceMod().
 *
 * @param id        Plugin id (not used in this function body).
 * @param ismm      Interface pointer (not referenced directly in this body).
 * @param error     Buffer forwarded to InitializeSourceMod().
 * @param maxlen    Size of the error buffer, forwarded as well.
 * @param late      Forwarded to InitializeSourceMod().
 * @return          The result of InitializeSourceMod().
 */
bool SourceMod_Core::Load(PluginId id, ISmmAPI *ismm, char *error, size_t maxlen, bool late)
{
PLUGIN_SAVEVARS();
/* NOTE(review): this parses a fixed path on drive C and the result in 'err'
 * is never inspected - presumably temporary debugging code; confirm before
 * shipping.
 */
Parser p;
SMCParseError err = g_TextParse.ParseFile_SMC("c:\\debug.txt", &p, NULL, NULL);
return g_SourceMod.InitializeSourceMod(error, maxlen, late);
}

View File

@ -1,6 +1,7 @@
#include <stdio.h>
#include "PluginSys.h"
#include "LibrarySys.h"
#include "sourcemm_api.h"
CPluginManager g_PluginMngr;