diff --git a/core/CTextParsers.cpp b/core/CTextParsers.cpp index 9c76d5c1..34f247bf 100644 --- a/core/CTextParsers.cpp +++ b/core/CTextParsers.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "CTextParsers.h" CTextParsers g_TextParse; @@ -33,13 +34,572 @@ unsigned int CTextParsers::GetUTF8CharBytes(const char *stream) return _GetUTF8CharBytes(stream); } -bool CTextParsers::ParseFile_SMC(const char *file, ITextListener_SMC *smc_listener, unsigned int *line, unsigned int *col, bool strict) -{ - /* This beast is a lot more rigorous than our INI friend. */ +/** + * Character streams + */ - return false; +struct CharStream +{ + const char *curpos; +}; + +bool CharStreamReader(void *stream, char *buffer, size_t maxlength, unsigned int *read) +{ + CharStream *srdr = (CharStream *)stream; + + const char *ptr = srdr->curpos; + for (size_t i=0; icurpos; + + srdr->curpos = ptr; + + return true; } +SMCParseError CTextParsers::ParseString_SMC(const char *stream, + ITextListener_SMC *smc, + unsigned int *line, + unsigned int *col) +{ + CharStream srdr = { stream }; + + return ParseStream_SMC(&srdr, CharStreamReader, smc, line, col); +} + +/** + * File streams + */ + +bool FileStreamReader(void *stream, char *buffer, size_t maxlength, unsigned int *read) +{ + size_t num = fread(buffer, 1, maxlength, (FILE *)stream); + + *read = static_cast(num); + + if (num == 0 && feof((FILE *)stream)) + { + return true; + } + + return (ferror((FILE *)stream) == 0); +} + +SMCParseError CTextParsers::ParseFile_SMC(const char *file, ITextListener_SMC *smc, unsigned int *line, unsigned int *col) +{ + FILE *fp = fopen(file, "rt"); + + if (!fp) + { + return SMCParse_StreamOpen; + } + + SMCParseError result = ParseStream_SMC(fp, FileStreamReader, smc, line, col); + + fclose(fp); + + return result; +} + +/** + * Raw parsing of streams with helper functions + */ + +struct StringInfo +{ + StringInfo() : quoted(false), ptr(NULL), end(NULL), special(false) { } + char *ptr; + char *end; + bool special; + bool quoted; +}; + +const char *FixupString(StringInfo &data) +{ + if (!data.ptr) + { + return NULL; + } + + if (data.quoted) + { + data.ptr++; + } +#if defined _DEBUG + else { + /* A string will never have beginning whitespace because we ignore it in the stream. + * Furthermore, if there is trailing whitespace, the end ptr will point to it, so it is valid + * to overwrite! Lastly, the last character must be whitespace or a comment/invalid character. + */ + } +#endif + + if (data.special) + { + //:TODO: this string has special tokens in it, like \, and we must + //resolve these before passing the string back to the app + } + + *(data.end) = '\0'; + + return data.ptr; +} + +const char *rotate(StringInfo info[3]) +{ + if (info[2].ptr != NULL) + { + return info[2].ptr; + } + + if (info[0].ptr != NULL) + { + info[2] = info[1]; + info[1] = info[0]; + info[0] = StringInfo(); + } + + return NULL; +} + +void scrap(StringInfo info[3]) +{ + info[2] = StringInfo(); + info[1] = StringInfo(); + info[0] = StringInfo(); +} + +void reloc(StringInfo &data, unsigned int bytes) +{ + if (data.ptr) + { + data.ptr -= bytes; + } + if (data.end) + { + data.end -= bytes; + } +} + +char *lowstring(StringInfo info[3]) +{ + for (int i=2; i>=0; i--) + { + if (info[i].ptr) + { + return info[i].ptr; + } + } + + return NULL; +} + +SMCParseError CTextParsers::ParseStream_SMC(void *stream, + STREAMREADER srdr, + ITextListener_SMC *smc, + unsigned int *line, + unsigned int *col) +{ + char in_buf[4096]; + char *reparse_point = NULL; + char *parse_point = in_buf; + char *line_begin = in_buf; + unsigned int read; + unsigned int curline = 1; + unsigned int curtok = 0; + unsigned int curlevel = 0; + bool in_quote = false; + bool ignoring = false; + bool eol_comment = false; + bool ml_comment = false; + unsigned int i; + SMCParseError err = SMCParse_Okay; + SMCParseResult res; + char c; + + StringInfo strings[3]; + StringInfo emptystring; + + smc->ReadSMC_ParseStart(); + + while (srdr(stream, parse_point, sizeof(in_buf) - (parse_point - line_begin) - 1, &read)) + { + if (!read) + { + err = SMCParse_StreamError; + goto failed; + } + + /* :TODO: do this outside of the main loop somehow + * This checks for BOM markings + */ + if (curline == 1 && + in_buf[0] == (char)0xEF && + in_buf[1] == (char)0xBB && + in_buf[2] == (char)0xBF) + { + parse_point = &in_buf[3]; + } + + if (reparse_point) + { + read += (parse_point - reparse_point); + parse_point = reparse_point; + reparse_point = NULL; + } + + for (i=0; iReadSMC_RawLine(line_begin, curline)) != SMCParse_Continue) + { + err = (res == SMCParse_HaltFail) ? SMCParse_Custom : SMCParse_Okay; + goto failed; + } + + /* Now we check the sanity of our staged strings! */ + if (strings[2].ptr) + { + if (!curlevel) + { + err = SMCParse_InvalidProperty1; + goto failed; + } + /* Assume the next string is a property and pass the info on. */ + if ((res=smc->ReadSMC_KeyValue( + FixupString(strings[2]), + FixupString(strings[1]), + strings[2].quoted, + strings[1].quoted)) != SMCParse_Continue) + { + err = (res == SMCParse_HaltFail) ? SMCParse_Custom : SMCParse_Okay; + goto failed; + } + scrap(strings); + } + + /* Change the states for the next line */ + curtok = 0; + curline++; + line_begin = &parse_point[i+1]; //Note: safe because this gets relocated later + } else if (ignoring) { + if (in_quote) + { + /* If i was 0, this case is impossible due to reparsing */ + if ((i != 0) && c == '"' && parse_point[i-1] != '\\') + { + /* If we reached a quote in an ignore phase, + * we're staging a string and we must rotate it out. + */ + in_quote = false; + ignoring = false; + /* Set our info */ + strings[0].end = &parse_point[i]; + strings[0].quoted = true; + if (rotate(strings) != NULL) + { + /* If we rotated too many strings, there was too much crap on one line */ + err = SMCParse_InvalidTokens; + goto failed; + } + } else if (c == '\\' && i == (read - 1)) { + strings[0].special = true; + reparse_point = &parse_point[i]; + break; + } + } else if (ml_comment) { + if (c == '*') + { + /* Check if we need to get more input first */ + if (i == read - 1) + { + reparse_point = &parse_point[i]; + break; + } + if (parse_point[i+1] == '/') + { + ml_comment = false; + ignoring = false; + /* We should not be staging anything right now. */ + assert(strings[0].ptr == NULL); + /* Advance the input stream so we don't choke on this token */ + i++; + curtok++; + } + } + } + } else { + /* Check if we're whitespace or not */ + if (!g_ws_chartable[c]) + { + bool restage = false; + /* Check various special tokens: + * ; + * // + * /* + * { + * } + */ + if (c == ';' || c == '/') + { + /* If it's a line-based comment (that is, ; or //) + * we will need to scrap everything until the end of the line. + */ + if (c == '/') + { + if (i == read - 1) + { + /* If we reached the end of the look-ahead, we need to re-check our input. + * Breaking out will force this to be the new reparse point! + */ + reparse_point = &reparse_point[i]; + break; + } + if (parse_point[i + 1] == '/') + { + /* standard comment */ + ignoring = true; + eol_comment = true; + restage = true; + } else if (parse_point[i+1] == '*') { + /* inline comment - start ignoring */ + ignoring = true; + ml_comment = true; + /* yes, we restage, meaning that: + * STR/*stuff* /ING (space because ml comments don't nest in C++) + * will not generate 'STRING', but rather 'STR' and 'ING'. + * This should be a rare occurrence and is done here for convenience. + */ + restage = true; + } + } else { + ignoring = true; + eol_comment = true; + restage = true; + } + } else if (c == '{') { + /* If we are staging a string, we must rotate here */ + if (strings[0].ptr) + { + /* We have unacceptable tokens on this line */ + if (rotate(strings) != NULL) + { + err = SMCParse_InvalidSection1; + goto failed; + } + } + /* Sections must always be alone */ + if (strings[2].ptr != NULL) + { + err = SMCParse_InvalidSection1; + goto failed; + } else if (strings[1].ptr == NULL) { + err = SMCParse_InvalidSection2; + goto failed; + } + if ((res=smc->ReadSMC_NewSection(FixupString(strings[1]), strings[1].quoted)) + != SMCParse_Continue) + { + err = (res == SMCParse_HaltFail) ? SMCParse_Custom : SMCParse_Okay; + goto failed; + } + strings[1] = emptystring; + curlevel++; + } else if (c == '}') { + /* Unlike our matching friend, this can be on the same line as something prior */ + if (rotate(strings) != NULL) + { + err = SMCParse_InvalidSection3; + goto failed; + } + if (strings[2].ptr) + { + if (!curlevel) + { + err = SMCParse_InvalidProperty1; + goto failed; + } + if ((res=smc->ReadSMC_KeyValue( + FixupString(strings[2]), + FixupString(strings[1]), + strings[2].quoted, + strings[1].quoted)) + != SMCParse_Continue) + { + err = (res == SMCParse_HaltFail) ? SMCParse_Custom : SMCParse_Okay; + goto failed; + } + } else if (strings[1].ptr) { + err = SMCParse_InvalidSection3; + goto failed; + } else if (!curlevel) { + err = SMCParse_InvalidSection4; + goto failed; + } + /* Now it's safe to leave the section */ + scrap(strings); + if ((res=smc->ReadSMC_LeavingSection()) != SMCParse_Continue) + { + err = (res == SMCParse_HaltFail) ? SMCParse_Custom : SMCParse_Okay; + goto failed; + } + curlevel--; + } else if (c == '"') { + /* If we get a quote mark, we always restage, but we need to do it beforehand */ + if (strings[0].ptr) + { + strings[0].end = &parse_point[i]; + if (rotate(strings) != NULL) + { + err = SMCParse_InvalidTokens; + goto failed; + } + } + strings[0].ptr = &parse_point[i]; + in_quote = true; + ignoring = true; + } else if (!strings[0].ptr) { + /* If we have no string, we must start one */ + strings[0].ptr = &parse_point[i]; + } + if (restage && strings[0].ptr) + { + strings[0].end = &parse_point[i]; + if (rotate(strings) != NULL) + { + err = SMCParse_InvalidTokens; + goto failed; + } + } + } else { + /* If we're eating a string and get whitespace, we need to restage. + * (Note that if we are quoted, this is being ignored) + */ + if (strings[0].ptr) + { + /* + * The specification says the second string in a pair does not need to be quoted. + * Thus, we check if there's already a string on the stack. + * If there's a newline, we always rotate so the newline has an empty starter. + */ + if (!strings[1].ptr) + { + /* There's no string, so we must move this one down and eat up another */ + strings[0].end = &parse_point[i]; + rotate(strings); + } else if (!strings[1].quoted) { + err = SMCParse_InvalidTokens; + goto failed; + } + } + } + } + + /* Advance which token we're on */ + curtok++; + } + + if (line_begin != in_buf) + { + /* The line buffer has advanced, so it's safe to copy N bytes back to the beginning. + * What's N? N is the lowest point we're currently relying on. + */ + char *stage = lowstring(strings); + if (!stage || stage > line_begin) + { + stage = line_begin; + } + unsigned int bytes = read - (stage - parse_point); + + /* It is now safe to delete everything before the staged point */ + memmove(in_buf, stage, bytes); + + /* Calculate the number of bytes in the new buffer */ + bytes = stage - in_buf; + /* Relocate all the cached pointers to our new base */ + line_begin -= bytes; + reloc(strings[0], bytes); + reloc(strings[1], bytes); + reloc(strings[2], bytes); + if (reparse_point) + { + reparse_point -= bytes; + } + if (parse_point) + { + parse_point = &parse_point[read]; + parse_point -= bytes; + } + } else if (read == sizeof(in_buf) - 1) { + err = SMCParse_TokenOverflow; + goto failed; + } + } + + /* If we're done parsing and there are tokens left over... */ + if (curlevel) + { + err = SMCParse_InvalidSection5; + goto failed; + } else if (strings[0].ptr || strings[1].ptr) { + err = SMCParse_InvalidTokens; + goto failed; + } + + smc->ReadSMC_ParseEnd(false, false); + + return SMCParse_Okay; + +failed: + if (line) + { + *line = curline; + } + + if (col) + { + *col = curtok; + } + + return err; +} + + +/** + * INI parser + */ + bool CTextParsers::ParseFile_INI(const char *file, ITextListener_INI *ini_listener, unsigned int *line, unsigned int *col) { FILE *fp = fopen(file, "rt"); diff --git a/core/CTextParsers.h b/core/CTextParsers.h index 111a5c52..0c0f48d0 100644 --- a/core/CTextParsers.h +++ b/core/CTextParsers.h @@ -23,6 +23,15 @@ inline unsigned int _GetUTF8CharBytes(const char *stream) return 1; } +/** + * @param void * IN: Stream pointer + * @param char * IN/OUT: Stream buffer + * @param size_t IN: Maximum size of buffer + * @param unsigned int * OUT: Number of bytes read (0 = end of stream) + * @return True on success, false on failure + */ +typedef bool (*STREAMREADER)(void *, char *, size_t, unsigned int *); + class CTextParsers : public ITextParsers { public: @@ -33,13 +42,23 @@ public: unsigned int *line, unsigned int *col); - virtual bool ParseFile_SMC(const char *file, + virtual SMCParseError ParseFile_SMC(const char *file, ITextListener_SMC *smc_listener, unsigned int *line, - unsigned int *col, - bool strict); + unsigned int *col); virtual unsigned int GetUTF8CharBytes(const char *stream); +private: + SMCParseError ParseString_SMC(const char *stream, + ITextListener_SMC *smc, + unsigned int *line, + unsigned int *col); + SMCParseError ParseStream_SMC(void *stream, + STREAMREADER srdr, + ITextListener_SMC *smc, + unsigned int *line, + unsigned int *col); + }; extern CTextParsers g_TextParse; diff --git a/core/interfaces/ITextParsers.h b/core/interfaces/ITextParsers.h index c4be06e3..0812cdfc 100644 --- a/core/interfaces/ITextParsers.h +++ b/core/interfaces/ITextParsers.h @@ -102,17 +102,18 @@ namespace SourceMod }; /** - * :TODO: write this in CFG format so it makes sense + * :TODO: write this in CFG (context free grammar) format so it makes sense * * The SMC file format is defined as: * WHITESPACE: 0x20, \n, \t, \r - * IDENTIFIER: Any ASCII character EXCLUDING ", ', :, WHITESPACE - * STRING: Any set of symbols + * IDENTIFIER: Any ASCII character EXCLUDING ", {, }, ;, //, /*, or WHITESPACE. + * STRING: Any set of symbols enclosed in quotes. + * Note: if a STRING does not have quotes, it is parsed as an IDENTIFIER. * * Basic syntax is comprised of SECTIONBLOCKs. * A SECTIONBLOCK defined as: * - * SECTION: "SECTIONNAME" + * SECTIONNAME * { * OPTION * } @@ -121,11 +122,14 @@ namespace SourceMod * A new line will terminate an OPTION, but there can be more than one OPTION per line. * OPTION is defined any of: * "KEY" "VALUE" - * "SINGLEKEY" * SECTIONBLOCK * - * SECTION is an IDENTIFIER * SECTIONNAME, KEY, VALUE, and SINGLEKEY are strings + * SECTIONNAME cannot have trailing characters if quoted, but the quotes can be optionally removed. + * If SECTIONNAME is not enclosed in quotes, the entire sectionname string is used (minus surrounding whitespace). + * If KEY is not enclosed in quotes, the key is terminated at first whitespace. + * If VALUE is not properly enclosed in quotes, the entire value string is used (minus surrounding whitespace). + * The VALUE may have inner quotes, but the key string may not. * * For an example, see configs/permissions.cfg * @@ -135,17 +139,33 @@ namespace SourceMod * ; * // * /* */ + + enum SMCParseResult + { + SMCParse_Continue, //continue parsing + SMCParse_Halt, //stop parsing here + SMCParse_HaltFail //stop parsing and return failure + }; + + enum SMCParseError + { + SMCParse_Okay, //no error + SMCParse_StreamOpen, //stream failed to open + SMCParse_StreamError, //the stream died... somehow + SMCParse_Custom, //a custom handler threw an error + SMCParse_InvalidSection1, //a section was declared without quotes, and had extra tokens + SMCParse_InvalidSection2, //a section was declared without any header + SMCParse_InvalidSection3, //a section ending was declared with too many unknown tokens + SMCParse_InvalidSection4, //a section ending has no matching beginning + SMCParse_InvalidSection5, //a section beginning has no matching ending + SMCParse_InvalidTokens, //there were too many unidentifiable strings on one line + SMCParse_TokenOverflow, //the token buffer overflowed + SMCParse_InvalidProperty1, //a property was declared outside of any section + }; + class ITextListener_SMC { public: - enum SMCParseResult - { - SMCParse_Continue, //continue parsing - SMCParse_SkipSection, //skip the rest of the current section - SMCParse_Halt, //stop parsing here - SMCParse_HaltFail //stop parsing and return failure - }; - /** * @brief Called when starting parsing. */ @@ -163,17 +183,25 @@ namespace SourceMod { } + /** + * @brief Called when a warning occurs. + * @param error By-reference variable containing the error message of the warning. + * @param tokens Pointer to the token stream causing the error. + * @return SMCParseResult directive. + */ + virtual SMCParseResult ReadSMC_OnWarning(SMCParseError &error, const char *tokens) + { + return SMCParse_HaltFail; + } + /** * @brief Called when entering a new section * * @param name Name of section, with the colon omitted. - * @param option Optional text after the colon, quotes removed. NULL if none. - * @param colon Whether or not the required ':' was encountered. + * @param opt_quotes Whether or not the option string was enclosed in quotes. * @return SMCParseResult directive. */ - virtual SMCParseResult ReadSMC_NewSection(const char *name, - const char *option, - bool colon) + virtual SMCParseResult ReadSMC_NewSection(const char *name, bool opt_quotes) { return SMCParse_Continue; } @@ -198,7 +226,6 @@ namespace SourceMod /** * @brief Called when leaving the current section. - * Note: Skipping the section has no meaning here. * * @return SMCParseResult directive. */ @@ -259,14 +286,12 @@ namespace SourceMod * @param smc_listener Event handler for reading file. * @param line If non-NULL, will contain last line parsed (0 if file could not be opened). * @param col If non-NULL, will contain last column parsed (undefined if file could not be opened). - * @param strict If strict mode is enabled, the parsing rules are obeyed rigorously rather than loosely. - * @return True if parsing succeded, false if file couldn't be opened or there was a syntax error. + * @return An SMCParseError result code. */ - virtual bool ParseFile_SMC(const char *file, + virtual SMCParseError ParseFile_SMC(const char *file, ITextListener_SMC *smc_listener, unsigned int *line, - unsigned int *col, - bool strict) =0; + unsigned int *col) =0; public: /** * @brief Returns the number of bytes that a multi-byte character contains in a UTF-8 stream. diff --git a/core/sourcemm_api.cpp b/core/sourcemm_api.cpp index 2e862a26..8aa855f2 100644 --- a/core/sourcemm_api.cpp +++ b/core/sourcemm_api.cpp @@ -3,14 +3,25 @@ #include "sm_version.h" #include "sourcemod.h" +#include "CTextParsers.h" + SourceMod_Core g_SourceMod_Core; PLUGIN_EXPOSE(SourceMod, g_SourceMod_Core); +class Parser : public ITextListener_SMC +{ +public: +}; + bool SourceMod_Core::Load(PluginId id, ISmmAPI *ismm, char *error, size_t maxlen, bool late) { PLUGIN_SAVEVARS(); + Parser p; + + SMCParseError err = g_TextParse.ParseFile_SMC("c:\\debug.txt", &p, NULL, NULL); + return g_SourceMod.InitializeSourceMod(error, maxlen, late); } diff --git a/core/systems/PluginSys.cpp b/core/systems/PluginSys.cpp index 3962ee84..2a374f35 100644 --- a/core/systems/PluginSys.cpp +++ b/core/systems/PluginSys.cpp @@ -1,6 +1,7 @@ #include #include "PluginSys.h" #include "LibrarySys.h" +#include "sourcemm_api.h" CPluginManager g_PluginMngr;