/** * vim: set ts=4 : * =============================================================== * SourceMod, Copyright (C) 2004-2007 AlliedModders LLC. * All rights reserved. * =============================================================== * * This file is part of the SourceMod/SourcePawn SDK. This file may only be * used or modified under the Terms and Conditions of its License Agreement, * which is found in public/licenses/LICENSE.txt. As of this notice, derivative * works must be licensed under the GNU General Public License (version 2 or * greater). A copy of the GPL is included under public/licenses/GPL.txt. * * To view the latest information, see: http://www.sourcemod.net/license.php * * Version: $Id$ */ #ifndef _INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_ #define _INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_ /** * @file ITextParsers.h * @brief Defines various text/file parsing functions, as well as UTF-8 support code. */ #include namespace SourceMod { /** * The INI file format is defined as: * WHITESPACE: 0x20, \n, \t, \r * IDENTIFIER: A-Z a-z 0-9 _ - , + . $ ? / * STRING: Any set of symbols * * Basic syntax is comprised of SECTIONs. * A SECTION is defined as: * [SECTIONNAME] * OPTION * OPTION * OPTION... * * SECTIONNAME is an IDENTIFIER. * OPTION can be repeated any number of times, once per line. * OPTION is defined as one of: * KEY = "VALUE" * KEY = VALUE * KEY * Where KEY is an IDENTIFIER and VALUE is a STRING. * * WHITESPACE should always be omitted. * COMMENTS should be stripped, and are defined as text occurring in: * ; * * Example file below. Note that * The second line is technically invalid. The event handler * must decide whether this should be allowed. * --FILE BELOW-- * [gaben] * hi = clams * bye = "NO CLAMS" * * [valve] * cannot * maintain * products */ /** * @brief Contains parse events for INI files. */ class ITextListener_INI { public: /** * @brief Called when a new section is encountered in an INI file. * * @param section Name of section in between the [ and ] characters. * @param invalid_tokens True if invalid tokens were detected in the name. * @param close_bracket True if a closing bracket was detected, false otherwise. * @param extra_tokens True if extra tokens were detected on the line. * @param curtok Contains current token in the line where the section name starts. * You can add to this offset when failing to point to a token. * @return True to keep parsing, false otherwise. */ virtual bool ReadINI_NewSection(const char *section, bool invalid_tokens, bool close_bracket, bool extra_tokens, unsigned int *curtok) { return true; } /** * @brief Called when encountering a key/value pair in an INI file. * * @param key Name of key. * @param value String containing value (with quotes stripped, if any). * @param invalid_tokens Whether or not the key contained invalid tokens. * @param equal_token There was an '=' sign present (in case the value is missing). * @param quotes Whether value was enclosed in quotes. * @param curtok Contains the token index of the start of the value string. * This can be changed when returning false. * @return True to keep parsing, false otherwise. */ virtual bool ReadINI_KeyValue(const char *key, const char *value, bool invalid_tokens, bool equal_token, bool quotes, unsigned int *curtok) { return true; } /** * @brief Called after a line has been preprocessed, if it has text. * * @param line Contents of line. * @param curtok Pointer to optionally store failed position in string. * @return True to keep parsing, false otherwise. */ virtual bool ReadINI_RawLine(const char *line, unsigned int *curtok) { return true; } }; /** * :TODO: write this in CFG (context free grammar) format so it makes sense * * The SMC file format is defined as: * WHITESPACE: 0x20, \n, \t, \r * IDENTIFIER: Any ASCII character EXCLUDING ", {, }, ;, //, / *, or WHITESPACE. * STRING: Any set of symbols enclosed in quotes. * Note: if a STRING does not have quotes, it is parsed as an IDENTIFIER. * * Basic syntax is comprised of SECTIONBLOCKs. * A SECTIONBLOCK defined as: * * SECTIONNAME * { * OPTION * } * * OPTION can be repeated any number of times inside a SECTIONBLOCK. * A new line will terminate an OPTION, but there can be more than one OPTION per line. * OPTION is defined any of: * "KEY" "VALUE" * SECTIONBLOCK * * SECTIONNAME, KEY, VALUE, and SINGLEKEY are strings * SECTIONNAME cannot have trailing characters if quoted, but the quotes can be optionally removed. * If SECTIONNAME is not enclosed in quotes, the entire sectionname string is used (minus surrounding whitespace). * If KEY is not enclosed in quotes, the key is terminated at first whitespace. * If VALUE is not properly enclosed in quotes, the entire value string is used (minus surrounding whitespace). * The VALUE may have inner quotes, but the key string may not. * * For an example, see configs/permissions.cfg * * WHITESPACE should be ignored. * Comments are text occurring inside the following tokens, and should be stripped * unless they are inside literal strings: * ; * // * / * */ /** * @brief Lists actions to take when an SMC parse hook is done. */ enum SMCParseResult { SMCParse_Continue, /**< Continue parsing */ SMCParse_Halt, /**< Stop parsing here */ SMCParse_HaltFail /**< Stop parsing and return SMCParseError_Custom */ }; /** * @brief Lists error codes possible from parsing an SMC file. */ enum SMCParseError { SMCParse_Okay = 0, /**< No error */ SMCParse_StreamOpen, /**< Stream failed to open */ SMCParse_StreamError, /**< The stream died... somehow */ SMCParse_Custom, /**< A custom handler threw an error */ SMCParse_InvalidSection1, /**< A section was declared without quotes, and had extra tokens */ SMCParse_InvalidSection2, /**< A section was declared without any header */ SMCParse_InvalidSection3, /**< A section ending was declared with too many unknown tokens */ SMCParse_InvalidSection4, /**< A section ending has no matching beginning */ SMCParse_InvalidSection5, /**< A section beginning has no matching ending */ SMCParse_InvalidTokens, /**< There were too many unidentifiable strings on one line */ SMCParse_TokenOverflow, /**< The token buffer overflowed */ SMCParse_InvalidProperty1, /**< A property was declared outside of any section */ }; /** * @brief Describes the events available for reading an SMC stream. */ class ITextListener_SMC { public: /** * @brief Called when starting parsing. */ virtual void ReadSMC_ParseStart() { }; /** * @brief Called when ending parsing. * * @param halted True if abnormally halted, false otherwise. * @param failed True if parsing failed, false otherwise. */ virtual void ReadSMC_ParseEnd(bool halted, bool failed) { } /** * @brief Called when a warning occurs. * @param error By-reference variable containing the error message of the warning. * @param tokens Pointer to the token stream causing the error. * @return SMCParseResult directive. */ virtual SMCParseResult ReadSMC_OnWarning(SMCParseError &error, const char *tokens) { return SMCParse_HaltFail; } /** * @brief Called when entering a new section * * @param name Name of section, with the colon omitted. * @param opt_quotes Whether or not the option string was enclosed in quotes. * @return SMCParseResult directive. */ virtual SMCParseResult ReadSMC_NewSection(const char *name, bool opt_quotes) { return SMCParse_Continue; } /** * @brief Called when encountering a key/value pair in a section. * * @param key Key string. * @param value Value string. If no quotes were specified, this will be NULL, and key will contain the entire string. * @param key_quotes Whether or not the key was in quotation marks. * @param value_quotes Whether or not the value was in quotation marks. * @return SMCParseResult directive. */ virtual SMCParseResult ReadSMC_KeyValue(const char *key, const char *value, bool key_quotes, bool value_quotes) { return SMCParse_Continue; } /** * @brief Called when leaving the current section. * * @return SMCParseResult directive. */ virtual SMCParseResult ReadSMC_LeavingSection() { return SMCParse_Continue; } /** * @brief Called after an input line has been preprocessed. * * @param line String containing line input. * @param curline Number of line in file. * @return SMCParseResult directive. */ virtual SMCParseResult ReadSMC_RawLine(const char *line, unsigned int curline) { return SMCParse_Continue; } }; #define SMINTERFACE_TEXTPARSERS_NAME "ITextParsers" #define SMINTERFACE_TEXTPARSERS_VERSION 1 /** * @brief Contains various text stream parsing functions. */ class ITextParsers : public SMInterface { public: virtual const char *GetInterfaceName() { return SMINTERFACE_TEXTPARSERS_NAME; } virtual unsigned int GetInterfaceVersion() { return SMINTERFACE_TEXTPARSERS_VERSION; } public: /** * @brief Parses an INI-format file. * * @param file Path to file. * @param ini_listener Event handler for reading file. * @param line If non-NULL, will contain last line parsed (0 if file could not be opened). * @param col If non-NULL, will contain last column parsed (undefined if file could not be opened). * @return True if parsing succeeded, false if file couldn't be opened or there was a syntax error. */ virtual bool ParseFile_INI(const char *file, ITextListener_INI *ini_listener, unsigned int *line, unsigned int *col) =0; /** * @brief Parses an SMC-format text file. * Note that the parser makes every effort to obey broken syntax. * For example, if an open brace is missing, but the section name has a colon, * it will let you know. It is up to the event handlers to decide whether to be strict or not. * * @param file Path to file. * @param smc_listener Event handler for reading file. * @param line If non-NULL, will contain last line parsed (0 if file could not be opened). * @param col If non-NULL, will contain last column parsed (undefined if file could not be opened). * @return An SMCParseError result code. */ virtual SMCParseError ParseFile_SMC(const char *file, ITextListener_SMC *smc_listener, unsigned int *line, unsigned int *col) =0; /** * @brief Converts an SMCParseError to a string. * * @param err SMCParseError. * @return String error message, or NULL if none. */ virtual const char *GetSMCErrorString(SMCParseError err) =0; public: /** * @brief Returns the number of bytes that a multi-byte character contains in a UTF-8 stream. * If the current character is not multi-byte, the function returns 1. * * @param stream Pointer to multi-byte ANSI character string. * @return Number of bytes in current character. */ virtual unsigned int GetUTF8CharBytes(const char *stream) =0; }; } #endif //_INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_