sourcemod/public/ITextParsers.h

452 lines
14 KiB
C++

/**
* vim: set ts=4 :
* =============================================================================
* SourceMod
* Copyright (C) 2004-2008 AlliedModders LLC. All rights reserved.
* =============================================================================
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, version 3.0, as published by the
* Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, AlliedModders LLC gives you permission to link the
* code of this program (as well as its derivative works) to "Half-Life 2," the
* "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
* by the Valve Corporation. You must obey the GNU General Public License in
* all respects for all other code used. Additionally, AlliedModders LLC grants
* this exception to all derivative works. AlliedModders LLC defines further
* exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
* or <http://www.sourcemod.net/license.php>.
*
* Version: $Id$
*/
#ifndef _INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_
#define _INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_
/**
* @file ITextParsers.h
* @brief Defines various text/file parsing functions, as well as UTF-8 support code.
*/
#include <IShareSys.h>
namespace SourceMod
{
#define SMINTERFACE_TEXTPARSERS_NAME "ITextParsers"
#define SMINTERFACE_TEXTPARSERS_VERSION 4
/**
* The INI file format is defined as:
* WHITESPACE: 0x20, \n, \t, \r
* IDENTIFIER: A-Z a-z 0-9 _ - , + . $ ? /
* STRING: Any set of symbols
*
* Basic syntax is comprised of SECTIONs.
* A SECTION is defined as:
* [SECTIONNAME]
* OPTION
* OPTION
* OPTION...
*
* SECTIONNAME is an IDENTIFIER.
* OPTION can be repeated any number of times, once per line.
* OPTION is defined as one of:
* KEY = "VALUE"
* KEY = VALUE
* KEY
* Where KEY is an IDENTIFIER and VALUE is a STRING.
*
* WHITESPACE should always be omitted.
* COMMENTS should be stripped, and are defined as text occurring in:
* ;<TEXT>
*
* Example file below. Note that
* The second line is technically invalid. The event handler
* must decide whether this should be allowed.
* --FILE BELOW--
* [gaben]
* hi = clams
* bye = "NO CLAMS"
*
* [valve]
* cannot
* maintain
* products
*/
/**
* @brief Contains parse events for INI files.
*/
class ITextListener_INI
{
public:
/**
* @brief Returns version number.
*/
virtual unsigned int GetTextParserVersion1()
{
return SMINTERFACE_TEXTPARSERS_VERSION;
}
public:
/**
* @brief Called when a new section is encountered in an INI file.
*
* @param section Name of section in between the [ and ] characters.
* @param invalid_tokens True if invalid tokens were detected in the name.
* @param close_bracket True if a closing bracket was detected, false otherwise.
* @param extra_tokens True if extra tokens were detected on the line.
* @param curtok Contains current token in the line where the section name starts.
* You can add to this offset when failing to point to a token.
* @return True to keep parsing, false otherwise.
*/
virtual bool ReadINI_NewSection(const char *section,
bool invalid_tokens,
bool close_bracket,
bool extra_tokens,
unsigned int *curtok)
{
return true;
}
/**
* @brief Called when encountering a key/value pair in an INI file.
*
* @param key Name of key.
* @param value String containing value (with quotes stripped, if any).
* @param invalid_tokens Whether or not the key contained invalid tokens.
* @param equal_token There was an '=' sign present (in case the value is missing).
* @param quotes Whether value was enclosed in quotes.
* @param curtok Contains the token index of the start of the value string.
* This can be changed when returning false.
* @return True to keep parsing, false otherwise.
*/
virtual bool ReadINI_KeyValue(const char *key,
const char *value,
bool invalid_tokens,
bool equal_token,
bool quotes,
unsigned int *curtok)
{
return true;
}
/**
* @brief Called after a line has been preprocessed, if it has text.
*
* @param line Contents of line.
* @param curtok Pointer to optionally store failed position in string.
* @return True to keep parsing, false otherwise.
*/
virtual bool ReadINI_RawLine(const char *line, unsigned int *curtok)
{
return true;
}
};
/**
* :TODO: write this in CFG (context free grammar) format so it makes sense
*
* The SMC file format is defined as:
* WHITESPACE: 0x20, \n, \t, \r
* IDENTIFIER: Any ASCII character EXCLUDING ", {, }, ;, //, / *, or WHITESPACE.
* STRING: Any set of symbols enclosed in quotes.
* Note: if a STRING does not have quotes, it is parsed as an IDENTIFIER.
*
* Basic syntax is comprised of SECTIONBLOCKs.
* A SECTIONBLOCK defined as:
*
* SECTIONNAME
* {
* OPTION
* }
*
* OPTION can be repeated any number of times inside a SECTIONBLOCK.
* A new line will terminate an OPTION, but there can be more than one OPTION per line.
* OPTION is defined any of:
* "KEY" "VALUE"
* SECTIONBLOCK
*
* SECTIONNAME, KEY, VALUE, and SINGLEKEY are strings
* SECTIONNAME cannot have trailing characters if quoted, but the quotes can be optionally removed.
* If SECTIONNAME is not enclosed in quotes, the entire sectionname string is used (minus surrounding whitespace).
* If KEY is not enclosed in quotes, the key is terminated at first whitespace.
* If VALUE is not properly enclosed in quotes, the entire value string is used (minus surrounding whitespace).
* The VALUE may have inner quotes, but the key string may not.
*
* For an example, see configs/permissions.cfg
*
* WHITESPACE should be ignored.
* Comments are text occurring inside the following tokens, and should be stripped
* unless they are inside literal strings:
* ;<TEXT>
* //<TEXT>
* / *<TEXT> */
/**
* @brief Lists actions to take when an SMC parse hook is done.
*/
enum SMCResult
{
SMCResult_Continue, /**< Continue parsing */
SMCResult_Halt, /**< Stop parsing here */
SMCResult_HaltFail /**< Stop parsing and return SMCError_Custom */
};
/**
* @brief Lists error codes possible from parsing an SMC file.
*/
enum SMCError
{
SMCError_Okay = 0, /**< No error */
SMCError_StreamOpen, /**< Stream failed to open */
SMCError_StreamError, /**< The stream died... somehow */
SMCError_Custom, /**< A custom handler threw an error */
SMCError_InvalidSection1, /**< A section was declared without quotes, and had extra tokens */
SMCError_InvalidSection2, /**< A section was declared without any header */
SMCError_InvalidSection3, /**< A section ending was declared with too many unknown tokens */
SMCError_InvalidSection4, /**< A section ending has no matching beginning */
SMCError_InvalidSection5, /**< A section beginning has no matching ending */
SMCError_InvalidTokens, /**< There were too many unidentifiable strings on one line */
SMCError_TokenOverflow, /**< The token buffer overflowed */
SMCError_InvalidProperty1, /**< A property was declared outside of any section */
};
/**
* @brief States for line/column
*/
struct SMCStates
{
unsigned int line; /**< Current line */
unsigned int col; /**< Current col */
};
/**
* @brief Describes the events available for reading an SMC stream.
*/
class ITextListener_SMC
{
public:
/**
* @brief Returns version number.
*/
virtual unsigned int GetTextParserVersion2()
{
return SMINTERFACE_TEXTPARSERS_VERSION;
}
public:
/**
* @brief Called when starting parsing.
*/
virtual void ReadSMC_ParseStart()
{
};
/**
* @brief Called when ending parsing.
*
* @param halted True if abnormally halted, false otherwise.
* @param failed True if parsing failed, false otherwise.
*/
virtual void ReadSMC_ParseEnd(bool halted, bool failed)
{
}
/**
* @brief Called when entering a new section
*
* @param states Parsing states.
* @param name Name of section, with the colon omitted.
* @return SMCResult directive.
*/
virtual SMCResult ReadSMC_NewSection(const SMCStates *states, const char *name)
{
return SMCResult_Continue;
}
/**
* @brief Called when encountering a key/value pair in a section.
*
* @param states Parsing states.
* @param key Key string.
* @param value Value string. If no quotes were specified, this will be NULL,
* and key will contain the entire string.
* @return SMCResult directive.
*/
virtual SMCResult ReadSMC_KeyValue(const SMCStates *states, const char *key, const char *value)
{
return SMCResult_Continue;
}
/**
* @brief Called when leaving the current section.
*
* @param states Parsing states.
* @return SMCResult directive.
*/
virtual SMCResult ReadSMC_LeavingSection(const SMCStates *states)
{
return SMCResult_Continue;
}
/**
* @brief Called after an input line has been preprocessed.
*
* @param states Parsing states.
* @param line Contents of the line, null terminated at the position
* of the newline character (thus, no newline will exist).
* @return SMCResult directive.
*/
virtual SMCResult ReadSMC_RawLine(const SMCStates *states, const char *line)
{
return SMCResult_Continue;
}
};
/**
* @brief Contains various text stream parsing functions.
*/
class ITextParsers : public SMInterface
{
public:
virtual const char *GetInterfaceName()
{
return SMINTERFACE_TEXTPARSERS_NAME;
}
virtual unsigned int GetInterfaceVersion()
{
return SMINTERFACE_TEXTPARSERS_VERSION;
}
virtual bool IsVersionCompatible(unsigned int version)
{
if (version < 2)
{
return false;
}
return SMInterface::IsVersionCompatible(version);
}
public:
/**
* @brief Parses an INI-format file.
*
* @param file Path to file.
* @param ini_listener Event handler for reading file.
* @param line If non-NULL, will contain last line parsed (0 if file could not be opened).
* @param col If non-NULL, will contain last column parsed (undefined if file could not be opened).
* @return True if parsing succeeded, false if file couldn't be opened or there was a syntax error.
*/
virtual bool ParseFile_INI(const char *file,
ITextListener_INI *ini_listener,
unsigned int *line,
unsigned int *col) =0;
/**
* @brief Parses an SMC-format text file.
* Note that the parser makes every effort to obey broken syntax.
* For example, if an open brace is missing, but the section name has a colon,
* it will let you know. It is up to the event handlers to decide whether to be strict or not.
*
* @param file Path to file.
* @param smc_listener Event handler for reading file.
* @param states Optional pointer to store last known states.
* @return An SMCError result code.
*/
virtual SMCError ParseFile_SMC(const char *file,
ITextListener_SMC *smc_listener,
SMCStates *states) =0;
/**
* @brief Converts an SMCError to a string.
*
* @param err SMCError.
* @return String error message, or NULL if none.
*/
virtual const char *GetSMCErrorString(SMCError err) =0;
public:
/**
* @brief Returns the number of bytes that a multi-byte character contains in a UTF-8 stream.
* If the current character is not multi-byte, the function returns 1.
*
* @param stream Pointer to multi-byte ANSI character string.
* @return Number of bytes in current character.
*/
virtual unsigned int GetUTF8CharBytes(const char *stream) =0;
/**
* @brief Returns whether the first multi-byte character in the given stream
* is a whitespace character.
*
* @param stream Pointer to multi-byte character string.
* @return True if first character is whitespace, false otherwise.
*/
virtual bool IsWhitespace(const char *stream) =0;
/**
* @brief Same as ParseFile_SMC, but with an extended error buffer.
*
* @param file Path to file.
* @param smc_listener Event handler for reading file.
* @param states Optional pointer to store last known states.
* @param buffer Error message buffer.
* @param maxsize Maximum size of the error buffer.
* @return Error code.
*/
virtual SMCError ParseSMCFile(const char *file,
ITextListener_SMC *smc_listener,
SMCStates *states,
char *buffer,
size_t maxsize) =0;
/**
* @brief Parses a raw UTF8 stream as an SMC file.
*
* @param stream Memory containing data.
* @param length Number of bytes in the stream.
* @param smc_listener Event handler for reading file.
* @param states Optional pointer to store last known states.
* @param buffer Error message buffer.
* @param maxsize Maximum size of the error buffer.
* @return Error code.
*/
virtual SMCError ParseSMCStream(const char *stream,
size_t length,
ITextListener_SMC *smc_listener,
SMCStates *states,
char *buffer,
size_t maxsize) =0;
};
inline unsigned int _GetUTF8CharBytes(const char *stream)
{
unsigned char c = *(unsigned char *)stream;
if (c & (1<<7))
{
if (c & (1<<5))
{
if (c & (1<<4))
{
return 4;
}
return 3;
}
return 2;
}
return 1;
}
}
extern SourceMod::ITextParsers *textparsers;
#endif //_INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_