// sourcemod/public/sm_trie_tpl.h

/**
 * vim: set ts=4 :
 * =============================================================================
 * SourceMod
 * Copyright (C) 2004-2008 AlliedModders LLC.  All rights reserved.
 * =============================================================================
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License, version 3.0, as published by the
 * Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * As a special exception, AlliedModders LLC gives you permission to link the
 * code of this program (as well as its derivative works) to "Half-Life 2," the
 * "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
 * by the Valve Corporation.  You must obey the GNU General Public License in
 * all respects for all other code used.  Additionally, AlliedModders LLC grants
 * this exception to all derivative works.  AlliedModders LLC defines further
 * exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
 * or <http://www.sourcemod.net/license.php>.
 *
 * Version: $Id$
 */

#ifndef _INCLUDE_SOURCEMOD_TEMPLATED_TRIE_H_
#define _INCLUDE_SOURCEMOD_TEMPLATED_TRIE_H_

#include <new>
#include <string.h>
#include <malloc.h>
#include <assert.h>

enum NodeType
{
	Node_Unused = 0,		/* Node is not being used (sparse) */
	Node_Arc,				/* Node is part of an arc and does not terminate */
	Node_Term,				/* Node is a terminator */
};

/**
 * @brief Trie class for storing key/value pairs, based on double array tries.
 * @file sm_trie_tpl.h
 *
 * For full works cited and implementation overview, there is a big comment
 * block at the bottom of this file.
 *
 * Values of type K are kept inside malloc()ed node storage; they are created
 * with placement new and destroyed with explicit destructor calls, guarded by
 * each node's "valset" flag.  K must be default constructible, copy
 * constructible and copy assignable (see insert()).
 *
 * The empty string key is stored out-of-line in a separately allocated node
 * (m_empty), so every operation that walks "all values" has to handle it
 * explicitly.
 *
 * NOTE: No copy constructor or assignment operator is defined while the
 * destructor frees the raw buffers, so a KTrie must not be copied.
 */

template <typename K>
class KTrie
{
	class KTrieNode;
public:
	/**
	 * @brief Clears all set objects in the trie.
	 *
	 * This includes the object stored at the empty string key, which lives
	 * outside of the base array.
	 */
	void clear()
	{
		/* The empty key is not part of m_base, so run_destructors() and
		 * internal_clear() never see it.  Destroy it here, otherwise a stale
		 * value stays retrievable while size() reports 0.
		 */
		if (m_empty != NULL && m_empty->valset)
		{
			m_empty->value.~K();
			m_empty->valset = false;
		}

		run_destructors();
		internal_clear();
	}

	/**
	 * @brief Removes a key from the trie.
	 *
	 * Only the stored object is destroyed; the node itself stays in the trie.
	 *
	 * @param key		Key to remove.
	 * @return			True on success, false if key was never set.
	 */
	bool remove(const char *key)
	{
		KTrieNode *node = internal_retrieve(key);
		if (!node || !node->valset)
		{
			return false;
		}

		node->value.~K();
		node->valset = false;

		m_numElements--;

		return true;
	}

	/**
	 * @brief Retrieves a pointer to the object stored at a given key.
	 *
	 * @param key		Key to retrieve.
	 * @return			Pointer to object, or NULL if key was not found or not set.
	 */
	K * retrieve(const char *key)
	{
		KTrieNode *node = internal_retrieve(key);
		if (!node || !node->valset)
		{
			return NULL;
		}
		return &node->value;
	}

	/**
	 * @brief Inserts or updates the object stored at a key.
	 *
	 * @param key		Key to update or insert.
	 * @param obj		Object to store at the key.
	 * @return			True on success, false on failure.
	 */
	bool replace(const char *key, const K & obj)
	{
		KTrieNode *prev_node = internal_retrieve(key);
		if (!prev_node)
		{
			return insert(key, obj);
		}

		/* A node can exist without holding a value: interior prefix nodes,
		 * keys that were removed, and an unset empty-key node all have
		 * valset == false.
		 */
		bool was_set = prev_node->valset;
		if (was_set)
		{
			prev_node->value.~K();
		}

		new (&prev_node->value) K(obj);

		if (!was_set)
		{
			/* This is effectively a fresh insertion - publish the value and
			 * account for it, otherwise retrieve() would never see it.
			 */
			prev_node->valset = true;
			m_numElements++;
		}

		return true;
	}

	/**
	 * @brief Inserts an object at a key.
	 *
	 * @param key		Key to insert at.
	 * @param obj		Object to store at the key.
	 * @return			True on success, false if the key is already set or
	 *					insertion otherwise failed.
	 */
	bool insert(const char *key, const K & obj)
	{
		unsigned int lastidx = 1;		/* the last node index */
		unsigned int curidx;			/* current node index */
		const char *keyptr = key;		/* input stream at current token */
		KTrieNode *node = NULL;			/* current node being processed */
		unsigned int q;					/* temporary var for x_check results */
		unsigned int curoffs;			/* current offset */

		/**
		 * Empty strings are a special case, since there are no productions.  We could
		 * probably rework it to use BASE[0] but this hack is easier.
		 */
		if (*key == '\0')
		{
			if (m_empty != NULL && m_empty->valset)
			{
				return false;
			}

			if (m_empty == NULL)
			{
				m_empty = (KTrieNode *)malloc(sizeof(KTrieNode));
				if (m_empty == NULL)
				{
					/* Out of memory - report failure rather than crash below */
					return false;
				}
			}

			m_empty->valset = true;
			new (&m_empty->value) K(obj);

			m_numElements++;

			return true;
		}

		/* Start traversing at the root node (1) */
		do
		{
			/* Find where the next character is, then advance */
			curidx = m_base[lastidx].idx;
			curoffs = charval(*keyptr);
			curidx += curoffs;
			node = &m_base[curidx];
			keyptr++;

			/* Check if this slot is supposed to be empty.  If so, we need to handle CASES 1/2:
			 * Insertion without collisions
			 */
			if ( (curidx > m_baseSize) || (node->mode == Node_Unused) )
			{
				if (curidx > m_baseSize)
				{
					if (!grow())
					{
						return false;
					}
					/* grow() replaced m_base, so the node pointer must be re-fetched */
					node = &m_base[curidx];
				}
				node->parent = lastidx;
				if (*keyptr == '\0')
				{
					node->mode = Node_Arc;
				}
				else
				{
					node->idx = x_addstring(keyptr);
					node->mode = Node_Term;
				}
				node->valset = true;
				new (&node->value) K(obj);

				m_numElements++;

				return true;
			}
			else if (node->parent != lastidx)
			{
				/* Collision! We have to split up the tree here.  CASE 4:
				 * Insertion when a new word is inserted with a collision.
				 * NOTE: This is the hardest case to handle.  All below examples are based on:
				 * BACHELOR, BADGE, inserting BABY.
				 * The problematic production here is A -> B, where B is already being used.
				 *
				 * This process has to rotate one half of the 'A' arc.  We generate two lists:
				 *  Outgoing Arcs - Anything leaving this 'A'
				 *  Incoming Arcs - Anything going to this 'A'
				 * Whichever list is smaller will be moved.  Note that this works because the intersection
				 * affects both arc chains, and moving one will make the slot available to either.
				 */
				KTrieNode *cur;

				/* Find every node arcing from the last node.
				 * I.e. for BACHELOR, BADGE, BABY,
				 * The arcs leaving A will be C and D, but our current node is B -> *.
				 * Thus, we use the last index (A) to find the base for arcs leaving A.
				 */
				unsigned int outgoing_base = m_base[lastidx].idx;
				unsigned int outgoing_list[256];
				unsigned int outgoing_count = 0;	/* count the current index here */
				cur = &m_base[outgoing_base] + 1;
				unsigned int outgoing_limit = 255;

				if (outgoing_base + outgoing_limit > m_baseSize)
				{
					outgoing_limit = m_baseSize - outgoing_base;
				}

				for (unsigned int i=1; i<=outgoing_limit; i++,cur++)
				{
					if (cur->mode == Node_Unused || cur->parent != lastidx)
					{
						continue;
					}
					outgoing_list[outgoing_count++] = i;
				}
				outgoing_list[outgoing_count++] = curidx - outgoing_base;

				/* Now we need to find all the arcs leaving our parent...
				 * Note: the inconsistency is the base of our parent.
				 */
				assert(m_base[node->parent].mode == Node_Arc);
				unsigned int incoming_list[256];
				unsigned int incoming_base = m_base[node->parent].idx;
				unsigned int incoming_count = 0;
				unsigned int incoming_limit = 255;
				cur = &m_base[incoming_base] + 1;

				if (incoming_base + incoming_limit > m_baseSize)
				{
					incoming_limit = m_baseSize - incoming_base;
				}

				assert(incoming_limit > 0 && incoming_limit <= 255);

				for (unsigned int i=1; i<=incoming_limit; i++,cur++)
				{
					if (cur->mode == Node_Arc || cur->mode == Node_Term)
					{
						if (cur->parent == node->parent)
						{
							incoming_list[incoming_count++] = i;
						}
					}
				}

				if (incoming_count < outgoing_count + 1)
				{
					q = x_check_multi(incoming_list, incoming_count);

					/* x_check_multi() may have grown the array; re-fetch */
					node = &m_base[curidx];

					/* If we're incoming, we need to modify our parent */
					m_base[node->parent].idx = q;

					/* For each node in the "to move" list,
					 * Relocate the node's info to the new position.
					 */
					unsigned int idx, newidx, oldidx;
					for (unsigned int i=0; i<incoming_count; i++)
					{
						idx = incoming_list[i];
						newidx = q + idx;
						oldidx = incoming_base + idx;
						if (oldidx == lastidx)
						{
							/* Important! Make sure we're not invalidating our sacred lastidx */
							lastidx = newidx;
						}
						/* Fully copy the node */
						memcpy(&m_base[newidx], &m_base[oldidx], sizeof(KTrieNode));
						if (m_base[oldidx].valset)
						{
							new (&m_base[newidx].value) K(m_base[oldidx].value);
							m_base[oldidx].value.~K();
						}
						assert(m_base[m_base[newidx].parent].mode == Node_Arc);
						/* Erase old data */
						memset(&m_base[oldidx], 0, sizeof(KTrieNode));
						/* If we are not a terminator, we have children we must take care of */
						if (m_base[newidx].mode == Node_Arc)
						{
							KTrieNode *check_base = &m_base[m_base[newidx].idx] + 1;
							outgoing_limit = (m_base + m_baseSize + 1) - check_base;
							if (outgoing_limit > 255)
							{
								outgoing_limit = 255;
							}
							for (unsigned int j=1; j<=outgoing_limit; j++, check_base++)
							{
								if (check_base->parent == oldidx)
								{
									check_base->parent = newidx;
								}
							}
						}
					}
				}
				else
				{
					q = x_check_multi(outgoing_list, outgoing_count);

					/* x_check_multi() may have grown the array; re-fetch */
					node = &m_base[curidx];

					/* If we're outgoing, we need to modify our own base */
					m_base[lastidx].idx = q;

					/* Take the last index (curidx) out of the list.  Technically we are not moving this,
					 * since it's already being used by something else.
					 */
					outgoing_count--;

					/* For each node in the "to move" list,
					 * Relocate the node's info to the new position.
					 */
					unsigned int idx, newidx, oldidx;
					for (unsigned int i=0; i<outgoing_count; i++)
					{
						idx = outgoing_list[i];
						newidx = q + idx;
						oldidx = outgoing_base + idx;
						if (oldidx == lastidx)
						{
							/* Important! Make sure we're not invalidating our sacred lastidx */
							lastidx = newidx;
						}
						/* Fully copy the node */
						memcpy(&m_base[newidx], &m_base[oldidx], sizeof(KTrieNode));
						if (m_base[oldidx].valset)
						{
							new (&m_base[newidx].value) K(m_base[oldidx].value);
							m_base[oldidx].value.~K();
						}
						assert(m_base[m_base[newidx].parent].mode == Node_Arc);
						/* Erase old data */
						memset(&m_base[oldidx], 0, sizeof(KTrieNode));
						/* If we are not a terminator, we have children we must take care of */
						if (m_base[newidx].mode == Node_Arc)
						{
							KTrieNode *check_base = &m_base[m_base[newidx].idx] + 1;
							outgoing_limit = (m_base + m_baseSize + 1) - check_base;
							if (outgoing_limit > 255)
							{
								outgoing_limit = 255;
							}
							for (unsigned int j=1; j<=outgoing_limit; j++, check_base++)
							{
								if (check_base->parent == oldidx)
								{
									check_base->parent = newidx;
								}
							}
						}
					}

					/* Take the invisible node and use it as our new node */
					node = &m_base[q + outgoing_list[outgoing_count]];
				}

				/* We're finally done! */
				node->parent = lastidx;
				if (*keyptr == '\0')
				{
					node->mode = Node_Arc;
				}
				else
				{
					node->idx = x_addstring(keyptr);
					node->mode = Node_Term;
				}
				node->valset = true;
				new (&node->value) K(obj);

				m_numElements++;

				return true;
			}
			else
			{
				/* See what's in the next node - special case if terminator! */
				if (node->mode == Node_Term)
				{
					/* If we're a terminator, we need to handle CASE 3:
					 * Insertion when a terminating collision occurs
					 */
					char *term = &m_stringtab[node->idx];
					/* Do an initial browsing to make sure they're not the same string */
					if (strcmp(keyptr, term) == 0)
					{
						if (!node->valset)
						{
							node->valset = true;
							new (&node->value) K(obj);
							m_numElements++;
							return true;
						}
						/* Same string.  We can't insert. */
						return false;
					}
					/* For each matching character pair, we need to disband the terminator.
					 * This splits the similar prefix into a single arc path.
					 * First, save the old values so we can move them to a new node.
					 * Next, for each loop:
					 *  Take the current (invalid) node, and point it to the next arc base.
					 *  Set the current node to the node at the next arc.
					 */
					K oldvalue;
					bool oldvalset = node->valset;
					if (oldvalset)
					{
						oldvalue = node->value;
					}
					if (*term == *keyptr)
					{
						while (*term == *keyptr)
						{
							/* Find the next free slot in the check array.
							 * This is the "vector base" essentially
							 */
							q = x_check(*term);
							node = &m_base[curidx];
							/* Point the node to the next new base */
							node->idx = q;
							node->mode = Node_Arc;
							if (node->valset == true)
							{
								node->value.~K();
								node->valset = false;
							}
							/* Advance the input stream and local variables */
							lastidx = curidx;
							curidx = q + charval(*term);
							node = &m_base[curidx];
							/* Make sure the new current node has its parent set. */
							node->parent = lastidx;
							node->mode = Node_Arc;	/* Just in case we run x_check again */
							*term = '\0';	/* Unmark the string table here */
							term++;
							keyptr++;
						}
					}
					else if (node->valset)
					{
						node->valset = false;
						node->value.~K();
					}
					/* We're done inserting new pairs.  If one of them is exhausted,
					 * we take special shortcuts.
					 */
					if (*term == '\0')				//EX: BADGERHOUSE added over B -> ADGER.
					{
						/* First backpatch the current node - it ends the newly split terminator.
						 * In the example, this would mean the node is the production from R -> ?
						 * This node ends the old BADGER, so we set it here.
						 */
						node->valset = oldvalset;
						if (node->valset)
						{
							new (&node->value) K(oldvalue);
						}

						/* The terminator was split up, but pieces of keyptr remain.
						 * We need to generate a new production, in this example, R -> H,
						 * with H being a terminator to OUSE.  Thus we get:
						 * B,A,D,G,E,R*,H*->OUSE (* = value set).
						 * NOTE: parent was last set at the end of the while loop.
						 */
						/* Get the new base and apply re-basing */
						q = x_check(*keyptr);
						node = &m_base[curidx];

						node->idx = q;
						node->mode = Node_Arc;
						lastidx = curidx;
						/* Finish the final node */
						curidx = q + charval(*keyptr);
						node = &m_base[curidx];
						keyptr++;
						/* Optimize - don't add to string table if there's nothing more to eat */
						if (*keyptr == '\0')
						{
							node->mode = Node_Arc;
						}
						else
						{
							node->idx = x_addstring(keyptr);
							node->mode = Node_Term;
						}
						node->parent = lastidx;
						node->valset = true;
						new (&node->value) K(obj);
					}
					else if (*keyptr == '\0')
					{	//EX: BADGER added over B -> ADGERHOUSE
						/* First backpatch the current node - it ends newly split input string.
						 * This is the exact opposite of the above procedure.
						 */
						node->valset = true;
						new (&node->value) K(obj);

						/* Get the new base and apply re-basing */
						q = x_check(*term);
						node = &m_base[curidx];

						node->idx = q;
						node->mode = Node_Arc;
						lastidx = curidx;
						/* Finish the final node */
						curidx = q + charval(*term);
						node = &m_base[curidx];
						term++;
						/* Optimize - don't add to string table if there's nothing more to eat */
						if (*term == '\0')
						{
							node->mode = Node_Arc;
						}
						else
						{
							node->idx = (term - m_stringtab); /* Already in the string table! */
							node->mode = Node_Term;
						}
						node->parent = lastidx;
						node->valset = oldvalset;
						if (node->valset)
						{
							new (&node->value) K(oldvalue);
						}
					}
					else
					{
						/* Finally, we have to create two new nodes instead of just one. */
						node->mode = Node_Arc;

						/* Get the new base and apply re-basing */
						q = x_check2(*keyptr, *term);
						node = &m_base[curidx];

						node->idx = q;
						lastidx = curidx;

						/* Re-create the old terminated node */
						curidx = q + charval(*term);
						node = &m_base[curidx];
						term++;
						node->valset = oldvalset;
						if (node->valset)
						{
							new (&node->value) K(oldvalue);
						}
						node->parent = lastidx;
						if (*term == '\0')
						{
							node->mode = Node_Arc;
						}
						else
						{
							node->mode = Node_Term;
							node->idx = (term - m_stringtab); /* Already in the string table! */
						}

						/* Create the new keyed input node.
						 * NOTE: x_addstring() below may reallocate the string table,
						 * so "term" must not be used past this point.
						 */
						curidx = q + charval(*keyptr);
						node = &m_base[curidx];
						keyptr++;
						node->valset = true;
						new (&node->value) K(obj);
						node->parent = lastidx;
						if (*keyptr == '\0')
						{
							node->mode = Node_Arc;
						}
						else
						{
							node->mode = Node_Term;
							node->idx = x_addstring(keyptr);
						}
					}

					m_numElements++;

					/* Phew! */
					return true;
				}
				else
				{
					assert(node->mode == Node_Arc);
				}
			}
			lastidx = curidx;
		} while (*keyptr != '\0');

		assert(node);

		/* If we've exhausted the string and we have a valid reached node,
		 * the production rule already existed.  Make sure it's valid to set first.
		 */

		/* We have to be an Arc.  If the last result was anything else, we would have returned a new
		 * production earlier.
		 */
		assert(node->mode == Node_Arc);

		if (!node->valset)
		{
			node->valset = true;
			new (&node->value) K(obj);
			m_numElements++;
			return true;
		}

		return false;
	}
public:
	/**
	 * @brief Creates an empty trie with 256 usable base nodes and a 256 byte
	 * string table.
	 */
	KTrie()
	{
		/* Index 0 is never used for a node, hence the extra slot */
		m_base = (KTrieNode *)malloc(sizeof(KTrieNode) * (256 + 1));
		m_stringtab = (char *)malloc(sizeof(char) * 256);
		m_baseSize = 256;
		m_stSize = 256;
		m_empty = NULL;
		m_numElements = 0;

		internal_clear();
	}

	/**
	 * @brief Destroys every stored object and releases all buffers.
	 */
	~KTrie()
	{
		if (m_empty != NULL && m_empty->valset)
		{
			m_empty->value.~K();
			m_empty->valset = false;
		}
		free(m_empty);
		run_destructors();
		free(m_base);
		free(m_stringtab);
	}

	/**
	 * @brief Runs a user supplied function over every set object and then
	 * marks the object as unset.
	 *
	 * The function is called instead of K's destructor; objects handled here
	 * are not destroyed again by clear() or the trie destructor.  The object
	 * stored at the empty string key is included, and size() is 0 afterwards.
	 *
	 * @param dtor		Function to call with a pointer to each set object.
	 */
	void run_destructor(void (*dtor)(K * ptr))
	{
		/* The empty key lives outside of m_base and must not be forgotten */
		if (m_empty != NULL && m_empty->valset)
		{
			dtor(&m_empty->value);
			m_empty->valset = false;
		}

		for (size_t i = 0; i <= m_baseSize; i++)
		{
			if (m_base[i].valset)
			{
				dtor(&m_base[i].value);
				m_base[i].valset = false;
			}
		}

		/* Every value is unset now, keep the element count truthful */
		m_numElements = 0;
	}
private:
	class KTrieNode
	{
		friend class KTrie;
	private:
		/**
		 * For Node_Arc, this index stores the 'base' offset to the next arc chain.
		 *   I.e. to jump from this arc to character C, it will be at base[idx+C].
		 * For Node_Term, this is an index into the string table.
		 */
		unsigned int idx;

		/**
		 * This contains the prior arc that we must have come from.
		 * For example, if arc 63 has a base jump of index 12, and we want to see if
		 * there is a valid character C, the parent of 12+C must be 63.
		 */
		unsigned int parent;
		K value;				/* Value associated with this node */
		NodeType mode;			/* Current usage type of the node */
		bool valset;			/* Whether or not a value is set */
	};
private:
	/**
	 * @brief Walks the trie and finds the node a key ends at.
	 *
	 * The returned node does not necessarily hold a value; callers must check
	 * its valset flag.
	 *
	 * @param key		Key to look up.
	 * @return			Node the key ends at (m_empty for the empty string, which
	 *					may be NULL), or NULL if no such production exists.
	 */
	KTrieNode *internal_retrieve(const char *key)
	{
		unsigned int lastidx = 1;		/* the last node index */
		unsigned int curidx;			/* current node index */
		const char *keyptr = key;		/* input stream at current token */
		KTrieNode *node = NULL;			/* current node being processed */

		if (!*key)
		{
			return m_empty;
		}

		/* Start traversing at the root node */
		do
		{
			/* Find where the next character is, then advance */
			curidx = m_base[lastidx].idx + charval(*keyptr);
			keyptr++;

			/* Don't even look at the node if it lies outside of the array */
			if (curidx > m_baseSize)
			{
				return NULL;
			}

			node = &m_base[curidx];

			/* Check if this slot is supposed to be empty or is a collision */
			if (node->mode == Node_Unused || node->parent != lastidx)
			{
				return NULL;
			}

			if (node->mode == Node_Term)
			{
				/* The rest of the key must match the stored tail exactly */
				if (strcmp(keyptr, &m_stringtab[node->idx]) == 0)
				{
					break;
				}
				return NULL;
			}

			lastidx = curidx;
		} while (*keyptr != '\0');

		return node;
	}

	/**
	 * @brief Doubles the size of the base array.
	 *
	 * Existing nodes are copied over (set values are copy constructed into the
	 * new array and destroyed in the old one) and the new upper half is zeroed.
	 * All pointers into m_base are invalidated.
	 *
	 * @return			True on success, false if allocation failed.
	 */
	bool grow()
	{
		/* The current # of nodes in the tree is trie->baseSize + 1 */
		unsigned int cur_size = m_baseSize;
		unsigned int new_size = cur_size * 2;

		KTrieNode *new_base = (KTrieNode *)malloc((new_size + 1) * sizeof(KTrieNode));
		if (!new_base)
		{
			return false;
		}

		memcpy(new_base, m_base, sizeof(KTrieNode) * (m_baseSize + 1));
		memset(&new_base[cur_size + 1], 0, (new_size - cur_size) * sizeof(KTrieNode));

		for (size_t i = 0; i <= m_baseSize; i++)
		{
			if (m_base[i].valset)
			{
				/* Placement construct+copy the object, then placement destroy the old. */
				new (&new_base[i].value) K(m_base[i].value);
				m_base[i].value.~K();
			}
		}

		free(m_base);
		m_base = new_base;
		m_baseSize = new_size;

		return true;
	}

	/**
	 * @brief Converts a key character into its (unsigned) arc offset.
	 */
	inline unsigned char charval(char c)
	{
		return (unsigned char)c;
	}

	/**
	 * @brief Zeroes the base array and string table and re-creates the root.
	 *
	 * No destructors are run here; callers are responsible for that.  The
	 * empty string node (m_empty) is not touched.
	 */
	void internal_clear()
	{
		m_tail = 0;
		m_numElements = 0;

		memset(m_base, 0, sizeof(KTrieNode) * (m_baseSize + 1));
		memset(m_stringtab, 0, sizeof(char) * m_stSize);

		/* Sentinel root node */
		m_base[1].idx = 1;
		m_base[1].mode = Node_Arc;
		m_base[1].parent = 1;
	}

	/**
	 * @brief Destroys every set object in the base array.
	 *
	 * The valset flags are left as they are, so this must be followed by
	 * internal_clear() or by freeing the array.  m_empty is not handled here.
	 */
	void run_destructors()
	{
		for (size_t i = 0; i <= m_baseSize; i++)
		{
			if (m_base[i].valset)
			{
				m_base[i].value.~K();
			}
		}
	}

	/**
	 * @brief Appends a string to the string table, growing it as needed.
	 *
	 * May reallocate m_stringtab, invalidating pointers into it.
	 * NOTE(review): the realloc() result is not checked for failure.
	 *
	 * @param ptr		String to copy into the table.
	 * @return			Offset of the copied string inside the table.
	 */
	unsigned int x_addstring(const char *ptr)
	{
		size_t len = strlen(ptr) + 1;

		if (m_tail + len >= m_stSize)
		{
			while (m_tail + len >= m_stSize)
			{
				m_stSize *= 2;
			}
			m_stringtab = (char *)realloc(m_stringtab,m_stSize);
		}

		unsigned int tail = m_tail;
		strcpy(&m_stringtab[tail], ptr);
		m_tail += len;

		return tail;
	}

	/**
	 * @brief Finds a base index whose slot for one character is unused.
	 *
	 * Grows the base array and keeps searching when nothing is free.
	 * NOTE(review): grow()'s result is not checked here.
	 *
	 * @param c			Character the new base must have room for.
	 * @param start		First base index to try.
	 * @return			Base index i such that m_base[i + c] is unused.
	 */
	unsigned int x_check(char c, unsigned int start=1)
	{
		unsigned char _c = charval(c);
		unsigned int to_check = m_baseSize - _c;
		for (unsigned int i=start; i<=to_check; i++)
		{
			if (m_base[i+_c].mode == Node_Unused)
			{
				return i;
			}
		}

		grow();

		return x_check(c, to_check+1);
	}

	/**
	 * @brief Finds a base index whose slots for two characters are both unused.
	 *
	 * Grows the base array and keeps searching when nothing is free.
	 * NOTE(review): grow()'s result is not checked here.
	 *
	 * @param c1		First character the new base must have room for.
	 * @param c2		Second character the new base must have room for.
	 * @param start		First base index to try.
	 * @return			Base index i such that m_base[i + c1] and m_base[i + c2]
	 *					are unused.
	 */
	unsigned int x_check2(char c1, char c2, unsigned int start=1)
	{
		unsigned char _c1 = charval(c1);
		unsigned char _c2 = charval(c2);
		unsigned int to_check = m_baseSize - (_c1 > _c2 ? _c1 : _c2);
		for (unsigned int i=start; i<=to_check; i++)
		{
			if (m_base[i+_c1].mode == Node_Unused
				&& m_base[i+_c2].mode == Node_Unused)
			{
				return i;
			}
		}

		grow();

		return x_check2(c1, c2, to_check+1);
	}

	/**
	 * @brief Finds a base index whose slots for a whole list of offsets are
	 * all unused.
	 *
	 * Grows the base array and keeps searching when nothing is free.
	 * NOTE(review): grow()'s result is not checked here.
	 *
	 * @param offsets	Offsets the new base must have room for.
	 * @param count		Number of entries in offsets.
	 * @param start		First base index to try.
	 * @return			Base index i such that m_base[i + offsets[j]] is unused
	 *					for every j.
	 */
	unsigned int x_check_multi(
		unsigned int offsets[],
		unsigned int count,
		unsigned int start=1)
	{
		KTrieNode *cur;
		unsigned int to_check = m_baseSize;
		unsigned int highest = 0;

		for (unsigned int i=0; i<count; i++)
		{
			if (offsets[i] > highest)
			{
				highest = offsets[i];
			}
		}

		/* Never probe past the end of the array */
		to_check -= highest;

		for (unsigned int i=start; i<=to_check; i++)
		{
			bool okay = true;
			for (unsigned int j=0; j<count; j++)
			{
				cur = &m_base[i+offsets[j]];
				if (cur->mode != Node_Unused)
				{
					okay = false;
					break;
				}
			}
			if (okay)
			{
				return i;
			}
		}

		grow();

		return x_check_multi(offsets, count, to_check+1);
	}
public:
	/**
	 * @brief Returns the number of bytes taken by the base array
	 * (m_baseSize + 1 nodes) and the string table.
	 */
	size_t mem_usage() const
	{
		return (sizeof(KTrieNode) * (m_baseSize))
			+ m_stSize
			+ sizeof(KTrieNode);
	}

	/**
	 * @brief Returns the number of keys that currently have an object set.
	 */
	size_t size() const
	{
		return m_numElements;
	}
private:
	KTrieNode *m_base;			/* Base array for the sparse tables */
	KTrieNode *m_empty;			/* Special case for empty strings */
	char *m_stringtab;			/* String table pointer */
	unsigned int m_baseSize;	/* Size of the base array, in members */
	unsigned int m_stSize;		/* Size of the string table, in bytes */
	unsigned int m_tail;		/* Current unused offset into the string table */
	size_t m_numElements;		/* Number of elements in use */
};

/**
 * Double Array Trie algorithm, based on:
 * An Efficient Implementation of Trie Structures, by
 *  Jun-ichi Aoe, Katsushi Morimoto, and Takashi Sato
 * from Software - Practice and Experience, Vol. 22(9), 695-721 (September 1992)
 *
 *  A Trie is a simple data structure which stores strings as DFAs, with each
 * transition state being a string entry.  For example, observe the following strings:
 *
 * BAILOPAN, BAT, BACON, BACK
 *  These transition as the following production rules:
 *  B -> ...                  B
 *       A -> ...             BA
 *            I -> ...        BAI
 *                 LOPAN      BAILOPAN
 *            T -> ...        BAT
 *            C ->            BAC
 *                 O -> ...   BACO
 *                      N     BACON
 *                 K          BACK
 *
 *  The standard implementation for this - using lists - gives a slow linear lookup, somewhere between
 * O(N+M) and O(log n).  A faster implementation is proposed in the paper above, which is based on compacting
 * the transition states into two arrays.  In the paper's implementation, two arrays are used, and thus it is
 * called the "Double Array" algorithm.  However, the CHECK array's size is maintained the same as BASE,
 * so they can be combined into one structure.  The array seems complex at first, but is very simple: it is a
 * tree structure flattened out into a single vector.  I am calling this implementation the Flat Array Trie.
 *
 *  BASE[] is an array where each member is a node in the Trie.  The node can either be UNUSED (empty), an ARC
 * (containing an offset to the next set of ARCs), or a TERMINATOR (contains the rest of a string).
 * Each node has an index which must be interpreted based on the node type.  If the node is a TERMINATOR, then the
 * index is an index into a string table, to find the rest of the string.
 *  If the node is an ARC, the index is another index into BASE.  For each possible token that can follow the
 * current token, the value of those tokens can be added to the index given in the ARC.  Thus, given a current
 * position and the next desired token, the current arc will jump to another arc which can contain either:
 *   1) An invalid production (collision, no entry exists)
 *   2) An empty production (no entry exists)
 *   3) Another arc label (the string ends here or continues into more productions)
 *   4) A TERMINATOR (the string ends here and contains an unused set of productions)
 *
 *  So, given current offset N (starting at N=1), jumping to token C means the next offset will be:
 *      offs = BASE[n] + C
 *  Thus, the next node will be at:
 *      BASE[BASE[n] + C]
 *
 *  This allows each ARC to specify the base offset for any of its ARC children, like a tree.  Each node specifies
 * its parent ARC -- so if an invalid offset is specified, the parent will not match, and thus no such derived
 * string exists.
 *
 *  This means that arrays can be laid out "sparsely," maximizing their usage.  Note that N need not be related to
 * the range of tokens (1-256).  I.e., a base index does not have to be at 1, 256, 512, et cetera.  This is because
 * insertion comes with a small deal of complexity.  To insert a new set of tokens T, the algorithm finds a new
 * BASE index N such that BASE[N+T[i]] is unused for each T[i].  Thus, indirection is not necessarily linear;
 * traversing a chain of ARC nodes can _and will_ jump around BASE.
 *
 *  Of course, given this level of flexibility in the array organization, there are collisions.  This is largely
 * where insertions become slow, as the old chain must be relocated before the new one is used.  Relocation means
 * finding one or more new base indexes, and this means traversing BASE until an acceptable index is found, such
 * that each offset is unused (see description in previous paragraph).
 *
 *  However, it is not insertion time we are concerned about.  The "trie" name comes from reTRIEval.  We are only
 * concerned with lookup and deletion.  Both lookup and deletion are O(k), where k is relative to the length of the
 * input string.  Note that it is best case O(1) and worst case O(k).  Deleting the entire trie is always O(1).
 */

#endif //_INCLUDE_SOURCEMOD_TEMPLATED_TRIE_H_