fixed a problem causing the string table to never grow
fixed growth and zeroing of the base array (was corrupting) fixed parent being set wrong resulting in node access corruption fixed a really retarded loop bug fixed base array growth invalidating cached pointers fixed insertion of similar strings which both had valid arc paths but no terminators --HG-- extra : convert_revision : svn%3A39bc706e-5318-0410-9160-8a85361fbb7c/trunk%40198
This commit is contained in:
parent
f3ad0f5b67
commit
eabc33b4d2
103
core/sm_trie.cpp
103
core/sm_trie.cpp
@ -118,12 +118,31 @@ inline unsigned char charval(char c)
|
||||
return (unsigned char)c;
|
||||
}
|
||||
|
||||
unsigned int x_check(Trie *trie, char c)
|
||||
bool sm_trie_grow(Trie *trie)
|
||||
{
|
||||
/* The current # of nodes in the tree is trie->baseSize + 1 */
|
||||
unsigned int curSize = trie->baseSize;
|
||||
unsigned int newSize = curSize * 2;
|
||||
|
||||
//:TODO: Make functions calling this return failure if this fails
|
||||
trie->base = (TrieNode *)realloc(trie->base, (newSize + 1) * sizeof(TrieNode));
|
||||
if (!trie->base)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
memset(&trie->base[curSize+1], 0, (newSize - curSize) * sizeof(TrieNode));
|
||||
trie->baseSize = newSize;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned int x_check(Trie *trie, char c, unsigned int start=1)
|
||||
{
|
||||
TrieNode *base = trie->base;
|
||||
unsigned char _c = charval(c);
|
||||
unsigned int to_check = trie->baseSize - _c;
|
||||
for (unsigned int i=1; i<to_check; i++)
|
||||
for (unsigned int i=start; i<=to_check; i++)
|
||||
{
|
||||
if (base[i+_c].mode == Node_Unused)
|
||||
{
|
||||
@ -131,21 +150,18 @@ unsigned int x_check(Trie *trie, char c)
|
||||
}
|
||||
}
|
||||
|
||||
trie->base = (TrieNode *)realloc(trie->base, trie->baseSize * sizeof(TrieNode) * 2);
|
||||
memset(trie->base + trie->baseSize, 0, trie->baseSize * sizeof(TrieNode));
|
||||
to_check = trie->baseSize;
|
||||
trie->baseSize *= 2;
|
||||
sm_trie_grow(trie);
|
||||
|
||||
return to_check;
|
||||
return x_check(trie, c, to_check+1);
|
||||
}
|
||||
|
||||
unsigned int x_check2(Trie *trie, char c1, char c2)
|
||||
unsigned int x_check2(Trie *trie, char c1, char c2, unsigned int start=1)
|
||||
{
|
||||
TrieNode *base = trie->base;
|
||||
unsigned char _c1 = charval(c1);
|
||||
unsigned char _c2 = charval(c2);
|
||||
unsigned int to_check = trie->baseSize - (_c1 > _c2 ? _c1 : _c2);
|
||||
for (unsigned int i=1; i<to_check; i++)
|
||||
for (unsigned int i=start; i<=to_check; i++)
|
||||
{
|
||||
if (base[i+_c1].mode == Node_Unused
|
||||
&& base[i+_c2].mode == Node_Unused)
|
||||
@ -154,22 +170,32 @@ unsigned int x_check2(Trie *trie, char c1, char c2)
|
||||
}
|
||||
}
|
||||
|
||||
trie->base = (TrieNode *)realloc(trie->base, trie->baseSize * sizeof(TrieNode) * 2);
|
||||
memset(trie->base + trie->baseSize, 0, trie->baseSize * sizeof(TrieNode));
|
||||
to_check = trie->baseSize;
|
||||
trie->baseSize *= 2;
|
||||
sm_trie_grow(trie);
|
||||
|
||||
return to_check;
|
||||
return x_check2(trie, c1, c2, to_check+1);
|
||||
}
|
||||
|
||||
unsigned int x_check_multi(Trie *trie,
|
||||
unsigned int offsets[],
|
||||
unsigned int count)
|
||||
unsigned int count,
|
||||
unsigned int start=1)
|
||||
{
|
||||
TrieNode *base = trie->base;
|
||||
TrieNode *cur;
|
||||
unsigned int to_check = trie->baseSize;
|
||||
for (unsigned int i=1; i<to_check; i++)
|
||||
unsigned int highest = 0;
|
||||
|
||||
for (unsigned int i=0; i<count; i++)
|
||||
{
|
||||
if (offsets[i] > highest)
|
||||
{
|
||||
highest = offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
to_check -= highest;
|
||||
|
||||
for (unsigned int i=start; i<=to_check; i++)
|
||||
{
|
||||
bool okay = true;
|
||||
for (unsigned int j=0; j<count; j++)
|
||||
@ -187,19 +213,16 @@ unsigned int x_check_multi(Trie *trie,
|
||||
}
|
||||
}
|
||||
|
||||
trie->base = (TrieNode *)realloc(trie->base, trie->baseSize * sizeof(TrieNode) * 2);
|
||||
memset(trie->base + trie->baseSize, 0, trie->baseSize * sizeof(TrieNode));
|
||||
to_check = trie->baseSize;
|
||||
trie->baseSize *= 2;
|
||||
sm_trie_grow(trie);
|
||||
|
||||
return to_check;
|
||||
return x_check_multi(trie, offsets, count, to_check+1);
|
||||
}
|
||||
|
||||
unsigned int x_addstring(Trie *trie, const char *ptr)
|
||||
{
|
||||
size_t len = strlen(ptr) + 1;
|
||||
|
||||
if (len > trie->stSize)
|
||||
if (trie->tail + len >= trie->stSize)
|
||||
{
|
||||
while (trie->tail + len >= trie->stSize)
|
||||
{
|
||||
@ -219,13 +242,13 @@ Trie *sm_trie_create()
|
||||
{
|
||||
Trie *t = new Trie;
|
||||
|
||||
t->base = (TrieNode *)malloc(sizeof(TrieNode) * 256);
|
||||
t->base = (TrieNode *)malloc(sizeof(TrieNode) * (256 + 1));
|
||||
t->stringtab = (char *)malloc(sizeof(char) * 256);
|
||||
t->baseSize = 256;
|
||||
t->stSize = 256;
|
||||
t->tail = 0;
|
||||
|
||||
memset(t->base, 0, sizeof(TrieNode) * 256);
|
||||
memset(t->base, 0, sizeof(TrieNode) * (256 + 1));
|
||||
memset(t->stringtab, 0, sizeof(char) * 256);
|
||||
|
||||
/* Sentinel root node */
|
||||
@ -373,6 +396,7 @@ bool sm_trie_insert(Trie *trie, const char *key, void *value)
|
||||
/* Now we need to find all the arcs leaving our parent...
|
||||
* Note: the inconsistency is the base of our parent.
|
||||
*/
|
||||
assert(base[node->parent].mode == Node_Arc);
|
||||
unsigned int incoming_list[256];
|
||||
unsigned int incoming_base = base[node->parent].idx;
|
||||
unsigned int incoming_count = 0;
|
||||
@ -392,8 +416,11 @@ bool sm_trie_insert(Trie *trie, const char *key, void *value)
|
||||
{
|
||||
unsigned int q = x_check_multi(trie, incoming_list, incoming_count);
|
||||
|
||||
base = trie->base;
|
||||
node = &base[curidx];
|
||||
|
||||
/* If we're incoming, we need to modify our parent */
|
||||
base[incoming_base].idx = q;
|
||||
base[node->parent].idx = q;
|
||||
|
||||
/* For each node in the "to move" list,
|
||||
* Relocate the node's info to the new position.
|
||||
@ -415,7 +442,7 @@ bool sm_trie_insert(Trie *trie, const char *key, void *value)
|
||||
if (base[newidx].mode == Node_Arc)
|
||||
{
|
||||
TrieNode *check_base = &base[base[newidx].idx] + 1;
|
||||
for (unsigned int i=1; i<=255; i++, check_base++)
|
||||
for (unsigned int j=1; j<=255; j++, check_base++)
|
||||
{
|
||||
if (check_base->parent == oldidx)
|
||||
{
|
||||
@ -427,6 +454,9 @@ bool sm_trie_insert(Trie *trie, const char *key, void *value)
|
||||
} else {
|
||||
unsigned int q = x_check_multi(trie, outgoing_list, outgoing_count);
|
||||
|
||||
base = trie->base;
|
||||
node = &base[curidx];
|
||||
|
||||
/* If we're outgoing, we need to modify our own base */
|
||||
base[lastidx].idx = q;
|
||||
|
||||
@ -455,7 +485,7 @@ bool sm_trie_insert(Trie *trie, const char *key, void *value)
|
||||
if (base[newidx].mode == Node_Arc)
|
||||
{
|
||||
TrieNode *check_base = &base[base[newidx].idx] + 1;
|
||||
for (unsigned int i=1; i<=255; i++, check_base++)
|
||||
for (unsigned int j=1; j<=255; j++, check_base++)
|
||||
{
|
||||
if (check_base->parent == oldidx)
|
||||
{
|
||||
@ -505,12 +535,16 @@ bool sm_trie_insert(Trie *trie, const char *key, void *value)
|
||||
*/
|
||||
void *oldvalue = node->value;
|
||||
bool oldvalset = node->valset;
|
||||
if (*term == *keyptr)
|
||||
{
|
||||
while (*term == *keyptr)
|
||||
{
|
||||
/* Find the next free slot in the check array.
|
||||
* This is the "vector base" essentially
|
||||
*/
|
||||
q = x_check(trie, *term);
|
||||
base = trie->base;
|
||||
node = &base[curidx];
|
||||
/* Point the node to the next new base */
|
||||
node->idx = q;
|
||||
node->mode = Node_Arc;
|
||||
@ -525,6 +559,9 @@ bool sm_trie_insert(Trie *trie, const char *key, void *value)
|
||||
*term = '\0'; /* Unmark the string table here */
|
||||
term++;
|
||||
keyptr++;
|
||||
}
|
||||
} else {
|
||||
node->valset = false;
|
||||
}
|
||||
/* We're done inserting new pairs. If one of them is exhausted,
|
||||
* we take special shortcuts.
|
||||
@ -544,7 +581,11 @@ bool sm_trie_insert(Trie *trie, const char *key, void *value)
|
||||
* B,A,D,G,E,R*,H*->OUSE (* = value set).
|
||||
* NOTE: parent was last set at the end of the while loop.
|
||||
*/
|
||||
/* Get the new base and apply re-basing */
|
||||
q = x_check(trie, *keyptr);
|
||||
base = trie->base;
|
||||
node = &base[curidx];
|
||||
|
||||
node->idx = q;
|
||||
node->mode = Node_Arc;
|
||||
lastidx = curidx;
|
||||
@ -570,7 +611,11 @@ bool sm_trie_insert(Trie *trie, const char *key, void *value)
|
||||
node->valset = true;
|
||||
node->value = value;
|
||||
|
||||
/* Get the new base and apply re-basing */
|
||||
q = x_check(trie, *term);
|
||||
base = trie->base;
|
||||
node = &base[curidx];
|
||||
|
||||
node->idx = q;
|
||||
node->mode = Node_Arc;
|
||||
lastidx = curidx;
|
||||
@ -593,7 +638,11 @@ bool sm_trie_insert(Trie *trie, const char *key, void *value)
|
||||
/* Finally, we have to create two new nodes instead of just one. */
|
||||
node->mode = Node_Arc;
|
||||
|
||||
/* Get the new base and apply re-basing */
|
||||
q = x_check2(trie, *keyptr, *term);
|
||||
base = trie->base;
|
||||
node = &base[curidx];
|
||||
|
||||
node->idx = q;
|
||||
lastidx = curidx;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user