Fixed a trie lookup bug where terminated string arcs could return false positives. For example, sm_k would match sm_kick if and only if the sm_k formed the arc and the ick formed the termination.
--HG-- extra : convert_revision : svn%3A39bc706e-5318-0410-9160-8a85361fbb7c/trunk%401585
This commit is contained in:
parent
3c0aba7035
commit
7f667a58d0
@ -179,7 +179,9 @@ public:
|
|||||||
if (*keyptr == '\0')
|
if (*keyptr == '\0')
|
||||||
{
|
{
|
||||||
node->mode = Node_Arc;
|
node->mode = Node_Arc;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
node->idx = x_addstring(keyptr);
|
node->idx = x_addstring(keyptr);
|
||||||
node->mode = Node_Term;
|
node->mode = Node_Term;
|
||||||
}
|
}
|
||||||
@ -187,7 +189,9 @@ public:
|
|||||||
new (&node->value) K(obj);
|
new (&node->value) K(obj);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
} else if (node->parent != lastidx) {
|
}
|
||||||
|
else if (node->parent != lastidx)
|
||||||
|
{
|
||||||
/* Collision! We have to split up the tree here. CASE 4:
|
/* Collision! We have to split up the tree here. CASE 4:
|
||||||
* Insertion when a new word is inserted with a collision.
|
* Insertion when a new word is inserted with a collision.
|
||||||
* NOTE: This is the hardest case to handle. All below examples are based on:
|
* NOTE: This is the hardest case to handle. All below examples are based on:
|
||||||
@ -307,7 +311,9 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
unsigned int q = x_check_multi(outgoing_list, outgoing_count);
|
unsigned int q = x_check_multi(outgoing_list, outgoing_count);
|
||||||
|
|
||||||
node = &m_base[curidx];
|
node = &m_base[curidx];
|
||||||
@ -372,7 +378,9 @@ public:
|
|||||||
if (*keyptr == '\0')
|
if (*keyptr == '\0')
|
||||||
{
|
{
|
||||||
node->mode = Node_Arc;
|
node->mode = Node_Arc;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
node->idx = x_addstring(keyptr);
|
node->idx = x_addstring(keyptr);
|
||||||
node->mode = Node_Term;
|
node->mode = Node_Term;
|
||||||
}
|
}
|
||||||
@ -380,7 +388,9 @@ public:
|
|||||||
new (&node->value) K(obj);
|
new (&node->value) K(obj);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
/* See what's in the next node - special case if terminator! */
|
/* See what's in the next node - special case if terminator! */
|
||||||
if (node->mode == Node_Term)
|
if (node->mode == Node_Term)
|
||||||
{
|
{
|
||||||
@ -441,7 +451,9 @@ public:
|
|||||||
term++;
|
term++;
|
||||||
keyptr++;
|
keyptr++;
|
||||||
}
|
}
|
||||||
} else if (node->valset) {
|
}
|
||||||
|
else if (node->valset)
|
||||||
|
{
|
||||||
node->valset = false;
|
node->valset = false;
|
||||||
node->value.~K();
|
node->value.~K();
|
||||||
}
|
}
|
||||||
@ -481,14 +493,18 @@ public:
|
|||||||
if (*keyptr == '\0')
|
if (*keyptr == '\0')
|
||||||
{
|
{
|
||||||
node->mode = Node_Arc;
|
node->mode = Node_Arc;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
node->idx = x_addstring(keyptr);
|
node->idx = x_addstring(keyptr);
|
||||||
node->mode = Node_Term;
|
node->mode = Node_Term;
|
||||||
}
|
}
|
||||||
node->parent = lastidx;
|
node->parent = lastidx;
|
||||||
node->valset = true;
|
node->valset = true;
|
||||||
new (&node->value) K(obj);
|
new (&node->value) K(obj);
|
||||||
} else if (*keyptr == '\0') { //EX: BADGER added over B -> ADGERHOUSE
|
}
|
||||||
|
else if (*keyptr == '\0')
|
||||||
|
{ //EX: BADGER added over B -> ADGERHOUSE
|
||||||
/* First backpatch the current node - it ends newly split input string.
|
/* First backpatch the current node - it ends newly split input string.
|
||||||
* This is the exact opposite of the above procedure.
|
* This is the exact opposite of the above procedure.
|
||||||
*/
|
*/
|
||||||
@ -510,7 +526,9 @@ public:
|
|||||||
if (*term == '\0')
|
if (*term == '\0')
|
||||||
{
|
{
|
||||||
node->mode = Node_Arc;
|
node->mode = Node_Arc;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
node->idx = (term - m_stringtab); /* Already in the string table! */
|
node->idx = (term - m_stringtab); /* Already in the string table! */
|
||||||
node->mode = Node_Term;
|
node->mode = Node_Term;
|
||||||
}
|
}
|
||||||
@ -520,7 +538,9 @@ public:
|
|||||||
{
|
{
|
||||||
new (&node->value) K(oldvalue);
|
new (&node->value) K(oldvalue);
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
/* Finally, we have to create two new nodes instead of just one. */
|
/* Finally, we have to create two new nodes instead of just one. */
|
||||||
node->mode = Node_Arc;
|
node->mode = Node_Arc;
|
||||||
|
|
||||||
@ -544,7 +564,9 @@ public:
|
|||||||
if (*term == '\0')
|
if (*term == '\0')
|
||||||
{
|
{
|
||||||
node->mode = Node_Arc;
|
node->mode = Node_Arc;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
node->mode = Node_Term;
|
node->mode = Node_Term;
|
||||||
node->idx = (term - m_stringtab); /* Already in the string table! */
|
node->idx = (term - m_stringtab); /* Already in the string table! */
|
||||||
}
|
}
|
||||||
@ -559,7 +581,9 @@ public:
|
|||||||
if (*keyptr == '\0')
|
if (*keyptr == '\0')
|
||||||
{
|
{
|
||||||
node->mode = Node_Arc;
|
node->mode = Node_Arc;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
node->mode = Node_Term;
|
node->mode = Node_Term;
|
||||||
node->idx = x_addstring(keyptr);
|
node->idx = x_addstring(keyptr);
|
||||||
}
|
}
|
||||||
@ -567,7 +591,9 @@ public:
|
|||||||
|
|
||||||
/* Phew! */
|
/* Phew! */
|
||||||
return true;
|
return true;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
assert(node->mode == Node_Arc);
|
assert(node->mode == Node_Arc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -659,12 +685,18 @@ private:
|
|||||||
if ((curidx > m_baseSize) || node->mode == Node_Unused || node->parent != lastidx)
|
if ((curidx > m_baseSize) || node->mode == Node_Unused || node->parent != lastidx)
|
||||||
{
|
{
|
||||||
return NULL;
|
return NULL;
|
||||||
} else if (node->mode == Node_Term) {
|
}
|
||||||
|
else if (node->mode == Node_Term)
|
||||||
|
{
|
||||||
char *term = &m_stringtab[node->idx];
|
char *term = &m_stringtab[node->idx];
|
||||||
if (strcmp(keyptr, term) == 0)
|
if (strcmp(keyptr, term) == 0)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
lastidx = curidx;
|
lastidx = curidx;
|
||||||
} while (*keyptr != '\0');
|
} while (*keyptr != '\0');
|
||||||
|
Loading…
Reference in New Issue
Block a user