From 7f667a58d06f1bd21a129cfa52314d6c395cb895 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Mon, 15 Oct 2007 19:38:18 +0000 Subject: [PATCH] fixed a trie lookup bug where terminated string arcs could return false positives. for example, sm_k would match sm_kick, if and only if the sm_k formed the arc and the ick formed the termination --HG-- extra : convert_revision : svn%3A39bc706e-5318-0410-9160-8a85361fbb7c/trunk%401585 --- public/sm_trie_tpl.h | 60 +++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/public/sm_trie_tpl.h b/public/sm_trie_tpl.h index a317cf78..0902217d 100644 --- a/public/sm_trie_tpl.h +++ b/public/sm_trie_tpl.h @@ -179,7 +179,9 @@ public: if (*keyptr == '\0') { node->mode = Node_Arc; - } else { + } + else + { node->idx = x_addstring(keyptr); node->mode = Node_Term; } @@ -187,7 +189,9 @@ public: new (&node->value) K(obj); return true; - } else if (node->parent != lastidx) { + } + else if (node->parent != lastidx) + { /* Collision! We have to split up the tree here. CASE 4: * Insertion when a new word is inserted with a collision. * NOTE: This is the hardest case to handle. All below examples are based on: @@ -307,7 +311,9 @@ public: } } } - } else { + } + else + { unsigned int q = x_check_multi(outgoing_list, outgoing_count); node = &m_base[curidx]; @@ -372,7 +378,9 @@ public: if (*keyptr == '\0') { node->mode = Node_Arc; - } else { + } + else + { node->idx = x_addstring(keyptr); node->mode = Node_Term; } @@ -380,7 +388,9 @@ public: new (&node->value) K(obj); return true; - } else { + } + else + { /* See what's in the next node - special case if terminator! 
*/ if (node->mode == Node_Term) { @@ -441,7 +451,9 @@ public: term++; keyptr++; } - } else if (node->valset) { + } + else if (node->valset) + { node->valset = false; node->value.~K(); } @@ -481,14 +493,18 @@ public: if (*keyptr == '\0') { node->mode = Node_Arc; - } else { + } + else + { node->idx = x_addstring(keyptr); node->mode = Node_Term; } node->parent = lastidx; node->valset = true; new (&node->value) K(obj); - } else if (*keyptr == '\0') { //EX: BADGER added over B -> ADGERHOUSE + } + else if (*keyptr == '\0') + { //EX: BADGER added over B -> ADGERHOUSE /* First backpatch the current node - it ends newly split input string. * This is the exact opposite of the above procedure. */ @@ -510,7 +526,9 @@ public: if (*term == '\0') { node->mode = Node_Arc; - } else { + } + else + { node->idx = (term - m_stringtab); /* Already in the string table! */ node->mode = Node_Term; } @@ -520,7 +538,9 @@ public: { new (&node->value) K(oldvalue); } - } else { + } + else + { /* Finally, we have to create two new nodes instead of just one. */ node->mode = Node_Arc; @@ -544,7 +564,9 @@ public: if (*term == '\0') { node->mode = Node_Arc; - } else { + } + else + { node->mode = Node_Term; node->idx = (term - m_stringtab); /* Already in the string table! */ } @@ -559,7 +581,9 @@ public: if (*keyptr == '\0') { node->mode = Node_Arc; - } else { + } + else + { node->mode = Node_Term; node->idx = x_addstring(keyptr); } @@ -567,7 +591,9 @@ public: /* Phew! */ return true; - } else { + } + else + { assert(node->mode == Node_Arc); } } @@ -659,12 +685,18 @@ private: if ((curidx > m_baseSize) || node->mode == Node_Unused || node->parent != lastidx) { return NULL; - } else if (node->mode == Node_Term) { + } + else if (node->mode == Node_Term) + { char *term = &m_stringtab[node->idx]; if (strcmp(keyptr, term) == 0) { break; } + else + { + return NULL; + } } lastidx = curidx; } while (*keyptr != '\0');