1277 lines
		
	
	
		
			38 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			1277 lines
		
	
	
		
			38 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // -*- coding: utf-8 -*-
 | |
| //
 | |
| // Copyright (c) 2005 - 2006, Google Inc.
 | |
| // All rights reserved.
 | |
| //
 | |
| // Redistribution and use in source and binary forms, with or without
 | |
| // modification, are permitted provided that the following conditions are
 | |
| // met:
 | |
| //
 | |
| //     * Redistributions of source code must retain the above copyright
 | |
| // notice, this list of conditions and the following disclaimer.
 | |
| //     * Redistributions in binary form must reproduce the above
 | |
| // copyright notice, this list of conditions and the following disclaimer
 | |
| // in the documentation and/or other materials provided with the
 | |
| // distribution.
 | |
| //     * Neither the name of Google Inc. nor the names of its
 | |
| // contributors may be used to endorse or promote products derived from
 | |
| // this software without specific prior written permission.
 | |
| //
 | |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 | |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 | |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 | |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 | |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 | |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 | |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| //
 | |
| // Author: Sanjay Ghemawat
 | |
| //
 | |
| // TODO: Test extractions for PartialMatch/Consume
 | |
| 
 | |
| #ifdef HAVE_CONFIG_H
 | |
| #include "config.h"
 | |
| #endif
 | |
| 
 | |
| #include <stdio.h>
 | |
| #include <cassert>
 | |
| #include <vector>
 | |
| #include "pcrecpp.h"
 | |
| 
 | |
| using pcrecpp::StringPiece;
 | |
| using pcrecpp::RE;
 | |
| using pcrecpp::RE_Options;
 | |
| using pcrecpp::Hex;
 | |
| using pcrecpp::Octal;
 | |
| using pcrecpp::CRadix;
 | |
| 
 | |
// When true, the option tests print the pattern and subject they are about
// to try, in addition to their one-line "Testing Option <...>" banner.
static bool VERBOSE_TEST = false;
 | |
| 
 | |
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure it prints "<file>:<line>: Check failed: <condition text>" to
// stderr and terminates the process with exit status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so
// that an argument containing an operator that binds less tightly than ==
// (for example `x & mask` or `c ? y : z`) is compared as a whole expression
// instead of being silently re-associated.
#define CHECK_EQ(a, b)   CHECK((a) == (b))
 | |
| 
 | |
| static void Timing1(int num_iters) {
 | |
|   // Same pattern lots of times
 | |
|   RE pattern("ruby:\\d+");
 | |
|   StringPiece p("ruby:1234");
 | |
|   for (int j = num_iters; j > 0; j--) {
 | |
|     CHECK(pattern.FullMatch(p));
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void Timing2(int num_iters) {
 | |
|   // Same pattern lots of times
 | |
|   RE pattern("ruby:(\\d+)");
 | |
|   int i;
 | |
|   for (int j = num_iters; j > 0; j--) {
 | |
|     CHECK(pattern.FullMatch("ruby:1234", &i));
 | |
|     CHECK_EQ(i, 1234);
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void Timing3(int num_iters) {
 | |
|   string text_string;
 | |
|   for (int j = num_iters; j > 0; j--) {
 | |
|     text_string += "this is another line\n";
 | |
|   }
 | |
| 
 | |
|   RE line_matcher(".*\n");
 | |
|   string line;
 | |
|   StringPiece text(text_string);
 | |
|   int counter = 0;
 | |
|   while (line_matcher.Consume(&text)) {
 | |
|     counter++;
 | |
|   }
 | |
|   printf("Matched %d lines\n", counter);
 | |
| }
 | |
| 
 | |
#if 0  // uncomment this if you have a way of defining VirtualProcessSize()

// Checks for memory leaks by constructing (and destroying) 100000 distinct
// RE objects.  The process size is sampled after the first 50000 and again
// at the end; the second half must not grow the process by 2% or more.
// NOTE(review): VirtualProcessSize() is not defined anywhere in this file;
// it is assumed to return an unsigned integer byte count -- confirm before
// enabling this block.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size so that it matches the %llu conversion.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: if the process shrank, an unsigned
  // subtraction would wrap around and report an enormous bogus growth.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);       // Allow < 2% growth
}

#endif
 | |
| 
 | |
| static void RadixTests() {
 | |
|   printf("Testing hex\n");
 | |
| 
 | |
| #define CHECK_HEX(type, value) \
 | |
|   do { \
 | |
|     type v; \
 | |
|     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
 | |
|     CHECK_EQ(v, 0x ## value); \
 | |
|     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
 | |
|     CHECK_EQ(v, 0x ## value); \
 | |
|   } while(0)
 | |
| 
 | |
|   CHECK_HEX(short,              2bad);
 | |
|   CHECK_HEX(unsigned short,     2badU);
 | |
|   CHECK_HEX(int,                dead);
 | |
|   CHECK_HEX(unsigned int,       deadU);
 | |
|   CHECK_HEX(long,               7eadbeefL);
 | |
|   CHECK_HEX(unsigned long,      deadbeefUL);
 | |
| #ifdef HAVE_LONG_LONG
 | |
|   CHECK_HEX(long long,          12345678deadbeefLL);
 | |
| #endif
 | |
| #ifdef HAVE_UNSIGNED_LONG_LONG
 | |
|   CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
 | |
| #endif
 | |
| 
 | |
| #undef CHECK_HEX
 | |
| 
 | |
|   printf("Testing octal\n");
 | |
| 
 | |
| #define CHECK_OCTAL(type, value) \
 | |
|   do { \
 | |
|     type v; \
 | |
|     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
 | |
|     CHECK_EQ(v, 0 ## value); \
 | |
|     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
 | |
|     CHECK_EQ(v, 0 ## value); \
 | |
|   } while(0)
 | |
| 
 | |
|   CHECK_OCTAL(short,              77777);
 | |
|   CHECK_OCTAL(unsigned short,     177777U);
 | |
|   CHECK_OCTAL(int,                17777777777);
 | |
|   CHECK_OCTAL(unsigned int,       37777777777U);
 | |
|   CHECK_OCTAL(long,               17777777777L);
 | |
|   CHECK_OCTAL(unsigned long,      37777777777UL);
 | |
| #ifdef HAVE_LONG_LONG
 | |
|   CHECK_OCTAL(long long,          777777777777777777777LL);
 | |
| #endif
 | |
| #ifdef HAVE_UNSIGNED_LONG_LONG
 | |
|   CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
 | |
| #endif
 | |
| 
 | |
| #undef CHECK_OCTAL
 | |
| 
 | |
|   printf("Testing decimal\n");
 | |
| 
 | |
| #define CHECK_DECIMAL(type, value) \
 | |
|   do { \
 | |
|     type v; \
 | |
|     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
 | |
|     CHECK_EQ(v, value); \
 | |
|     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
 | |
|     CHECK_EQ(v, value); \
 | |
|   } while(0)
 | |
| 
 | |
|   CHECK_DECIMAL(short,              -1);
 | |
|   CHECK_DECIMAL(unsigned short,     9999);
 | |
|   CHECK_DECIMAL(int,                -1000);
 | |
|   CHECK_DECIMAL(unsigned int,       12345U);
 | |
|   CHECK_DECIMAL(long,               -10000000L);
 | |
|   CHECK_DECIMAL(unsigned long,      3083324652U);
 | |
| #ifdef HAVE_LONG_LONG
 | |
|   CHECK_DECIMAL(long long,          -100000000000000LL);
 | |
| #endif
 | |
| #ifdef HAVE_UNSIGNED_LONG_LONG
 | |
|   CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
 | |
| #endif
 | |
| 
 | |
| #undef CHECK_DECIMAL
 | |
| 
 | |
| }
 | |
| 
 | |
| static void TestReplace() {
 | |
|   printf("Testing Replace\n");
 | |
| 
 | |
|   struct ReplaceTest {
 | |
|     const char *regexp;
 | |
|     const char *rewrite;
 | |
|     const char *original;
 | |
|     const char *single;
 | |
|     const char *global;
 | |
|     int global_count;         // the expected return value from ReplaceAll
 | |
|   };
 | |
|   static const ReplaceTest tests[] = {
 | |
|     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
 | |
|       "\\2\\1ay",
 | |
|       "the quick brown fox jumps over the lazy dogs.",
 | |
|       "ethay quick brown fox jumps over the lazy dogs.",
 | |
|       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
 | |
|       9 },
 | |
|     { "\\w+",
 | |
|       "\\0-NOSPAM",
 | |
|       "paul.haahr@google.com",
 | |
|       "paul-NOSPAM.haahr@google.com",
 | |
|       "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
 | |
|       4 },
 | |
|     { "^",
 | |
|       "(START)",
 | |
|       "foo",
 | |
|       "(START)foo",
 | |
|       "(START)foo",
 | |
|       1 },
 | |
|     { "^",
 | |
|       "(START)",
 | |
|       "",
 | |
|       "(START)",
 | |
|       "(START)",
 | |
|       1 },
 | |
|     { "$",
 | |
|       "(END)",
 | |
|       "",
 | |
|       "(END)",
 | |
|       "(END)",
 | |
|       1 },
 | |
|     { "b",
 | |
|       "bb",
 | |
|       "ababababab",
 | |
|       "abbabababab",
 | |
|       "abbabbabbabbabb",
 | |
|        5 },
 | |
|     { "b",
 | |
|       "bb",
 | |
|       "bbbbbb",
 | |
|       "bbbbbbb",
 | |
|       "bbbbbbbbbbbb",
 | |
|       6 },
 | |
|     { "b+",
 | |
|       "bb",
 | |
|       "bbbbbb",
 | |
|       "bb",
 | |
|       "bb",
 | |
|       1 },
 | |
|     { "b*",
 | |
|       "bb",
 | |
|       "bbbbbb",
 | |
|       "bb",
 | |
|       "bb",
 | |
|       1 },
 | |
|     { "b*",
 | |
|       "bb",
 | |
|       "aaaaa",
 | |
|       "bbaaaaa",
 | |
|       "bbabbabbabbabbabb",
 | |
|       6 },
 | |
|     { "b*",
 | |
|       "bb",
 | |
|       "aa\naa\n",
 | |
|       "bbaa\naa\n",
 | |
|       "bbabbabb\nbbabbabb\nbb",
 | |
|       7 },
 | |
|     { "b*",
 | |
|       "bb",
 | |
|       "aa\raa\r",
 | |
|       "bbaa\raa\r",
 | |
|       "bbabbabb\rbbabbabb\rbb",
 | |
|       7 },
 | |
|     { "b*",
 | |
|       "bb",
 | |
|       "aa\r\naa\r\n",
 | |
|       "bbaa\r\naa\r\n",
 | |
|       "bbabbabb\r\nbbabbabb\r\nbb",
 | |
|       7 },
 | |
| #ifdef SUPPORT_UTF8
 | |
|     { "b*",
 | |
|       "bb",
 | |
|       "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
 | |
|       "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
 | |
|       "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
 | |
|       5 },
 | |
|     { "b*",
 | |
|       "bb",
 | |
|       "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
 | |
|       "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
 | |
|       ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
 | |
|        "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
 | |
|       9 },
 | |
| #endif
 | |
|     { "", NULL, NULL, NULL, NULL, 0 }
 | |
|   };
 | |
| 
 | |
| #ifdef SUPPORT_UTF8
 | |
|   const bool support_utf8 = true;
 | |
| #else
 | |
|   const bool support_utf8 = false;
 | |
| #endif
 | |
| 
 | |
|   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
 | |
|     RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
 | |
|     assert(re.error().empty());
 | |
|     string one(t->original);
 | |
|     CHECK(re.Replace(t->rewrite, &one));
 | |
|     CHECK_EQ(one, t->single);
 | |
|     string all(t->original);
 | |
|     const int replace_count = re.GlobalReplace(t->rewrite, &all);
 | |
|     CHECK_EQ(all, t->global);
 | |
|     CHECK_EQ(replace_count, t->global_count);
 | |
|   }
 | |
| 
 | |
|   // One final test: test \r\n replacement when we're not in CRLF mode
 | |
|   {
 | |
|     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
 | |
|     assert(re.error().empty());
 | |
|     string all("aa\r\naa\r\n");
 | |
|     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
 | |
|     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
 | |
|   }
 | |
|   {
 | |
|     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
 | |
|     assert(re.error().empty());
 | |
|     string all("aa\r\naa\r\n");
 | |
|     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
 | |
|     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
 | |
|   }
 | |
|   // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
 | |
|   //       Alas, the answer depends on how pcre was compiled.
 | |
| }
 | |
| 
 | |
| static void TestExtract() {
 | |
|   printf("Testing Extract\n");
 | |
| 
 | |
|   string s;
 | |
| 
 | |
|   CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
 | |
|   CHECK_EQ(s, "kremvax!boris");
 | |
| 
 | |
|   // check the RE interface as well
 | |
|   CHECK(RE(".*").Extract("'\\0'", "foo", &s));
 | |
|   CHECK_EQ(s, "'foo'");
 | |
|   CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
 | |
|   CHECK_EQ(s, "'foo'");
 | |
| }
 | |
| 
 | |
| static void TestConsume() {
 | |
|   printf("Testing Consume\n");
 | |
| 
 | |
|   string word;
 | |
| 
 | |
|   string s("   aaa b!@#$@#$cccc");
 | |
|   StringPiece input(s);
 | |
| 
 | |
|   RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
 | |
|   CHECK(r.Consume(&input, &word));
 | |
|   CHECK_EQ(word, "aaa");
 | |
|   CHECK(r.Consume(&input, &word));
 | |
|   CHECK_EQ(word, "b");
 | |
|   CHECK(! r.Consume(&input, &word));
 | |
| }
 | |
| 
 | |
| static void TestFindAndConsume() {
 | |
|   printf("Testing FindAndConsume\n");
 | |
| 
 | |
|   string word;
 | |
| 
 | |
|   string s("   aaa b!@#$@#$cccc");
 | |
|   StringPiece input(s);
 | |
| 
 | |
|   RE r("(\\w+)");      // matches a word
 | |
|   CHECK(r.FindAndConsume(&input, &word));
 | |
|   CHECK_EQ(word, "aaa");
 | |
|   CHECK(r.FindAndConsume(&input, &word));
 | |
|   CHECK_EQ(word, "b");
 | |
|   CHECK(r.FindAndConsume(&input, &word));
 | |
|   CHECK_EQ(word, "cccc");
 | |
|   CHECK(! r.FindAndConsume(&input, &word));
 | |
| }
 | |
| 
 | |
| static void TestMatchNumberPeculiarity() {
 | |
|   printf("Testing match-number peculiaraity\n");
 | |
| 
 | |
|   string word1;
 | |
|   string word2;
 | |
|   string word3;
 | |
| 
 | |
|   RE r("(foo)|(bar)|(baz)");
 | |
|   CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
 | |
|   CHECK_EQ(word1, "foo");
 | |
|   CHECK_EQ(word2, "");
 | |
|   CHECK_EQ(word3, "");
 | |
|   CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
 | |
|   CHECK_EQ(word1, "");
 | |
|   CHECK_EQ(word2, "bar");
 | |
|   CHECK_EQ(word3, "");
 | |
|   CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
 | |
|   CHECK_EQ(word1, "");
 | |
|   CHECK_EQ(word2, "");
 | |
|   CHECK_EQ(word3, "baz");
 | |
|   CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
 | |
| 
 | |
|   string a;
 | |
|   CHECK(RE("(foo)|hello").FullMatch("hello", &a));
 | |
|   CHECK_EQ(a, "");
 | |
| }
 | |
| 
 | |
| static void TestRecursion() {
 | |
|   printf("Testing recursion\n");
 | |
| 
 | |
|   // Get one string that passes (sometimes), one that never does.
 | |
|   string text_good("abcdefghijk");
 | |
|   string text_bad("acdefghijkl");
 | |
| 
 | |
|   // According to pcretest, matching text_good against (\w+)*b
 | |
|   // requires match_limit of at least 8192, and match_recursion_limit
 | |
|   // of at least 37.
 | |
| 
 | |
|   RE_Options options_ml;
 | |
|   options_ml.set_match_limit(8192);
 | |
|   RE re("(\\w+)*b", options_ml);
 | |
|   CHECK(re.PartialMatch(text_good) == true);
 | |
|   CHECK(re.PartialMatch(text_bad) == false);
 | |
|   CHECK(re.FullMatch(text_good) == false);
 | |
|   CHECK(re.FullMatch(text_bad) == false);
 | |
| 
 | |
|   options_ml.set_match_limit(1024);
 | |
|   RE re2("(\\w+)*b", options_ml);
 | |
|   CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
 | |
|   CHECK(re2.PartialMatch(text_bad) == false);
 | |
|   CHECK(re2.FullMatch(text_good) == false);
 | |
|   CHECK(re2.FullMatch(text_bad) == false);
 | |
| 
 | |
|   RE_Options options_mlr;
 | |
|   options_mlr.set_match_limit_recursion(50);
 | |
|   RE re3("(\\w+)*b", options_mlr);
 | |
|   CHECK(re3.PartialMatch(text_good) == true);
 | |
|   CHECK(re3.PartialMatch(text_bad) == false);
 | |
|   CHECK(re3.FullMatch(text_good) == false);
 | |
|   CHECK(re3.FullMatch(text_bad) == false);
 | |
| 
 | |
|   options_mlr.set_match_limit_recursion(10);
 | |
|   RE re4("(\\w+)*b", options_mlr);
 | |
|   CHECK(re4.PartialMatch(text_good) == false);
 | |
|   CHECK(re4.PartialMatch(text_bad) == false);
 | |
|   CHECK(re4.FullMatch(text_good) == false);
 | |
|   CHECK(re4.FullMatch(text_bad) == false);
 | |
| }
 | |
| 
 | |
| // A meta-quoted string, interpreted as a pattern, should always match
 | |
| // the original unquoted string.
 | |
| static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
 | |
|   string quoted = RE::QuoteMeta(unquoted);
 | |
|   RE re(quoted, options);
 | |
|   CHECK(re.FullMatch(unquoted));
 | |
| }
 | |
| 
 | |
| // A string containing meaningful regexp characters, which is then meta-
 | |
| // quoted, should not generally match a string the unquoted string does.
 | |
| static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
 | |
|                                   RE_Options options = RE_Options()) {
 | |
|   string quoted = RE::QuoteMeta(unquoted);
 | |
|   RE re(quoted, options);
 | |
|   CHECK(!re.FullMatch(should_not_match));
 | |
| }
 | |
| 
 | |
| // Tests that quoted meta characters match their original strings,
 | |
| // and that a few things that shouldn't match indeed do not.
 | |
| static void TestQuotaMetaSimple() {
 | |
|   TestQuoteMeta("foo");
 | |
|   TestQuoteMeta("foo.bar");
 | |
|   TestQuoteMeta("foo\\.bar");
 | |
|   TestQuoteMeta("[1-9]");
 | |
|   TestQuoteMeta("1.5-2.0?");
 | |
|   TestQuoteMeta("\\d");
 | |
|   TestQuoteMeta("Who doesn't like ice cream?");
 | |
|   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
 | |
|   TestQuoteMeta("((?!)xxx).*yyy");
 | |
|   TestQuoteMeta("([");
 | |
|   TestQuoteMeta(string("foo\0bar", 7));
 | |
| }
 | |
| 
 | |
| static void TestQuoteMetaSimpleNegative() {
 | |
|   NegativeTestQuoteMeta("foo", "bar");
 | |
|   NegativeTestQuoteMeta("...", "bar");
 | |
|   NegativeTestQuoteMeta("\\.", ".");
 | |
|   NegativeTestQuoteMeta("\\.", "..");
 | |
|   NegativeTestQuoteMeta("(a)", "a");
 | |
|   NegativeTestQuoteMeta("(a|b)", "a");
 | |
|   NegativeTestQuoteMeta("(a|b)", "(a)");
 | |
|   NegativeTestQuoteMeta("(a|b)", "a|b");
 | |
|   NegativeTestQuoteMeta("[0-9]", "0");
 | |
|   NegativeTestQuoteMeta("[0-9]", "0-9");
 | |
|   NegativeTestQuoteMeta("[0-9]", "[9]");
 | |
|   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
 | |
| }
 | |
| 
 | |
| static void TestQuoteMetaLatin1() {
 | |
|   TestQuoteMeta("3\xb2 = 9");
 | |
| }
 | |
| 
 | |
// QuoteMeta checks on multi-byte UTF-8 subjects.  Compiled to an empty
// function unless SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8 = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8);
  TestQuoteMeta("xyz", utf8);                  // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8);             // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8);   // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8);         // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8);     // 4-byte utf8 (music note)

  // Default options: interpreted as Latin-1, but should still work.
  TestQuoteMeta("27\xc2\xb0");

  // In UTF-8 mode the quoted degree symbol must not match a subject that
  // has a literal backslash in front of each of its two bytes.
  NegativeTestQuoteMeta("27\xc2\xb0",          // 2-byte utf (degree symbol)
                        "27\\\xc2\\\xb0",
                        utf8);
#endif
}
 | |
| 
 | |
| static void TestQuoteMetaAll() {
 | |
|   printf("Testing QuoteMeta\n");
 | |
|   TestQuotaMetaSimple();
 | |
|   TestQuoteMetaSimpleNegative();
 | |
|   TestQuoteMetaLatin1();
 | |
|   TestQuoteMetaUtf8();
 | |
| }
 | |
| 
 | |
| //
 | |
| // Options tests contributed by
 | |
| // Giuseppe Maxia, CTO, Stardata s.r.l.
 | |
| // July 2005
 | |
| //
 | |
| static void GetOneOptionResult(
 | |
|                 const char *option_name,
 | |
|                 const char *regex,
 | |
|                 const char *str,
 | |
|                 RE_Options options,
 | |
|                 bool full,
 | |
|                 string expected) {
 | |
| 
 | |
|   printf("Testing Option <%s>\n", option_name);
 | |
|   if(VERBOSE_TEST)
 | |
|     printf("/%s/ finds \"%s\" within \"%s\" \n",
 | |
|                     regex,
 | |
|                     expected.c_str(),
 | |
|                     str);
 | |
|   string captured("");
 | |
|   if (full)
 | |
|     RE(regex,options).FullMatch(str, &captured);
 | |
|   else
 | |
|     RE(regex,options).PartialMatch(str, &captured);
 | |
|   CHECK_EQ(captured, expected);
 | |
| }
 | |
| 
 | |
| static void TestOneOption(
 | |
|                 const char *option_name,
 | |
|                 const char *regex,
 | |
|                 const char *str,
 | |
|                 RE_Options options,
 | |
|                 bool full,
 | |
|                 bool assertive = true) {
 | |
| 
 | |
|   printf("Testing Option <%s>\n", option_name);
 | |
|   if (VERBOSE_TEST)
 | |
|     printf("'%s' %s /%s/ \n",
 | |
|                   str,
 | |
|                   (assertive? "matches" : "doesn't match"),
 | |
|                   regex);
 | |
|   if (assertive) {
 | |
|     if (full)
 | |
|       CHECK(RE(regex,options).FullMatch(str));
 | |
|     else
 | |
|       CHECK(RE(regex,options).PartialMatch(str));
 | |
|   } else {
 | |
|     if (full)
 | |
|       CHECK(!RE(regex,options).FullMatch(str));
 | |
|     else
 | |
|       CHECK(!RE(regex,options).PartialMatch(str));
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void Test_CASELESS() {
 | |
|   RE_Options options;
 | |
|   RE_Options options2;
 | |
| 
 | |
|   options.set_caseless(true);
 | |
|   TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
 | |
|   TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
 | |
|   TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
 | |
| 
 | |
|   TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
 | |
|   TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
 | |
|   options.set_caseless(false);
 | |
|   TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
 | |
| }
 | |
| 
 | |
| static void Test_MULTILINE() {
 | |
|   RE_Options options;
 | |
|   RE_Options options2;
 | |
|   const char *str = "HELLO\n" "cruel\n" "world\n";
 | |
| 
 | |
|   options.set_multiline(true);
 | |
|   TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
 | |
|   TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
 | |
|   TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
 | |
|   options.set_multiline(false);
 | |
|   TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
 | |
| }
 | |
| 
 | |
| static void Test_DOTALL() {
 | |
|   RE_Options options;
 | |
|   RE_Options options2;
 | |
|   const char *str = "HELLO\n" "cruel\n" "world";
 | |
| 
 | |
|   options.set_dotall(true);
 | |
|   TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
 | |
|   TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
 | |
|   TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
 | |
|   options.set_dotall(false);
 | |
|   TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
 | |
| }
 | |
| 
 | |
| static void Test_DOLLAR_ENDONLY() {
 | |
|   RE_Options options;
 | |
|   RE_Options options2;
 | |
|   const char *str = "HELLO world\n";
 | |
| 
 | |
|   TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
 | |
|   options.set_dollar_endonly(true);
 | |
|   TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
 | |
|   TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
 | |
| }
 | |
| 
 | |
| static void Test_EXTRA() {
 | |
|   RE_Options options;
 | |
|   const char *str = "HELLO";
 | |
| 
 | |
|   options.set_extra(true);
 | |
|   TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
 | |
|   TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
 | |
|   options.set_extra(false);
 | |
|   TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
 | |
| }
 | |
| 
 | |
| static void Test_EXTENDED() {
 | |
|   RE_Options options;
 | |
|   RE_Options options2;
 | |
|   const char *str = "HELLO world";
 | |
| 
 | |
|   options.set_extended(true);
 | |
|   TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
 | |
|   TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
 | |
|   TestOneOption("EXTENDED (class)",
 | |
|                     "^ HE L{2} O "
 | |
|                     "\\s+        "
 | |
|                     "\\w+ $      ",
 | |
|                     str,
 | |
|                     options,
 | |
|                     false);
 | |
| 
 | |
|   TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
 | |
|   TestOneOption("EXTENDED (function)",
 | |
|                     "^ HE L{2} O "
 | |
|                     "\\s+        "
 | |
|                     "\\w+ $      ",
 | |
|                     str,
 | |
|                     pcrecpp::EXTENDED(),
 | |
|                     false);
 | |
| 
 | |
|   options.set_extended(false);
 | |
|   TestOneOption("no EXTENDED", "HELLO world", str, options, false);
 | |
| }
 | |
| 
 | |
| static void Test_NO_AUTO_CAPTURE() {
 | |
|   RE_Options options;
 | |
|   const char *str = "HELLO world";
 | |
|   string captured;
 | |
| 
 | |
|   printf("Testing Option <no NO_AUTO_CAPTURE>\n");
 | |
|   if (VERBOSE_TEST)
 | |
|     printf("parentheses capture text\n");
 | |
|   RE re("(world|universe)$", options);
 | |
|   CHECK(re.Extract("\\1", str , &captured));
 | |
|   CHECK_EQ(captured, "world");
 | |
|   options.set_no_auto_capture(true);
 | |
|   printf("testing Option <NO_AUTO_CAPTURE>\n");
 | |
|   if (VERBOSE_TEST)
 | |
|     printf("parentheses do not capture text\n");
 | |
|   re.Extract("\\1",str, &captured );
 | |
|   CHECK_EQ(captured, "world");
 | |
| }
 | |
| 
 | |
| static void Test_UNGREEDY() {
 | |
|   RE_Options options;
 | |
|   const char *str = "HELLO, 'this' is the 'world'";
 | |
| 
 | |
|   options.set_ungreedy(true);
 | |
|   GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
 | |
|   GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
 | |
|   GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
 | |
| 
 | |
|   options.set_ungreedy(false);
 | |
|   GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
 | |
|   GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
 | |
| }
 | |
| 
 | |
| static void Test_all_options() {
 | |
|   const char *str = "HELLO\n" "cruel\n" "world";
 | |
|   RE_Options options;
 | |
|   options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
 | |
| 
 | |
|   TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
 | |
|   options.set_all_options(0);
 | |
|   TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
 | |
|   options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
 | |
| 
 | |
|   TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
 | |
|   TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
 | |
|                   " ^ c r u e l $ ",
 | |
|                   str,
 | |
|                   RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
 | |
|                   false);
 | |
| 
 | |
|   TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
 | |
|                   " ^ c r u e l $ ",
 | |
|                   str,
 | |
|                   RE_Options()
 | |
|                        .set_multiline(true)
 | |
|                        .set_extended(true),
 | |
|                   false);
 | |
| 
 | |
|   options.set_all_options(0);
 | |
|   TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
 | |
| 
 | |
| }
 | |
| 
 | |
| static void TestOptions() {
 | |
|   printf("Testing Options\n");
 | |
|   Test_CASELESS();
 | |
|   Test_MULTILINE();
 | |
|   Test_DOTALL();
 | |
|   Test_DOLLAR_ENDONLY();
 | |
|   Test_EXTENDED();
 | |
|   Test_NO_AUTO_CAPTURE();
 | |
|   Test_UNGREEDY();
 | |
|   Test_EXTRA();
 | |
|   Test_all_options();
 | |
| }
 | |
| 
 | |
| static void TestConstructors() {
 | |
|   printf("Testing constructors\n");
 | |
| 
 | |
|   RE_Options options;
 | |
|   options.set_dotall(true);
 | |
|   const char *str = "HELLO\n" "cruel\n" "world";
 | |
| 
 | |
|   RE orig("HELLO.*world", options);
 | |
|   CHECK(orig.FullMatch(str));
 | |
| 
 | |
|   RE copy1(orig);
 | |
|   CHECK(copy1.FullMatch(str));
 | |
| 
 | |
|   RE copy2("not a match");
 | |
|   CHECK(!copy2.FullMatch(str));
 | |
|   copy2 = copy1;
 | |
|   CHECK(copy2.FullMatch(str));
 | |
|   copy2 = orig;
 | |
|   CHECK(copy2.FullMatch(str));
 | |
| 
 | |
|   // Make sure when we assign to ourselves, nothing bad happens
 | |
|   orig = orig;
 | |
|   copy1 = copy1;
 | |
|   copy2 = copy2;
 | |
|   CHECK(orig.FullMatch(str));
 | |
|   CHECK(copy1.FullMatch(str));
 | |
|   CHECK(copy2.FullMatch(str));
 | |
| }
 | |
| 
 | |
| int main(int argc, char** argv) {
 | |
|   // Treat any flag as --help
 | |
|   if (argc > 1 && argv[1][0] == '-') {
 | |
|     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
 | |
|            "       If 'timingX ###' is specified, run the given timing test\n"
 | |
|            "       with the given number of iterations, rather than running\n"
 | |
|            "       the default corectness test.\n", argv[0]);
 | |
|     return 0;
 | |
|   }
 | |
| 
 | |
|   if (argc > 1) {
 | |
|     if ( argc == 2 || atoi(argv[2]) == 0) {
 | |
|       printf("timing mode needs a num-iters argument\n");
 | |
|       return 1;
 | |
|     }
 | |
|     if (!strcmp(argv[1], "timing1"))
 | |
|       Timing1(atoi(argv[2]));
 | |
|     else if (!strcmp(argv[1], "timing2"))
 | |
|       Timing2(atoi(argv[2]));
 | |
|     else if (!strcmp(argv[1], "timing3"))
 | |
|       Timing3(atoi(argv[2]));
 | |
|     else
 | |
|       printf("Unknown argument '%s'\n", argv[1]);
 | |
|     return 0;
 | |
|   }
 | |
| 
 | |
|   printf("Testing FullMatch\n");
 | |
| 
 | |
|   int i;
 | |
|   string s;
 | |
| 
 | |
|   /***** FullMatch with no args *****/
 | |
| 
 | |
|   CHECK(RE("h.*o").FullMatch("hello"));
 | |
|   CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
 | |
|   CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
 | |
|   CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
 | |
|   CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
 | |
|   CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
 | |
| 
 | |
|   /***** FullMatch with args *****/
 | |
| 
 | |
|   // Zero-arg
 | |
|   CHECK(RE("\\d+").FullMatch("1001"));
 | |
| 
 | |
|   // Single-arg
 | |
|   CHECK(RE("(\\d+)").FullMatch("1001",   &i));
 | |
|   CHECK_EQ(i, 1001);
 | |
|   CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
 | |
|   CHECK_EQ(i, -123);
 | |
|   CHECK(!RE("()\\d+").FullMatch("10", &i));
 | |
|   CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
 | |
|                                 &i));
 | |
| 
 | |
|   // Digits surrounding integer-arg
 | |
|   CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
 | |
|   CHECK_EQ(i, 23);
 | |
|   CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
 | |
|   CHECK_EQ(i, 1);
 | |
|   CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
 | |
|   CHECK_EQ(i, -1);
 | |
|   CHECK(RE("(\\d)").PartialMatch("1234", &i));
 | |
|   CHECK_EQ(i, 1);
 | |
|   CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
 | |
|   CHECK_EQ(i, -1);
 | |
| 
 | |
|   // String-arg
 | |
|   CHECK(RE("h(.*)o").FullMatch("hello", &s));
 | |
|   CHECK_EQ(s, string("ell"));
 | |
| 
 | |
|   // StringPiece-arg
 | |
|   StringPiece sp;
 | |
|   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
 | |
|   CHECK_EQ(sp.size(), 4);
 | |
|   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
 | |
|   CHECK_EQ(i, 1234);
 | |
| 
 | |
|   // Multi-arg
 | |
|   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
 | |
|   CHECK_EQ(s, string("ruby"));
 | |
|   CHECK_EQ(i, 1234);
 | |
| 
 | |
|   // Ignore non-void* NULL arg
 | |
|   CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
 | |
|   CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
 | |
|   CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
 | |
|   CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
 | |
| #ifdef HAVE_LONG_LONG
 | |
|   CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
 | |
| #endif
 | |
|   CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
 | |
|   CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
 | |
| 
 | |
|   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
 | |
|   CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
 | |
|   CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
 | |
|   CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
 | |
|   CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
 | |
|   CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
 | |
| 
 | |
|   // Ignored arg
 | |
|   CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
 | |
|   CHECK_EQ(s, string("ruby"));
 | |
|   CHECK_EQ(i, 1234);
 | |
| 
 | |
|   // Type tests
 | |
|   {
 | |
|     char c;
 | |
|     CHECK(RE("(H)ello").FullMatch("Hello", &c));
 | |
|     CHECK_EQ(c, 'H');
 | |
|   }
 | |
|   {
 | |
|     unsigned char c;
 | |
|     CHECK(RE("(H)ello").FullMatch("Hello", &c));
 | |
|     CHECK_EQ(c, static_cast<unsigned char>('H'));
 | |
|   }
 | |
|   {
 | |
|     short v;
 | |
|     CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
 | |
|     CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
 | |
|     CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
 | |
|     CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
 | |
|     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
 | |
|     CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
 | |
|   }
 | |
|   {
 | |
|     unsigned short v;
 | |
|     CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
 | |
|     CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
 | |
|     CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
 | |
|     CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
 | |
|   }
 | |
|   {
 | |
|     int v;
 | |
|     static const int max_value = 0x7fffffff;
 | |
|     static const int min_value = -max_value - 1;
 | |
|     CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
 | |
|     CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
 | |
|     CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
 | |
|     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
 | |
|     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
 | |
|     CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
 | |
|   }
 | |
|   {
 | |
|     unsigned int v;
 | |
|     static const unsigned int max_value = 0xfffffffful;
 | |
|     CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
 | |
|     CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
 | |
|     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
 | |
|   }
 | |
| #ifdef HAVE_LONG_LONG
 | |
| # if defined(__MINGW__) || defined(__MINGW32__)
 | |
| #   define LLD "%I64d"
 | |
| #   define LLU "%I64u"
 | |
| # else
 | |
| #   define LLD "%lld"
 | |
| #   define LLU "%llu"
 | |
| # endif
 | |
|   {
 | |
|     long long v;
 | |
|     static const long long max_value = 0x7fffffffffffffffLL;
 | |
|     static const long long min_value = -max_value - 1;
 | |
|     char buf[32];  // definitely big enough for a long long
 | |
| 
 | |
|     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
 | |
|     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
 | |
| 
 | |
|     sprintf(buf, LLD, max_value);
 | |
|     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
 | |
| 
 | |
|     sprintf(buf, LLD, min_value);
 | |
|     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
 | |
| 
 | |
|     sprintf(buf, LLD, max_value);
 | |
|     assert(buf[strlen(buf)-1] != '9');
 | |
|     buf[strlen(buf)-1]++;
 | |
|     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
 | |
| 
 | |
|     sprintf(buf, LLD, min_value);
 | |
|     assert(buf[strlen(buf)-1] != '9');
 | |
|     buf[strlen(buf)-1]++;
 | |
|     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
 | |
|   }
 | |
| #endif
 | |
| #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
 | |
|   {
 | |
|     unsigned long long v;
 | |
|     long long v2;
 | |
|     static const unsigned long long max_value = 0xffffffffffffffffULL;
 | |
|     char buf[32];  // definitely big enough for an unsigned long long
 | |
| 
 | |
|     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
 | |
|     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
 | |
| 
 | |
|     sprintf(buf, LLU, max_value);
 | |
|     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
 | |
| 
 | |
|     assert(buf[strlen(buf)-1] != '9');
 | |
|     buf[strlen(buf)-1]++;
 | |
|     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
 | |
|   }
 | |
| #endif
 | |
|   {
 | |
|     float v;
 | |
|     CHECK(RE("(.*)").FullMatch("100", &v));
 | |
|     CHECK(RE("(.*)").FullMatch("-100.", &v));
 | |
|     CHECK(RE("(.*)").FullMatch("1e23", &v));
 | |
|   }
 | |
|   {
 | |
|     double v;
 | |
|     CHECK(RE("(.*)").FullMatch("100", &v));
 | |
|     CHECK(RE("(.*)").FullMatch("-100.", &v));
 | |
|     CHECK(RE("(.*)").FullMatch("1e23", &v));
 | |
|   }
 | |
| 
 | |
|   // Check that matching is fully anchored
 | |
|   CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
 | |
|   CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
 | |
|   CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
 | |
|   CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
 | |
| 
 | |
|   // Braces
 | |
|   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
 | |
|   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
 | |
|   CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
 | |
| 
 | |
|   // Complicated RE
 | |
|   CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
 | |
|   CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
 | |
|   CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
 | |
|   CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
 | |
| 
 | |
|   // Check full-match handling (needs '$' tacked on internally)
 | |
|   CHECK(RE("fo|foo").FullMatch("fo"));
 | |
|   CHECK(RE("fo|foo").FullMatch("foo"));
 | |
|   CHECK(RE("fo|foo$").FullMatch("fo"));
 | |
|   CHECK(RE("fo|foo$").FullMatch("foo"));
 | |
|   CHECK(RE("foo$").FullMatch("foo"));
 | |
|   CHECK(!RE("foo\\$").FullMatch("foo$bar"));
 | |
|   CHECK(!RE("fo|bar").FullMatch("fox"));
 | |
| 
 | |
|   // Uncomment the following if we change the handling of '$' to
 | |
|   // prevent it from matching a trailing newline
 | |
|   if (false) {
 | |
|     // Check that we don't get bitten by pcre's special handling of a
 | |
|     // '\n' at the end of the string matching '$'
 | |
|     CHECK(!RE("foo$").PartialMatch("foo\n"));
 | |
|   }
 | |
| 
 | |
|   // Number of args
 | |
|   int a[16];
 | |
|   CHECK(RE("").FullMatch(""));
 | |
| 
 | |
|   memset(a, 0, sizeof(0));
 | |
|   CHECK(RE("(\\d){1}").FullMatch("1",
 | |
|                                  &a[0]));
 | |
|   CHECK_EQ(a[0], 1);
 | |
| 
 | |
|   memset(a, 0, sizeof(0));
 | |
|   CHECK(RE("(\\d)(\\d)").FullMatch("12",
 | |
|                                    &a[0],  &a[1]));
 | |
|   CHECK_EQ(a[0], 1);
 | |
|   CHECK_EQ(a[1], 2);
 | |
| 
 | |
|   memset(a, 0, sizeof(0));
 | |
|   CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
 | |
|                                         &a[0],  &a[1],  &a[2]));
 | |
|   CHECK_EQ(a[0], 1);
 | |
|   CHECK_EQ(a[1], 2);
 | |
|   CHECK_EQ(a[2], 3);
 | |
| 
 | |
|   memset(a, 0, sizeof(0));
 | |
|   CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
 | |
|                                              &a[0],  &a[1],  &a[2],  &a[3]));
 | |
|   CHECK_EQ(a[0], 1);
 | |
|   CHECK_EQ(a[1], 2);
 | |
|   CHECK_EQ(a[2], 3);
 | |
|   CHECK_EQ(a[3], 4);
 | |
| 
 | |
|   memset(a, 0, sizeof(0));
 | |
|   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
 | |
|                                                   &a[0],  &a[1],  &a[2],
 | |
|                                                   &a[3],  &a[4]));
 | |
|   CHECK_EQ(a[0], 1);
 | |
|   CHECK_EQ(a[1], 2);
 | |
|   CHECK_EQ(a[2], 3);
 | |
|   CHECK_EQ(a[3], 4);
 | |
|   CHECK_EQ(a[4], 5);
 | |
| 
 | |
|   memset(a, 0, sizeof(0));
 | |
|   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
 | |
|                                                        &a[0],  &a[1],  &a[2],
 | |
|                                                        &a[3],  &a[4],  &a[5]));
 | |
|   CHECK_EQ(a[0], 1);
 | |
|   CHECK_EQ(a[1], 2);
 | |
|   CHECK_EQ(a[2], 3);
 | |
|   CHECK_EQ(a[3], 4);
 | |
|   CHECK_EQ(a[4], 5);
 | |
|   CHECK_EQ(a[5], 6);
 | |
| 
 | |
|   memset(a, 0, sizeof(0));
 | |
|   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
 | |
|                                                             &a[0],  &a[1],  &a[2],  &a[3],
 | |
|                                                             &a[4],  &a[5],  &a[6]));
 | |
|   CHECK_EQ(a[0], 1);
 | |
|   CHECK_EQ(a[1], 2);
 | |
|   CHECK_EQ(a[2], 3);
 | |
|   CHECK_EQ(a[3], 4);
 | |
|   CHECK_EQ(a[4], 5);
 | |
|   CHECK_EQ(a[5], 6);
 | |
|   CHECK_EQ(a[6], 7);
 | |
| 
 | |
|   memset(a, 0, sizeof(0));
 | |
|   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
 | |
|            "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
 | |
|                "1234567890123456",
 | |
|                &a[0],  &a[1],  &a[2],  &a[3],
 | |
|                &a[4],  &a[5],  &a[6],  &a[7],
 | |
|                &a[8],  &a[9],  &a[10], &a[11],
 | |
|                &a[12], &a[13], &a[14], &a[15]));
 | |
|   CHECK_EQ(a[0], 1);
 | |
|   CHECK_EQ(a[1], 2);
 | |
|   CHECK_EQ(a[2], 3);
 | |
|   CHECK_EQ(a[3], 4);
 | |
|   CHECK_EQ(a[4], 5);
 | |
|   CHECK_EQ(a[5], 6);
 | |
|   CHECK_EQ(a[6], 7);
 | |
|   CHECK_EQ(a[7], 8);
 | |
|   CHECK_EQ(a[8], 9);
 | |
|   CHECK_EQ(a[9], 0);
 | |
|   CHECK_EQ(a[10], 1);
 | |
|   CHECK_EQ(a[11], 2);
 | |
|   CHECK_EQ(a[12], 3);
 | |
|   CHECK_EQ(a[13], 4);
 | |
|   CHECK_EQ(a[14], 5);
 | |
|   CHECK_EQ(a[15], 6);
 | |
| 
 | |
|   /***** PartialMatch *****/
 | |
| 
 | |
|   printf("Testing PartialMatch\n");
 | |
| 
 | |
|   CHECK(RE("h.*o").PartialMatch("hello"));
 | |
|   CHECK(RE("h.*o").PartialMatch("othello"));
 | |
|   CHECK(RE("h.*o").PartialMatch("hello!"));
 | |
|   CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
 | |
| 
 | |
|   /***** other tests *****/
 | |
| 
 | |
|   RadixTests();
 | |
|   TestReplace();
 | |
|   TestExtract();
 | |
|   TestConsume();
 | |
|   TestFindAndConsume();
 | |
|   TestQuoteMetaAll();
 | |
|   TestMatchNumberPeculiarity();
 | |
| 
 | |
|   // Check the pattern() accessor
 | |
|   {
 | |
|     const string kPattern = "http://([^/]+)/.*";
 | |
|     const RE re(kPattern);
 | |
|     CHECK_EQ(kPattern, re.pattern());
 | |
|   }
 | |
| 
 | |
|   // Check RE error field.
 | |
|   {
 | |
|     RE re("foo");
 | |
|     CHECK(re.error().empty());  // Must have no error
 | |
|   }
 | |
| 
 | |
| #ifdef SUPPORT_UTF8
 | |
|   // Check UTF-8 handling
 | |
|   {
 | |
|     printf("Testing UTF-8 handling\n");
 | |
| 
 | |
|     // Three Japanese characters (nihongo)
 | |
|     const unsigned char utf8_string[] = {
 | |
|          0xe6, 0x97, 0xa5, // 65e5
 | |
|          0xe6, 0x9c, 0xac, // 672c
 | |
|          0xe8, 0xaa, 0x9e, // 8a9e
 | |
|          0
 | |
|     };
 | |
|     const unsigned char utf8_pattern[] = {
 | |
|          '.',
 | |
|          0xe6, 0x9c, 0xac, // 672c
 | |
|          '.',
 | |
|          0
 | |
|     };
 | |
| 
 | |
|     // Both should match in either mode, bytes or UTF-8
 | |
|     RE re_test1(".........");
 | |
|     CHECK(re_test1.FullMatch(utf8_string));
 | |
|     RE re_test2("...", pcrecpp::UTF8());
 | |
|     CHECK(re_test2.FullMatch(utf8_string));
 | |
| 
 | |
|     // Check that '.' matches one byte or UTF-8 character
 | |
|     // according to the mode.
 | |
|     string ss;
 | |
|     RE re_test3("(.)");
 | |
|     CHECK(re_test3.PartialMatch(utf8_string, &ss));
 | |
|     CHECK_EQ(ss, string("\xe6"));
 | |
|     RE re_test4("(.)", pcrecpp::UTF8());
 | |
|     CHECK(re_test4.PartialMatch(utf8_string, &ss));
 | |
|     CHECK_EQ(ss, string("\xe6\x97\xa5"));
 | |
| 
 | |
|     // Check that string matches itself in either mode
 | |
|     RE re_test5(utf8_string);
 | |
|     CHECK(re_test5.FullMatch(utf8_string));
 | |
|     RE re_test6(utf8_string, pcrecpp::UTF8());
 | |
|     CHECK(re_test6.FullMatch(utf8_string));
 | |
| 
 | |
|     // Check that pattern matches string only in UTF8 mode
 | |
|     RE re_test7(utf8_pattern);
 | |
|     CHECK(!re_test7.FullMatch(utf8_string));
 | |
|     RE re_test8(utf8_pattern, pcrecpp::UTF8());
 | |
|     CHECK(re_test8.FullMatch(utf8_string));
 | |
|   }
 | |
| 
 | |
|   // Check that ungreedy, UTF8 regular expressions don't match when they
 | |
|   // oughtn't -- see bug 82246.
 | |
|   {
 | |
|     // This code always worked.
 | |
|     const char* pattern = "\\w+X";
 | |
|     const string target = "a aX";
 | |
|     RE match_sentence(pattern);
 | |
|     RE match_sentence_re(pattern, pcrecpp::UTF8());
 | |
| 
 | |
|     CHECK(!match_sentence.FullMatch(target));
 | |
|     CHECK(!match_sentence_re.FullMatch(target));
 | |
|   }
 | |
| 
 | |
|   {
 | |
|     const char* pattern = "(?U)\\w+X";
 | |
|     const string target = "a aX";
 | |
|     RE match_sentence(pattern);
 | |
|     RE match_sentence_re(pattern, pcrecpp::UTF8());
 | |
| 
 | |
|     CHECK(!match_sentence.FullMatch(target));
 | |
|     CHECK(!match_sentence_re.FullMatch(target));
 | |
|   }
 | |
| #endif  /* def SUPPORT_UTF8 */
 | |
| 
 | |
|   printf("Testing error reporting\n");
 | |
| 
 | |
|   { RE re("a\\1"); CHECK(!re.error().empty()); }
 | |
|   {
 | |
|     RE re("a[x");
 | |
|     CHECK(!re.error().empty());
 | |
|   }
 | |
|   {
 | |
|     RE re("a[z-a]");
 | |
|     CHECK(!re.error().empty());
 | |
|   }
 | |
|   {
 | |
|     RE re("a[[:foobar:]]");
 | |
|     CHECK(!re.error().empty());
 | |
|   }
 | |
|   {
 | |
|     RE re("a(b");
 | |
|     CHECK(!re.error().empty());
 | |
|   }
 | |
|   {
 | |
|     RE re("a\\");
 | |
|     CHECK(!re.error().empty());
 | |
|   }
 | |
| 
 | |
|   // Test that recursion is stopped
 | |
|   TestRecursion();
 | |
| 
 | |
|   // Test Options
 | |
|   if (getenv("VERBOSE_TEST") != NULL)
 | |
|     VERBOSE_TEST  = true;
 | |
|   TestOptions();
 | |
| 
 | |
|   // Test the constructors
 | |
|   TestConstructors();
 | |
| 
 | |
|   // Done
 | |
|   printf("OK\n");
 | |
| 
 | |
|   return 0;
 | |
| }
 |