200 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			200 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
// Copyright (c) 2005, Google Inc.
 | 
						|
// All rights reserved.
 | 
						|
//
 | 
						|
// Redistribution and use in source and binary forms, with or without
 | 
						|
// modification, are permitted provided that the following conditions are
 | 
						|
// met:
 | 
						|
//
 | 
						|
//     * Redistributions of source code must retain the above copyright
 | 
						|
// notice, this list of conditions and the following disclaimer.
 | 
						|
//     * Redistributions in binary form must reproduce the above
 | 
						|
// copyright notice, this list of conditions and the following disclaimer
 | 
						|
// in the documentation and/or other materials provided with the
 | 
						|
// distribution.
 | 
						|
//     * Neither the name of Google Inc. nor the names of its
 | 
						|
// contributors may be used to endorse or promote products derived from
 | 
						|
// this software without specific prior written permission.
 | 
						|
//
 | 
						|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 | 
						|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 | 
						|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 | 
						|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 | 
						|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 | 
						|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 | 
						|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | 
						|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | 
						|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
						|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
						|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
						|
//
 | 
						|
// Author: Sanjay Ghemawat
 | 
						|
 | 
						|
#ifdef HAVE_CONFIG_H
 | 
						|
#include "config.h"
 | 
						|
#endif
 | 
						|
 | 
						|
#include <vector>
 | 
						|
#include <assert.h>
 | 
						|
 | 
						|
#include "pcrecpp_internal.h"
 | 
						|
#include "pcre_scanner.h"
 | 
						|
 | 
						|
using std::vector;
 | 
						|
 | 
						|
namespace pcrecpp {
 | 
						|
 | 
						|
// Builds a scanner over an empty buffer: data_ is default-constructed and
// input_ is initialised from it.  No skip pattern and no comment list are
// allocated, and skipping and comment saving both start out disabled.
Scanner::Scanner()
    : data_(), input_(data_),
      skip_(NULL), should_skip_(false), skip_repeat_(false),
      save_comments_(false), comments_(NULL), comments_offset_(0) {}
 | 
						|
 | 
						|
// Builds a scanner over a private copy of "in": the text is copied into
// data_ and input_ is initialised from that copy, so scanning starts at the
// beginning of the text.  No skip pattern and no comment list are allocated,
// and skipping and comment saving both start out disabled.
Scanner::Scanner(const string& in)
    : data_(in), input_(data_),
      skip_(NULL), should_skip_(false), skip_repeat_(false),
      save_comments_(false), comments_(NULL), comments_offset_(0) {}
 | 
						|
 | 
						|
// Releases the heap objects owned by the scanner.  Either pointer may still
// be NULL (nothing was ever allocated); deleting a null pointer is a no-op,
// so no checks are needed.
Scanner::~Scanner() {
  delete comments_;  // list of saved skipped spans, if one was created
  delete skip_;      // compiled skip pattern, if one was installed
}
 | 
						|
 | 
						|
void Scanner::SetSkipExpression(const char* re) {
 | 
						|
  delete skip_;
 | 
						|
  if (re != NULL) {
 | 
						|
    skip_ = new RE(re);
 | 
						|
    should_skip_ = true;
 | 
						|
    skip_repeat_ = true;
 | 
						|
    ConsumeSkip();
 | 
						|
  } else {
 | 
						|
    skip_ = NULL;
 | 
						|
    should_skip_ = false;
 | 
						|
    skip_repeat_ = false;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
void Scanner::Skip(const char* re) {
 | 
						|
  delete skip_;
 | 
						|
  if (re != NULL) {
 | 
						|
    skip_ = new RE(re);
 | 
						|
    should_skip_ = true;
 | 
						|
    skip_repeat_ = false;
 | 
						|
    ConsumeSkip();
 | 
						|
  } else {
 | 
						|
    skip_ = NULL;
 | 
						|
    should_skip_ = false;
 | 
						|
    skip_repeat_ = false;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
// Temporarily turns skipping off without discarding the installed pattern:
// only should_skip_ is cleared, so Consume() stops calling ConsumeSkip().
// A skip pattern must already be installed (checked by assert).
void Scanner::DisableSkip() {
  assert(skip_ != NULL);
  should_skip_ = false;
}
 | 
						|
 | 
						|
// Turns skipping back on using the already-installed pattern and immediately
// skips at the current position.  A skip pattern must already be installed
// (checked by assert).
void Scanner::EnableSkip() {
  assert(skip_ != NULL);
  if (skip_ == NULL) {
    // assert() compiles away under NDEBUG.  Without a pattern there is
    // nothing to enable, and setting should_skip_ would make this call and
    // every later Consume() dereference a null pattern in ConsumeSkip().
    return;
  }
  should_skip_ = true;
  ConsumeSkip();
}
 | 
						|
 | 
						|
int Scanner::LineNumber() const {
 | 
						|
  // TODO: Make it more efficient by keeping track of the last point
 | 
						|
  // where we computed line numbers and counting newlines since then.
 | 
						|
  // We could use std:count, but not all systems have it. :-(
 | 
						|
  int count = 1;
 | 
						|
  for (const char* p = data_.data(); p < input_.data(); ++p)
 | 
						|
    if (*p == '\n')
 | 
						|
      ++count;
 | 
						|
  return count;
 | 
						|
}
 | 
						|
 | 
						|
int Scanner::Offset() const {
 | 
						|
  return input_.data() - data_.c_str();
 | 
						|
}
 | 
						|
 | 
						|
bool Scanner::LookingAt(const RE& re) const {
 | 
						|
  int consumed;
 | 
						|
  return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
bool Scanner::Consume(const RE& re,
 | 
						|
                      const Arg& arg0,
 | 
						|
                      const Arg& arg1,
 | 
						|
                      const Arg& arg2) {
 | 
						|
  const bool result = re.Consume(&input_, arg0, arg1, arg2);
 | 
						|
  if (result && should_skip_) ConsumeSkip();
 | 
						|
  return result;
 | 
						|
}
 | 
						|
 | 
						|
// helper function to consume *skip_ and honour save_comments_
 | 
						|
void Scanner::ConsumeSkip() {
 | 
						|
  const char* start_data = input_.data();
 | 
						|
  while (skip_->Consume(&input_)) {
 | 
						|
    if (!skip_repeat_) {
 | 
						|
      // Only one skip allowed.
 | 
						|
      break;
 | 
						|
    }
 | 
						|
  }
 | 
						|
  if (save_comments_) {
 | 
						|
    if (comments_ == NULL) {
 | 
						|
      comments_ = new vector<StringPiece>;
 | 
						|
    }
 | 
						|
    // already pointing one past end, so no need to +1
 | 
						|
    int length = input_.data() - start_data;
 | 
						|
    if (length > 0) {
 | 
						|
      comments_->push_back(StringPiece(start_data, length));
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
 | 
						|
  // short circuit out if we've not yet initialized comments_
 | 
						|
  // (e.g., when save_comments is false)
 | 
						|
  if (!comments_) {
 | 
						|
    return;
 | 
						|
  }
 | 
						|
  // TODO: if we guarantee that comments_ will contain StringPieces
 | 
						|
  // that are ordered by their start, then we can do a binary search
 | 
						|
  // for the first StringPiece at or past start and then scan for the
 | 
						|
  // ones contained in the range, quit early (use equal_range or
 | 
						|
  // lower_bound)
 | 
						|
  for (vector<StringPiece>::const_iterator it = comments_->begin();
 | 
						|
       it != comments_->end(); ++it) {
 | 
						|
    if ((it->data() >= data_.c_str() + start &&
 | 
						|
         it->data() + it->size() <= data_.c_str() + end)) {
 | 
						|
      ranges->push_back(*it);
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
// Appends to "ranges" every saved skipped span that has not been handed out
// by an earlier call, advancing comments_offset_ past each one.  Does
// nothing if no comment list has been created yet (e.g. when save_comments_
// is false).
void Scanner::GetNextComments(vector<StringPiece> *ranges) {
  if (!comments_) {
    return;
  }
  vector<StringPiece>::const_iterator pending =
      comments_->begin() + comments_offset_;
  const vector<StringPiece>::const_iterator stop = comments_->end();
  while (pending != stop) {
    ranges->push_back(*pending);
    ++comments_offset_;
    ++pending;
  }
}
 | 
						|
 | 
						|
}   // namespace pcrecpp
 |