Updated PCRE source to version 8.32 (bug 5593).
This commit is contained in:
parent
ed6d8521e6
commit
6ce00034a2
@ -232,6 +232,23 @@ while (<STDIN>)
|
|||||||
redo; # Process the joined lines
|
redo; # Process the joined lines
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# .EX/.EE are used in the pcredemo page to bracket the entire program,
|
||||||
|
# which is unmodified except for turning backslash into "\e".
|
||||||
|
|
||||||
|
elsif (/^\.EX\s*$/)
|
||||||
|
{
|
||||||
|
print TEMP "<PRE>\n";
|
||||||
|
while (<STDIN>)
|
||||||
|
{
|
||||||
|
last if /^\.EE\s*$/;
|
||||||
|
s/\\e/\\/g;
|
||||||
|
s/&/&amp;/g;
|
||||||
|
s/</&lt;/g;
|
||||||
|
s/>/&gt;/g;
|
||||||
|
print TEMP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
# Ignore anything not recognized
|
# Ignore anything not recognized
|
||||||
|
|
||||||
next;
|
next;
|
||||||
|
@ -8,16 +8,38 @@ Email domain: cam.ac.uk
|
|||||||
University of Cambridge Computing Service,
|
University of Cambridge Computing Service,
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
Copyright (c) 1997-2009 University of Cambridge
|
Copyright (c) 1997-2012 University of Cambridge
|
||||||
All rights reserved
|
All rights reserved
|
||||||
|
|
||||||
|
|
||||||
|
PCRE JUST-IN-TIME COMPILATION SUPPORT
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
Written by: Zoltan Herczeg
|
||||||
|
Email local part: hzmester
|
||||||
|
Email domain: freemail.hu
|
||||||
|
|
||||||
|
Copyright(c) 2010-2012 Zoltan Herczeg
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
STACK-LESS JUST-IN-TIME COMPILER
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
|
Written by: Zoltan Herczeg
|
||||||
|
Email local part: hzmester
|
||||||
|
Email domain: freemail.hu
|
||||||
|
|
||||||
|
Copyright(c) 2009-2012 Zoltan Herczeg
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
THE C++ WRAPPER LIBRARY
|
THE C++ WRAPPER LIBRARY
|
||||||
-----------------------
|
-----------------------
|
||||||
|
|
||||||
Written by: Google Inc.
|
Written by: Google Inc.
|
||||||
|
|
||||||
Copyright (c) 2007-2008 Google Inc
|
Copyright (c) 2007-2012 Google Inc
|
||||||
All rights reserved
|
All rights reserved
|
||||||
|
|
||||||
####
|
####
|
||||||
|
@ -35,10 +35,37 @@
|
|||||||
# to disable the final configuration report.
|
# to disable the final configuration report.
|
||||||
# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
|
# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
|
||||||
# are set by specifying a release type.
|
# are set by specifying a release type.
|
||||||
|
# 2010-01-02 PH added test for stdint.h
|
||||||
|
# 2010-03-02 PH added test for inttypes.h
|
||||||
|
# 2011-08-01 PH added PCREGREP_BUFSIZE
|
||||||
|
# 2011-08-22 PH added PCRE_SUPPORT_JIT
|
||||||
|
# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov
|
||||||
|
# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT
|
||||||
|
# 2011-10-04 Sheri added support for including coff data in windows shared libraries
|
||||||
|
# compiled with MINGW if pcre.rc and/or pcreposix.rc are placed in
|
||||||
|
# the source dir by the user prior to building
|
||||||
|
# 2011-10-04 Sheri changed various add_test's to use exes' location built instead
|
||||||
|
# of DEBUG location only (likely only matters in MSVC)
|
||||||
|
# 2011-10-04 Sheri added scripts to provide needed variables to RunTest and
|
||||||
|
# RunGrepTest (used for UNIX and Msys)
|
||||||
|
# 2011-10-04 Sheri added scripts to provide needed variables and to execute
|
||||||
|
# RunTest.bat in Win32 (for effortless testing with "make test")
|
||||||
|
# 2011-10-04 Sheri Increased minimum required cmake version
|
||||||
|
# 2012-01-06 PH removed pcre_info.c and added pcre_string_utils.c
|
||||||
|
# 2012-01-10 Zoltan Herczeg added libpcre16 support
|
||||||
|
# 2012-01-13 Stephen Kelly added out of source build support
|
||||||
|
# 2012-01-17 PH applied Stephen Kelly's patch to parse the version data out
|
||||||
|
# of the configure.ac file
|
||||||
|
# 2012-02-26 PH added support for libedit
|
||||||
|
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
|
||||||
|
# 2012-09-08 ChPe added PCRE32 support
|
||||||
|
# 2012-10-23 PH added support for VALGRIND and GCOV
|
||||||
|
|
||||||
PROJECT(PCRE C CXX)
|
PROJECT(PCRE C CXX)
|
||||||
|
|
||||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.4.6)
|
# Increased minimum to 2.8.0 to support newer add_test features
|
||||||
|
|
||||||
|
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
|
||||||
|
|
||||||
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
|
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
|
||||||
|
|
||||||
@ -46,6 +73,7 @@ SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
|
|||||||
FIND_PACKAGE( BZip2 )
|
FIND_PACKAGE( BZip2 )
|
||||||
FIND_PACKAGE( ZLIB )
|
FIND_PACKAGE( ZLIB )
|
||||||
FIND_PACKAGE( Readline )
|
FIND_PACKAGE( Readline )
|
||||||
|
FIND_PACKAGE( Editline )
|
||||||
|
|
||||||
# Configuration checks
|
# Configuration checks
|
||||||
|
|
||||||
@ -55,6 +83,8 @@ INCLUDE(CheckFunctionExists)
|
|||||||
INCLUDE(CheckTypeSize)
|
INCLUDE(CheckTypeSize)
|
||||||
|
|
||||||
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
|
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
|
||||||
|
CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H)
|
||||||
|
CHECK_INCLUDE_FILE(inttypes.h HAVE_INTTYPES_H)
|
||||||
CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H)
|
CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H)
|
||||||
CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
|
CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
|
||||||
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
|
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
|
||||||
@ -81,10 +111,19 @@ CHECK_TYPE_SIZE("unsigned long long" UNSIGNED_LONG_LONG)
|
|||||||
SET(BUILD_SHARED_LIBS OFF CACHE BOOL
|
SET(BUILD_SHARED_LIBS OFF CACHE BOOL
|
||||||
"Build shared libraries instead of static ones.")
|
"Build shared libraries instead of static ones.")
|
||||||
|
|
||||||
|
OPTION(PCRE_BUILD_PCRE8 "Build 8 bit PCRE library" ON)
|
||||||
|
|
||||||
|
OPTION(PCRE_BUILD_PCRE16 "Build 16 bit PCRE library" OFF)
|
||||||
|
|
||||||
|
OPTION(PCRE_BUILD_PCRE32 "Build 32 bit PCRE library" OFF)
|
||||||
|
|
||||||
OPTION(PCRE_BUILD_PCRECPP "Build the PCRE C++ library (pcrecpp)." ON)
|
OPTION(PCRE_BUILD_PCRECPP "Build the PCRE C++ library (pcrecpp)." ON)
|
||||||
|
|
||||||
SET(PCRE_EBCDIC OFF CACHE BOOL
|
SET(PCRE_EBCDIC OFF CACHE BOOL
|
||||||
"Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems)")
|
"Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)")
|
||||||
|
|
||||||
|
SET(PCRE_EBCDIC_NL25 OFF CACHE BOOL
|
||||||
|
"Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")
|
||||||
|
|
||||||
SET(PCRE_LINK_SIZE "2" CACHE STRING
|
SET(PCRE_LINK_SIZE "2" CACHE STRING
|
||||||
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
|
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
|
||||||
@ -95,6 +134,9 @@ SET(PCRE_MATCH_LIMIT "10000000" CACHE STRING
|
|||||||
SET(PCRE_MATCH_LIMIT_RECURSION "MATCH_LIMIT" CACHE STRING
|
SET(PCRE_MATCH_LIMIT_RECURSION "MATCH_LIMIT" CACHE STRING
|
||||||
"Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.")
|
"Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.")
|
||||||
|
|
||||||
|
SET(PCREGREP_BUFSIZE "20480" CACHE STRING
|
||||||
|
"Buffer size parameter for pcregrep. See PCREGREP_BUFSIZE in config.h.in for details.")
|
||||||
|
|
||||||
SET(PCRE_NEWLINE "LF" CACHE STRING
|
SET(PCRE_NEWLINE "LF" CACHE STRING
|
||||||
"What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).")
|
"What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).")
|
||||||
|
|
||||||
@ -104,26 +146,31 @@ SET(PCRE_NO_RECURSE OFF CACHE BOOL
|
|||||||
SET(PCRE_POSIX_MALLOC_THRESHOLD "10" CACHE STRING
|
SET(PCRE_POSIX_MALLOC_THRESHOLD "10" CACHE STRING
|
||||||
"Threshold for malloc() usage. See POSIX_MALLOC_THRESHOLD in config.h.in for details.")
|
"Threshold for malloc() usage. See POSIX_MALLOC_THRESHOLD in config.h.in for details.")
|
||||||
|
|
||||||
SET(PCRE_SUPPORT_UNICODE_PROPERTIES OFF CACHE BOOL
|
SET(PCRE_SUPPORT_JIT OFF CACHE BOOL
|
||||||
"Enable support for Unicode properties. (If set, UTF-8 support will be enabled as well)")
|
"Enable support for Just-in-time compiling.")
|
||||||
|
|
||||||
SET(PCRE_SUPPORT_UTF8 OFF CACHE BOOL
|
SET(PCRE_SUPPORT_PCREGREP_JIT ON CACHE BOOL
|
||||||
"Enable support for the Unicode UTF-8 encoding.")
|
"Enable use of Just-in-time compiling in pcregrep.")
|
||||||
|
|
||||||
|
SET(PCRE_SUPPORT_UTF OFF CACHE BOOL
|
||||||
|
"Enable support for Unicode Transformation Format (UTF-8/UTF-16/UTF-32) encoding.")
|
||||||
|
|
||||||
|
SET(PCRE_SUPPORT_UNICODE_PROPERTIES OFF CACHE BOOL
|
||||||
|
"Enable support for Unicode properties (if set, UTF support will be enabled as well).")
|
||||||
|
|
||||||
SET(PCRE_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
SET(PCRE_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
||||||
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
|
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
|
||||||
|
|
||||||
|
SET(PCRE_SUPPORT_VALGRIND OFF CACHE BOOL
|
||||||
|
"Enable Valgrind support.")
|
||||||
|
|
||||||
|
SET(PCRE_SUPPORT_COVERAGE OFF CACHE BOOL
|
||||||
|
"Enable code coverage support using gcov.")
|
||||||
|
|
||||||
OPTION(PCRE_SHOW_REPORT "Show the final configuration report" ON)
|
OPTION(PCRE_SHOW_REPORT "Show the final configuration report" ON)
|
||||||
OPTION(PCRE_BUILD_PCREGREP "Build pcregrep" ON)
|
OPTION(PCRE_BUILD_PCREGREP "Build pcregrep" ON)
|
||||||
OPTION(PCRE_BUILD_TESTS "Build the tests" ON)
|
OPTION(PCRE_BUILD_TESTS "Build the tests" ON)
|
||||||
|
|
||||||
IF (PCRE_BUILD_TESTS)
|
|
||||||
IF (NOT PCRE_BUILD_PCREGREP)
|
|
||||||
MESSAGE(STATUS "** Building tests requires pcregrep: PCRE_BUILD_PCREGREP forced ON")
|
|
||||||
SET(PCRE_BUILD_PCREGREP ON)
|
|
||||||
ENDIF(NOT PCRE_BUILD_PCREGREP)
|
|
||||||
ENDIF(PCRE_BUILD_TESTS)
|
|
||||||
|
|
||||||
IF (MINGW)
|
IF (MINGW)
|
||||||
OPTION(NON_STANDARD_LIB_PREFIX
|
OPTION(NON_STANDARD_LIB_PREFIX
|
||||||
"ON=Shared libraries built in mingw will be named pcre.dll, etc., instead of libpcre.dll, etc."
|
"ON=Shared libraries built in mingw will be named pcre.dll, etc., instead of libpcre.dll, etc."
|
||||||
@ -150,6 +197,14 @@ IF(PCRE_SUPPORT_LIBZ)
|
|||||||
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
|
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
|
||||||
ENDIF(PCRE_SUPPORT_LIBZ)
|
ENDIF(PCRE_SUPPORT_LIBZ)
|
||||||
|
|
||||||
|
# editline lib
|
||||||
|
IF(EDITLINE_FOUND)
|
||||||
|
OPTION (PCRE_SUPPORT_LIBEDIT "Enable support for linking pcretest with libedit." OFF)
|
||||||
|
ENDIF(EDITLINE_FOUND)
|
||||||
|
IF(PCRE_SUPPORT_LIBEDIT)
|
||||||
|
INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
|
||||||
|
ENDIF(PCRE_SUPPORT_LIBEDIT)
|
||||||
|
|
||||||
# readline lib
|
# readline lib
|
||||||
IF(READLINE_FOUND)
|
IF(READLINE_FOUND)
|
||||||
OPTION (PCRE_SUPPORT_LIBREADLINE "Enable support for linking pcretest with libreadline." ON)
|
OPTION (PCRE_SUPPORT_LIBREADLINE "Enable support for linking pcretest with libreadline." ON)
|
||||||
@ -186,18 +241,68 @@ IF(NOT BUILD_SHARED_LIBS)
|
|||||||
SET(PCRE_STATIC 1)
|
SET(PCRE_STATIC 1)
|
||||||
ENDIF(NOT BUILD_SHARED_LIBS)
|
ENDIF(NOT BUILD_SHARED_LIBS)
|
||||||
|
|
||||||
|
IF(NOT PCRE_BUILD_PCRE8 AND NOT PCRE_BUILD_PCRE16 AND NOT PCRE_BUILD_PCRE32)
|
||||||
|
MESSAGE(FATAL_ERROR "At least one of PCRE_BUILD_PCRE8, PCRE_BUILD_PCRE16 or PCRE_BUILD_PCRE32 must be enabled")
|
||||||
|
ENDIF(NOT PCRE_BUILD_PCRE8 AND NOT PCRE_BUILD_PCRE16 AND NOT PCRE_BUILD_PCRE32)
|
||||||
|
|
||||||
|
IF(PCRE_BUILD_PCRE8)
|
||||||
|
SET(SUPPORT_PCRE8 1)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE8)
|
||||||
|
|
||||||
|
IF(PCRE_BUILD_PCRE16)
|
||||||
|
SET(SUPPORT_PCRE16 1)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE16)
|
||||||
|
|
||||||
|
IF(PCRE_BUILD_PCRE32)
|
||||||
|
SET(SUPPORT_PCRE32 1)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE32)
|
||||||
|
|
||||||
|
IF(PCRE_BUILD_PCRECPP AND NOT PCRE_BUILD_PCRE8)
|
||||||
|
MESSAGE(STATUS "** PCRE_BUILD_PCRE8 must be enabled for the C++ library support")
|
||||||
|
SET(PCRE_BUILD_PCRECPP OFF)
|
||||||
|
ENDIF(PCRE_BUILD_PCRECPP AND NOT PCRE_BUILD_PCRE8)
|
||||||
|
|
||||||
|
IF(PCRE_BUILD_PCREGREP AND NOT PCRE_BUILD_PCRE8)
|
||||||
|
MESSAGE(STATUS "** PCRE_BUILD_PCRE8 must be enabled for the pcregrep program")
|
||||||
|
SET(PCRE_BUILD_PCREGREP OFF)
|
||||||
|
ENDIF(PCRE_BUILD_PCREGREP AND NOT PCRE_BUILD_PCRE8)
|
||||||
|
|
||||||
|
IF(PCRE_SUPPORT_LIBREADLINE AND PCRE_SUPPORT_LIBEDIT)
|
||||||
|
MESSAGE(FATAL_ERROR "Only one of libreadline or libeditline can be specified")
|
||||||
|
ENDIF(PCRE_SUPPORT_LIBREADLINE AND PCRE_SUPPORT_LIBEDIT)
|
||||||
|
|
||||||
IF(PCRE_SUPPORT_BSR_ANYCRLF)
|
IF(PCRE_SUPPORT_BSR_ANYCRLF)
|
||||||
SET(BSR_ANYCRLF 1)
|
SET(BSR_ANYCRLF 1)
|
||||||
ENDIF(PCRE_SUPPORT_BSR_ANYCRLF)
|
ENDIF(PCRE_SUPPORT_BSR_ANYCRLF)
|
||||||
|
|
||||||
IF(PCRE_SUPPORT_UTF8 OR PCRE_SUPPORT_UNICODE_PROPERTIES)
|
IF(PCRE_SUPPORT_UTF OR PCRE_SUPPORT_UNICODE_PROPERTIES)
|
||||||
SET(SUPPORT_UTF8 1)
|
SET(SUPPORT_UTF 1)
|
||||||
ENDIF(PCRE_SUPPORT_UTF8 OR PCRE_SUPPORT_UNICODE_PROPERTIES)
|
SET(PCRE_SUPPORT_UTF ON)
|
||||||
|
ENDIF(PCRE_SUPPORT_UTF OR PCRE_SUPPORT_UNICODE_PROPERTIES)
|
||||||
|
|
||||||
IF(PCRE_SUPPORT_UNICODE_PROPERTIES)
|
IF(PCRE_SUPPORT_UNICODE_PROPERTIES)
|
||||||
SET(SUPPORT_UCP 1)
|
SET(SUPPORT_UCP 1)
|
||||||
ENDIF(PCRE_SUPPORT_UNICODE_PROPERTIES)
|
ENDIF(PCRE_SUPPORT_UNICODE_PROPERTIES)
|
||||||
|
|
||||||
|
IF(PCRE_SUPPORT_JIT)
|
||||||
|
SET(SUPPORT_JIT 1)
|
||||||
|
ENDIF(PCRE_SUPPORT_JIT)
|
||||||
|
|
||||||
|
IF(PCRE_SUPPORT_PCREGREP_JIT)
|
||||||
|
SET(SUPPORT_PCREGREP_JIT 1)
|
||||||
|
ENDIF(PCRE_SUPPORT_PCREGREP_JIT)
|
||||||
|
|
||||||
|
IF(PCRE_SUPPORT_VALGRIND)
|
||||||
|
SET(SUPPORT_VALGRIND 1)
|
||||||
|
ENDIF(PCRE_SUPPORT_VALGRIND)
|
||||||
|
|
||||||
|
IF(PCRE_SUPPORT_COVERAGE)
|
||||||
|
SET(SUPPORT_GCOV 1)
|
||||||
|
IF(NOT CMAKE_COMPILER_IS_GNUCC)
|
||||||
|
MESSAGE(FATAL_ERROR "Code coverage reports can only be generated when using GCC")
|
||||||
|
ENDIF(NOT CMAKE_COMPILER_IS_GNUCC)
|
||||||
|
ENDIF(PCRE_SUPPORT_COVERAGE)
|
||||||
|
|
||||||
# This next one used to contain
|
# This next one used to contain
|
||||||
# SET(PCRETEST_LIBS ${READLINE_LIBRARY})
|
# SET(PCRETEST_LIBS ${READLINE_LIBRARY})
|
||||||
# but I was advised to add the NCURSES test as well, along with
|
# but I was advised to add the NCURSES test as well, along with
|
||||||
@ -209,6 +314,13 @@ IF(PCRE_SUPPORT_LIBREADLINE)
|
|||||||
SET(PCRETEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY})
|
SET(PCRETEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY})
|
||||||
ENDIF(PCRE_SUPPORT_LIBREADLINE)
|
ENDIF(PCRE_SUPPORT_LIBREADLINE)
|
||||||
|
|
||||||
|
# libedit is a plug-compatible alternative to libreadline
|
||||||
|
|
||||||
|
IF(PCRE_SUPPORT_LIBEDIT)
|
||||||
|
SET(SUPPORT_LIBEDIT 1)
|
||||||
|
SET(PCRETEST_LIBS ${EDITLINE_LIBRARY} ${NCURSES_LIBRARY})
|
||||||
|
ENDIF(PCRE_SUPPORT_LIBEDIT)
|
||||||
|
|
||||||
IF(PCRE_SUPPORT_LIBZ)
|
IF(PCRE_SUPPORT_LIBZ)
|
||||||
SET(SUPPORT_LIBZ 1)
|
SET(SUPPORT_LIBZ 1)
|
||||||
SET(PCREGREP_LIBS ${PCREGREP_LIBS} ${ZLIB_LIBRARIES})
|
SET(PCREGREP_LIBS ${PCREGREP_LIBS} ${ZLIB_LIBRARIES})
|
||||||
@ -243,8 +355,25 @@ ENDIF(NEWLINE STREQUAL "")
|
|||||||
|
|
||||||
IF(PCRE_EBCDIC)
|
IF(PCRE_EBCDIC)
|
||||||
SET(EBCDIC 1)
|
SET(EBCDIC 1)
|
||||||
|
IF(PCRE_NEWLINE STREQUAL "LF")
|
||||||
|
SET(NEWLINE "21")
|
||||||
|
ENDIF(PCRE_NEWLINE STREQUAL "LF")
|
||||||
|
IF(PCRE_NEWLINE STREQUAL "CRLF")
|
||||||
|
SET(NEWLINE "3349")
|
||||||
|
ENDIF(PCRE_NEWLINE STREQUAL "CRLF")
|
||||||
ENDIF(PCRE_EBCDIC)
|
ENDIF(PCRE_EBCDIC)
|
||||||
|
|
||||||
|
IF(PCRE_EBCDIC_NL25)
|
||||||
|
SET(EBCDIC 1)
|
||||||
|
SET(EBCDIC_NL25 1)
|
||||||
|
IF(PCRE_NEWLINE STREQUAL "LF")
|
||||||
|
SET(NEWLINE "37")
|
||||||
|
ENDIF(PCRE_NEWLINE STREQUAL "LF")
|
||||||
|
IF(PCRE_NEWLINE STREQUAL "CRLF")
|
||||||
|
SET(NEWLINE "3365")
|
||||||
|
ENDIF(PCRE_NEWLINE STREQUAL "CRLF")
|
||||||
|
ENDIF(PCRE_EBCDIC_NL25)
|
||||||
|
|
||||||
IF(PCRE_NO_RECURSE)
|
IF(PCRE_NO_RECURSE)
|
||||||
SET(NO_RECURSE 1)
|
SET(NO_RECURSE 1)
|
||||||
ENDIF(PCRE_NO_RECURSE)
|
ENDIF(PCRE_NO_RECURSE)
|
||||||
@ -254,9 +383,29 @@ CONFIGURE_FILE(config-cmake.h.in
|
|||||||
${PROJECT_BINARY_DIR}/config.h
|
${PROJECT_BINARY_DIR}/config.h
|
||||||
@ONLY)
|
@ONLY)
|
||||||
|
|
||||||
CONFIGURE_FILE(pcre.h.generic
|
# Parse version numbers and date out of configure.ac
|
||||||
|
|
||||||
|
file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac
|
||||||
|
configure_lines
|
||||||
|
LIMIT_COUNT 50 # Read only the first 50 lines of the file
|
||||||
|
)
|
||||||
|
|
||||||
|
set(SEARCHED_VARIABLES "pcre_major" "pcre_minor" "pcre_prerelease" "pcre_date")
|
||||||
|
foreach(configure_line ${configure_lines})
|
||||||
|
foreach(_substitution_variable ${SEARCHED_VARIABLES})
|
||||||
|
string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
|
||||||
|
if (NOT ${_substitution_variable_upper})
|
||||||
|
string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MACTHED_STRING ${configure_line})
|
||||||
|
if (CMAKE_MATCH_1)
|
||||||
|
set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
CONFIGURE_FILE(pcre.h.in
|
||||||
${PROJECT_BINARY_DIR}/pcre.h
|
${PROJECT_BINARY_DIR}/pcre.h
|
||||||
COPYONLY)
|
@ONLY)
|
||||||
|
|
||||||
# What about pcre-config and libpcre.pc?
|
# What about pcre-config and libpcre.pc?
|
||||||
|
|
||||||
@ -295,8 +444,10 @@ ENDIF(PCRE_REBUILD_CHARTABLES)
|
|||||||
|
|
||||||
SET(PCRE_HEADERS ${PROJECT_BINARY_DIR}/pcre.h)
|
SET(PCRE_HEADERS ${PROJECT_BINARY_DIR}/pcre.h)
|
||||||
|
|
||||||
|
IF(PCRE_BUILD_PCRE8)
|
||||||
SET(PCRE_SOURCES
|
SET(PCRE_SOURCES
|
||||||
${PROJECT_BINARY_DIR}/pcre_chartables.c
|
pcre_byte_order.c
|
||||||
|
pcre_chartables.c
|
||||||
pcre_compile.c
|
pcre_compile.c
|
||||||
pcre_config.c
|
pcre_config.c
|
||||||
pcre_dfa_exec.c
|
pcre_dfa_exec.c
|
||||||
@ -304,14 +455,14 @@ SET(PCRE_SOURCES
|
|||||||
pcre_fullinfo.c
|
pcre_fullinfo.c
|
||||||
pcre_get.c
|
pcre_get.c
|
||||||
pcre_globals.c
|
pcre_globals.c
|
||||||
pcre_info.c
|
pcre_jit_compile.c
|
||||||
pcre_newline.c
|
|
||||||
pcre_maketables.c
|
pcre_maketables.c
|
||||||
|
pcre_newline.c
|
||||||
pcre_ord2utf8.c
|
pcre_ord2utf8.c
|
||||||
pcre_refcount.c
|
pcre_refcount.c
|
||||||
|
pcre_string_utils.c
|
||||||
pcre_study.c
|
pcre_study.c
|
||||||
pcre_tables.c
|
pcre_tables.c
|
||||||
pcre_try_flipped.c
|
|
||||||
pcre_ucd.c
|
pcre_ucd.c
|
||||||
pcre_valid_utf8.c
|
pcre_valid_utf8.c
|
||||||
pcre_version.c
|
pcre_version.c
|
||||||
@ -322,6 +473,85 @@ SET(PCREPOSIX_HEADERS pcreposix.h)
|
|||||||
|
|
||||||
SET(PCREPOSIX_SOURCES pcreposix.c)
|
SET(PCREPOSIX_SOURCES pcreposix.c)
|
||||||
|
|
||||||
|
ENDIF(PCRE_BUILD_PCRE8)
|
||||||
|
|
||||||
|
IF(PCRE_BUILD_PCRE16)
|
||||||
|
SET(PCRE16_SOURCES
|
||||||
|
pcre16_byte_order.c
|
||||||
|
pcre16_chartables.c
|
||||||
|
pcre16_compile.c
|
||||||
|
pcre16_config.c
|
||||||
|
pcre16_dfa_exec.c
|
||||||
|
pcre16_exec.c
|
||||||
|
pcre16_fullinfo.c
|
||||||
|
pcre16_get.c
|
||||||
|
pcre16_globals.c
|
||||||
|
pcre16_jit_compile.c
|
||||||
|
pcre16_maketables.c
|
||||||
|
pcre16_newline.c
|
||||||
|
pcre16_ord2utf16.c
|
||||||
|
pcre16_refcount.c
|
||||||
|
pcre16_string_utils.c
|
||||||
|
pcre16_study.c
|
||||||
|
pcre16_tables.c
|
||||||
|
pcre16_ucd.c
|
||||||
|
pcre16_utf16_utils.c
|
||||||
|
pcre16_valid_utf16.c
|
||||||
|
pcre16_version.c
|
||||||
|
pcre16_xclass.c
|
||||||
|
)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE16)
|
||||||
|
|
||||||
|
IF(PCRE_BUILD_PCRE32)
|
||||||
|
SET(PCRE32_SOURCES
|
||||||
|
pcre32_byte_order.c
|
||||||
|
pcre32_chartables.c
|
||||||
|
pcre32_compile.c
|
||||||
|
pcre32_config.c
|
||||||
|
pcre32_dfa_exec.c
|
||||||
|
pcre32_exec.c
|
||||||
|
pcre32_fullinfo.c
|
||||||
|
pcre32_get.c
|
||||||
|
pcre32_globals.c
|
||||||
|
pcre32_jit_compile.c
|
||||||
|
pcre32_maketables.c
|
||||||
|
pcre32_newline.c
|
||||||
|
pcre32_ord2utf32.c
|
||||||
|
pcre32_refcount.c
|
||||||
|
pcre32_string_utils.c
|
||||||
|
pcre32_study.c
|
||||||
|
pcre32_tables.c
|
||||||
|
pcre32_ucd.c
|
||||||
|
pcre32_utf32_utils.c
|
||||||
|
pcre32_valid_utf32.c
|
||||||
|
pcre32_version.c
|
||||||
|
pcre32_xclass.c
|
||||||
|
)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE32)
|
||||||
|
|
||||||
|
IF(MINGW AND NOT PCRE_STATIC)
|
||||||
|
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
|
||||||
|
ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre.o
|
||||||
|
PRE-LINK
|
||||||
|
COMMAND windres ARGS pcre.rc pcre.o
|
||||||
|
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||||
|
COMMENT Using pcre coff info in mingw build)
|
||||||
|
SET(PCRE_SOURCES
|
||||||
|
${PCRE_SOURCES} ${PROJECT_SOURCE_DIR}/pcre.o
|
||||||
|
)
|
||||||
|
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
|
||||||
|
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
|
||||||
|
ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcreposix.o
|
||||||
|
PRE-LINK
|
||||||
|
COMMAND windres ARGS pcreposix.rc pcreposix.o
|
||||||
|
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||||
|
COMMENT Using pcreposix coff info in mingw build)
|
||||||
|
SET(PCREPOSIX_SOURCES
|
||||||
|
${PCREPOSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcreposix.o
|
||||||
|
)
|
||||||
|
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
|
||||||
|
ENDIF(MINGW AND NOT PCRE_STATIC)
|
||||||
|
|
||||||
SET(PCRECPP_HEADERS
|
SET(PCRECPP_HEADERS
|
||||||
pcrecpp.h
|
pcrecpp.h
|
||||||
pcre_scanner.h
|
pcre_scanner.h
|
||||||
@ -354,11 +584,13 @@ SET(targets)
|
|||||||
|
|
||||||
# Libraries
|
# Libraries
|
||||||
# pcre
|
# pcre
|
||||||
|
IF(PCRE_BUILD_PCRE8)
|
||||||
ADD_LIBRARY(pcre ${PCRE_HEADERS} ${PCRE_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
ADD_LIBRARY(pcre ${PCRE_HEADERS} ${PCRE_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||||
SET(targets ${targets} pcre)
|
SET(targets ${targets} pcre)
|
||||||
ADD_LIBRARY(pcreposix ${PCREPOSIX_HEADERS} ${PCREPOSIX_SOURCES})
|
ADD_LIBRARY(pcreposix ${PCREPOSIX_HEADERS} ${PCREPOSIX_SOURCES})
|
||||||
SET(targets ${targets} pcreposix)
|
SET(targets ${targets} pcreposix)
|
||||||
TARGET_LINK_LIBRARIES(pcreposix pcre)
|
TARGET_LINK_LIBRARIES(pcreposix pcre)
|
||||||
|
|
||||||
IF(MINGW AND NOT PCRE_STATIC)
|
IF(MINGW AND NOT PCRE_STATIC)
|
||||||
IF(NON_STANDARD_LIB_PREFIX)
|
IF(NON_STANDARD_LIB_PREFIX)
|
||||||
SET_TARGET_PROPERTIES(pcre pcreposix
|
SET_TARGET_PROPERTIES(pcre pcreposix
|
||||||
@ -373,12 +605,53 @@ IF(MINGW AND NOT PCRE_STATIC)
|
|||||||
ENDIF(NON_STANDARD_LIB_SUFFIX)
|
ENDIF(NON_STANDARD_LIB_SUFFIX)
|
||||||
ENDIF(MINGW AND NOT PCRE_STATIC)
|
ENDIF(MINGW AND NOT PCRE_STATIC)
|
||||||
|
|
||||||
|
ENDIF(PCRE_BUILD_PCRE8)
|
||||||
|
|
||||||
|
IF(PCRE_BUILD_PCRE16)
|
||||||
|
ADD_LIBRARY(pcre16 ${PCRE_HEADERS} ${PCRE16_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||||
|
SET(targets ${targets} pcre16)
|
||||||
|
|
||||||
|
IF(MINGW AND NOT PCRE_STATIC)
|
||||||
|
IF(NON_STANDARD_LIB_PREFIX)
|
||||||
|
SET_TARGET_PROPERTIES(pcre16
|
||||||
|
PROPERTIES PREFIX ""
|
||||||
|
)
|
||||||
|
ENDIF(NON_STANDARD_LIB_PREFIX)
|
||||||
|
|
||||||
|
IF(NON_STANDARD_LIB_SUFFIX)
|
||||||
|
SET_TARGET_PROPERTIES(pcre16
|
||||||
|
PROPERTIES SUFFIX "-0.dll"
|
||||||
|
)
|
||||||
|
ENDIF(NON_STANDARD_LIB_SUFFIX)
|
||||||
|
ENDIF(MINGW AND NOT PCRE_STATIC)
|
||||||
|
|
||||||
|
ENDIF(PCRE_BUILD_PCRE16)
|
||||||
|
|
||||||
|
IF(PCRE_BUILD_PCRE32)
|
||||||
|
ADD_LIBRARY(pcre32 ${PCRE_HEADERS} ${PCRE32_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||||
|
SET(targets ${targets} pcre32)
|
||||||
|
|
||||||
|
IF(MINGW AND NOT PCRE_STATIC)
|
||||||
|
IF(NON_STANDARD_LIB_PREFIX)
|
||||||
|
SET_TARGET_PROPERTIES(pcre32
|
||||||
|
PROPERTIES PREFIX ""
|
||||||
|
)
|
||||||
|
ENDIF(NON_STANDARD_LIB_PREFIX)
|
||||||
|
|
||||||
|
IF(NON_STANDARD_LIB_SUFFIX)
|
||||||
|
SET_TARGET_PROPERTIES(pcre32
|
||||||
|
PROPERTIES SUFFIX "-0.dll"
|
||||||
|
)
|
||||||
|
ENDIF(NON_STANDARD_LIB_SUFFIX)
|
||||||
|
ENDIF(MINGW AND NOT PCRE_STATIC)
|
||||||
|
|
||||||
|
ENDIF(PCRE_BUILD_PCRE32)
|
||||||
|
|
||||||
# pcrecpp
|
# pcrecpp
|
||||||
IF(PCRE_BUILD_PCRECPP)
|
IF(PCRE_BUILD_PCRECPP)
|
||||||
ADD_LIBRARY(pcrecpp ${PCRECPP_HEADERS} ${PCRECPP_SOURCES})
|
ADD_LIBRARY(pcrecpp ${PCRECPP_HEADERS} ${PCRECPP_SOURCES})
|
||||||
SET(targets ${targets} pcrecpp)
|
SET(targets ${targets} pcrecpp)
|
||||||
TARGET_LINK_LIBRARIES(pcrecpp pcre)
|
TARGET_LINK_LIBRARIES(pcrecpp pcre)
|
||||||
|
|
||||||
IF(MINGW AND NOT PCRE_STATIC)
|
IF(MINGW AND NOT PCRE_STATIC)
|
||||||
IF(NON_STANDARD_LIB_PREFIX)
|
IF(NON_STANDARD_LIB_PREFIX)
|
||||||
@ -413,14 +686,49 @@ IF(PCRE_BUILD_PCREGREP)
|
|||||||
TARGET_LINK_LIBRARIES(pcregrep pcreposix ${PCREGREP_LIBS})
|
TARGET_LINK_LIBRARIES(pcregrep pcreposix ${PCREGREP_LIBS})
|
||||||
ENDIF(PCRE_BUILD_PCREGREP)
|
ENDIF(PCRE_BUILD_PCREGREP)
|
||||||
|
|
||||||
|
|
||||||
# Testing
|
# Testing
|
||||||
IF(PCRE_BUILD_TESTS)
|
IF(PCRE_BUILD_TESTS)
|
||||||
ENABLE_TESTING()
|
ENABLE_TESTING()
|
||||||
|
|
||||||
ADD_EXECUTABLE(pcretest pcretest.c)
|
SET(PCRETEST_SOURCES pcretest.c)
|
||||||
|
IF(PCRE_BUILD_PCRE8)
|
||||||
|
LIST(APPEND PCRETEST_SOURCES pcre_printint.c)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE8)
|
||||||
|
IF(PCRE_BUILD_PCRE16)
|
||||||
|
LIST(APPEND PCRETEST_SOURCES pcre16_printint.c)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE16)
|
||||||
|
IF(PCRE_BUILD_PCRE32)
|
||||||
|
LIST(APPEND PCRETEST_SOURCES pcre32_printint.c)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE32)
|
||||||
|
|
||||||
|
ADD_EXECUTABLE(pcretest ${PCRETEST_SOURCES})
|
||||||
SET(targets ${targets} pcretest)
|
SET(targets ${targets} pcretest)
|
||||||
TARGET_LINK_LIBRARIES(pcretest pcreposix ${PCRETEST_LIBS})
|
IF(PCRE_BUILD_PCRE8)
|
||||||
|
LIST(APPEND PCRETEST_LIBS pcreposix pcre)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE8)
|
||||||
|
IF(PCRE_BUILD_PCRE16)
|
||||||
|
LIST(APPEND PCRETEST_LIBS pcre16)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE16)
|
||||||
|
IF(PCRE_BUILD_PCRE32)
|
||||||
|
LIST(APPEND PCRETEST_LIBS pcre32)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE32)
|
||||||
|
TARGET_LINK_LIBRARIES(pcretest ${PCRETEST_LIBS})
|
||||||
|
|
||||||
|
IF(PCRE_SUPPORT_JIT)
|
||||||
|
ADD_EXECUTABLE(pcre_jit_test pcre_jit_test.c)
|
||||||
|
SET(targets ${targets} pcre_jit_test)
|
||||||
|
SET(PCRE_JIT_TEST_LIBS )
|
||||||
|
IF(PCRE_BUILD_PCRE8)
|
||||||
|
LIST(APPEND PCRE_JIT_TEST_LIBS pcre)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE8)
|
||||||
|
IF(PCRE_BUILD_PCRE16)
|
||||||
|
LIST(APPEND PCRE_JIT_TEST_LIBS pcre16)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE16)
|
||||||
|
IF(PCRE_BUILD_PCRE32)
|
||||||
|
LIST(APPEND PCRE_JIT_TEST_LIBS pcre32)
|
||||||
|
ENDIF(PCRE_BUILD_PCRE32)
|
||||||
|
TARGET_LINK_LIBRARIES(pcre_jit_test ${PCRE_JIT_TEST_LIBS})
|
||||||
|
ENDIF(PCRE_SUPPORT_JIT)
|
||||||
|
|
||||||
IF(PCRE_BUILD_PCRECPP)
|
IF(PCRE_BUILD_PCRECPP)
|
||||||
ADD_EXECUTABLE(pcrecpp_unittest pcrecpp_unittest.cc)
|
ADD_EXECUTABLE(pcrecpp_unittest pcrecpp_unittest.cc)
|
||||||
@ -432,7 +740,6 @@ IF(PCRE_BUILD_TESTS)
|
|||||||
)
|
)
|
||||||
ENDIF(MINGW AND NON_STANDARD_LIB_NAMES AND NOT PCRE_STATIC)
|
ENDIF(MINGW AND NON_STANDARD_LIB_NAMES AND NOT PCRE_STATIC)
|
||||||
|
|
||||||
|
|
||||||
ADD_EXECUTABLE(pcre_scanner_unittest pcre_scanner_unittest.cc)
|
ADD_EXECUTABLE(pcre_scanner_unittest pcre_scanner_unittest.cc)
|
||||||
SET(targets ${targets} pcre_scanner_unittest)
|
SET(targets ${targets} pcre_scanner_unittest)
|
||||||
TARGET_LINK_LIBRARIES(pcre_scanner_unittest pcrecpp)
|
TARGET_LINK_LIBRARIES(pcre_scanner_unittest pcrecpp)
|
||||||
@ -442,42 +749,101 @@ IF(PCRE_BUILD_TESTS)
|
|||||||
TARGET_LINK_LIBRARIES(pcre_stringpiece_unittest pcrecpp)
|
TARGET_LINK_LIBRARIES(pcre_stringpiece_unittest pcrecpp)
|
||||||
ENDIF(PCRE_BUILD_PCRECPP)
|
ENDIF(PCRE_BUILD_PCRECPP)
|
||||||
|
|
||||||
GET_TARGET_PROPERTY(PCREGREP_EXE pcregrep DEBUG_LOCATION)
|
# exes in Debug location tested by the RunTest shell script
|
||||||
|
# via "make test"
|
||||||
|
IF(PCRE_BUILD_PCREGREP)
|
||||||
|
GET_TARGET_PROPERTY(PCREGREP_EXE pcregrep DEBUG_LOCATION)
|
||||||
|
ENDIF(PCRE_BUILD_PCREGREP)
|
||||||
|
|
||||||
GET_TARGET_PROPERTY(PCRETEST_EXE pcretest DEBUG_LOCATION)
|
GET_TARGET_PROPERTY(PCRETEST_EXE pcretest DEBUG_LOCATION)
|
||||||
|
|
||||||
# Write out a CTest configuration file that sets some needed environment
|
# =================================================
|
||||||
# variables for the test scripts.
|
# Write out a CTest configuration file
|
||||||
#
|
#
|
||||||
FILE(WRITE ${PROJECT_BINARY_DIR}/CTestCustom.ctest
|
FILE(WRITE ${PROJECT_BINARY_DIR}/CTestCustom.ctest
|
||||||
"# This is a generated file.
|
"# This is a generated file.
|
||||||
SET(ENV{srcdir} ${PROJECT_SOURCE_DIR})
|
MESSAGE(\"When testing is complete, review test output in the
|
||||||
SET(ENV{pcregrep} ${PCREGREP_EXE})
|
\\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\")
|
||||||
SET(ENV{pcretest} ${PCRETEST_EXE})
|
MESSAGE(\" \")
|
||||||
")
|
")
|
||||||
|
|
||||||
|
FILE(WRITE ${PROJECT_BINARY_DIR}/pcre_test.sh
|
||||||
|
"#! /bin/sh
|
||||||
|
# This is a generated file.
|
||||||
|
srcdir=${PROJECT_SOURCE_DIR}
|
||||||
|
pcretest=${PCRETEST_EXE}
|
||||||
|
source ${PROJECT_SOURCE_DIR}/RunTest
|
||||||
|
if test \"$?\" != \"0\"; then exit 1; fi
|
||||||
|
# End
|
||||||
|
")
|
||||||
|
|
||||||
IF(UNIX)
|
IF(UNIX)
|
||||||
ADD_TEST(pcre_test ${PROJECT_SOURCE_DIR}/RunTest)
|
ADD_TEST(pcre_test sh ${PROJECT_BINARY_DIR}/pcre_test.sh)
|
||||||
ADD_TEST(pcre_grep_test ${PROJECT_SOURCE_DIR}/RunGrepTest)
|
|
||||||
ENDIF(UNIX)
|
ENDIF(UNIX)
|
||||||
|
|
||||||
|
IF(PCRE_BUILD_PCREGREP)
|
||||||
|
FILE(WRITE ${PROJECT_BINARY_DIR}/pcre_grep_test.sh
|
||||||
|
"#! /bin/sh
|
||||||
|
# This is a generated file.
|
||||||
|
srcdir=${PROJECT_SOURCE_DIR}
|
||||||
|
pcregrep=${PCREGREP_EXE}
|
||||||
|
pcretest=${PCRETEST_EXE}
|
||||||
|
source ${PROJECT_SOURCE_DIR}/RunGrepTest
|
||||||
|
if test \"$?\" != \"0\"; then exit 1; fi
|
||||||
|
# End
|
||||||
|
")
|
||||||
|
|
||||||
|
IF(UNIX)
|
||||||
|
ADD_TEST(pcre_grep_test sh ${PROJECT_BINARY_DIR}/pcre_grep_test.sh)
|
||||||
|
ENDIF(UNIX)
|
||||||
|
ENDIF(PCRE_BUILD_PCREGREP)
|
||||||
|
|
||||||
IF(WIN32)
|
IF(WIN32)
|
||||||
ADD_TEST(pcre_test cmd /C ${PROJECT_SOURCE_DIR}/RunTest.bat)
|
# Provide environment for executing the bat file version of RunTest
|
||||||
|
FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc)
|
||||||
|
FILE(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin)
|
||||||
|
FILE(TO_NATIVE_PATH ${PCRETEST_EXE} winexe)
|
||||||
|
|
||||||
|
FILE(WRITE ${PROJECT_BINARY_DIR}/pcre_test.bat
|
||||||
|
"\@REM This is a generated file.
|
||||||
|
\@echo off
|
||||||
|
setlocal
|
||||||
|
SET srcdir=\"${winsrc}\"
|
||||||
|
SET pcretest=\"${winexe}\"
|
||||||
|
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcretest=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcretest.exe\"
|
||||||
|
call %srcdir%\\RunTest.Bat
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo RunTest.bat tests successfully completed
|
||||||
|
")
|
||||||
|
|
||||||
|
ADD_TEST(NAME pcre_test_bat
|
||||||
|
COMMAND pcre_test.bat)
|
||||||
|
SET_TESTS_PROPERTIES(pcre_test_bat PROPERTIES
|
||||||
|
PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed")
|
||||||
|
|
||||||
|
IF("$ENV{OSTYPE}" STREQUAL "msys")
|
||||||
|
# Both the sh and bat file versions of RunTest are run if make test is used
|
||||||
|
# in msys
|
||||||
|
ADD_TEST(pcre_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre_test.sh)
|
||||||
|
IF(PCRE_BUILD_PCREGREP)
|
||||||
|
ADD_TEST(pcre_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre_grep_test.sh)
|
||||||
|
ENDIF(PCRE_BUILD_PCREGREP)
|
||||||
|
ENDIF("$ENV{OSTYPE}" STREQUAL "msys")
|
||||||
|
|
||||||
ENDIF(WIN32)
|
ENDIF(WIN32)
|
||||||
|
|
||||||
GET_TARGET_PROPERTY(PCRECPP_UNITTEST_EXE
|
# Changed to accommodate testing whichever location was just built
|
||||||
pcrecpp_unittest
|
|
||||||
DEBUG_LOCATION)
|
|
||||||
|
|
||||||
GET_TARGET_PROPERTY(PCRE_SCANNER_UNITTEST_EXE
|
IF(PCRE_SUPPORT_JIT)
|
||||||
pcre_scanner_unittest
|
ADD_TEST(pcre_jit_test pcre_jit_test)
|
||||||
DEBUG_LOCATION)
|
ENDIF(PCRE_SUPPORT_JIT)
|
||||||
|
|
||||||
GET_TARGET_PROPERTY(PCRE_STRINGPIECE_UNITTEST_EXE
|
IF(PCRE_BUILD_PCRECPP)
|
||||||
pcre_stringpiece_unittest
|
ADD_TEST(pcrecpp_test pcrecpp_unittest)
|
||||||
DEBUG_LOCATION)
|
ADD_TEST(pcre_scanner_test pcre_scanner_unittest)
|
||||||
|
ADD_TEST(pcre_stringpiece_test pcre_stringpiece_unittest)
|
||||||
|
ENDIF(PCRE_BUILD_PCRECPP)
|
||||||
|
|
||||||
ADD_TEST(pcrecpp_test ${PCRECPP_UNITTEST_EXE})
|
|
||||||
ADD_TEST(pcre_scanner_test ${PCRE_SCANNER_UNITTEST_EXE})
|
|
||||||
ADD_TEST(pcre_stringpiece_test ${PCRE_STRINGPIECE_UNITTEST_EXE})
|
|
||||||
ENDIF(PCRE_BUILD_TESTS)
|
ENDIF(PCRE_BUILD_TESTS)
|
||||||
|
|
||||||
# Installation
|
# Installation
|
||||||
@ -507,7 +873,6 @@ ELSE(PCRE_BUILD_PCRECPP)
|
|||||||
SET(man3 ${man3_new})
|
SET(man3 ${man3_new})
|
||||||
ENDIF(PCRE_BUILD_PCRECPP)
|
ENDIF(PCRE_BUILD_PCRECPP)
|
||||||
|
|
||||||
|
|
||||||
INSTALL(FILES ${man1} DESTINATION man/man1)
|
INSTALL(FILES ${man1} DESTINATION man/man1)
|
||||||
INSTALL(FILES ${man3} DESTINATION man/man3)
|
INSTALL(FILES ${man3} DESTINATION man/man3)
|
||||||
INSTALL(FILES ${html} DESTINATION share/doc/pcre/html)
|
INSTALL(FILES ${html} DESTINATION share/doc/pcre/html)
|
||||||
@ -537,12 +902,17 @@ IF(PCRE_SHOW_REPORT)
|
|||||||
MESSAGE(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}")
|
MESSAGE(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}")
|
||||||
MESSAGE(STATUS " C++ compiler flags .............. : ${CMAKE_CXX_FLAGS}${cxxfsp}${CMAKE_CXX_FLAGS_${buildtype}}")
|
MESSAGE(STATUS " C++ compiler flags .............. : ${CMAKE_CXX_FLAGS}${cxxfsp}${CMAKE_CXX_FLAGS_${buildtype}}")
|
||||||
MESSAGE(STATUS "")
|
MESSAGE(STATUS "")
|
||||||
|
MESSAGE(STATUS " Build 8 bit PCRE library ........ : ${PCRE_BUILD_PCRE8}")
|
||||||
|
MESSAGE(STATUS " Build 16 bit PCRE library ....... : ${PCRE_BUILD_PCRE16}")
|
||||||
|
MESSAGE(STATUS " Build 32 bit PCRE library ....... : ${PCRE_BUILD_PCRE32}")
|
||||||
MESSAGE(STATUS " Build C++ library ............... : ${PCRE_BUILD_PCRECPP}")
|
MESSAGE(STATUS " Build C++ library ............... : ${PCRE_BUILD_PCRECPP}")
|
||||||
MESSAGE(STATUS " Enable UTF-8 support ............ : ${PCRE_SUPPORT_UNICODE_PROPERTIES}")
|
MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE_SUPPORT_JIT}")
|
||||||
|
MESSAGE(STATUS " Enable UTF support .............. : ${PCRE_SUPPORT_UTF}")
|
||||||
MESSAGE(STATUS " Unicode properties .............. : ${PCRE_SUPPORT_UNICODE_PROPERTIES}")
|
MESSAGE(STATUS " Unicode properties .............. : ${PCRE_SUPPORT_UNICODE_PROPERTIES}")
|
||||||
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE_NEWLINE}")
|
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE_NEWLINE}")
|
||||||
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE_SUPPORT_BSR_ANYCRLF}")
|
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE_SUPPORT_BSR_ANYCRLF}")
|
||||||
MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE_EBCDIC}")
|
MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE_EBCDIC}")
|
||||||
|
MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE_EBCDIC_NL25}")
|
||||||
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE_REBUILD_CHARTABLES}")
|
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE_REBUILD_CHARTABLES}")
|
||||||
MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}")
|
MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}")
|
||||||
MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
|
MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
|
||||||
@ -552,22 +922,33 @@ IF(PCRE_SHOW_REPORT)
|
|||||||
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
|
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
|
||||||
MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
|
MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
|
||||||
MESSAGE(STATUS " Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}")
|
MESSAGE(STATUS " Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}")
|
||||||
MESSAGE(STATUS " Build tests (implies pcretest) .. : ${PCRE_BUILD_TESTS}")
|
MESSAGE(STATUS " Enable JIT in pcregrep .......... : ${PCRE_SUPPORT_PCREGREP_JIT}")
|
||||||
|
MESSAGE(STATUS " Buffer size for pcregrep ........ : ${PCREGREP_BUFSIZE}")
|
||||||
|
MESSAGE(STATUS " Build tests (implies pcretest .. : ${PCRE_BUILD_TESTS}")
|
||||||
|
MESSAGE(STATUS " and pcregrep)")
|
||||||
IF(ZLIB_FOUND)
|
IF(ZLIB_FOUND)
|
||||||
MESSAGE(STATUS " Link pcregrep with libz ......... : ${PCRE_SUPPORT_LIBZ}")
|
MESSAGE(STATUS " Link pcregrep with libz ......... : ${PCRE_SUPPORT_LIBZ}")
|
||||||
ELSE(ZLIB_FOUND)
|
ELSE(ZLIB_FOUND)
|
||||||
MESSAGE(STATUS " Link pcregrep with libz ......... : None" )
|
MESSAGE(STATUS " Link pcregrep with libz ......... : Library not found" )
|
||||||
ENDIF(ZLIB_FOUND)
|
ENDIF(ZLIB_FOUND)
|
||||||
IF(BZIP2_FOUND)
|
IF(BZIP2_FOUND)
|
||||||
MESSAGE(STATUS " Link pcregrep with libbz2 ....... : ${PCRE_SUPPORT_LIBBZ2}")
|
MESSAGE(STATUS " Link pcregrep with libbz2 ....... : ${PCRE_SUPPORT_LIBBZ2}")
|
||||||
ELSE(BZIP2_FOUND)
|
ELSE(BZIP2_FOUND)
|
||||||
MESSAGE(STATUS " Link pcregrep with libbz2 ....... : None" )
|
MESSAGE(STATUS " Link pcregrep with libbz2 ....... : Library not found" )
|
||||||
ENDIF(BZIP2_FOUND)
|
ENDIF(BZIP2_FOUND)
|
||||||
IF(NOT PCRE_SUPPORT_LIBREADLINE)
|
IF(EDITLINE_FOUND)
|
||||||
MESSAGE(STATUS " Link pcretest with libreadline .. : None" )
|
MESSAGE(STATUS " Link pcretest with libeditline .. : ${PCRE_SUPPORT_LIBEDIT}")
|
||||||
ELSE(NOT PCRE_SUPPORT_LIBREADLINE)
|
ELSE(EDITLINE_FOUND)
|
||||||
|
MESSAGE(STATUS " Link pcretest with libeditline .. : Library not found" )
|
||||||
|
ENDIF(EDITLINE_FOUND)
|
||||||
|
IF(READLINE_FOUND)
|
||||||
MESSAGE(STATUS " Link pcretest with libreadline .. : ${PCRE_SUPPORT_LIBREADLINE}")
|
MESSAGE(STATUS " Link pcretest with libreadline .. : ${PCRE_SUPPORT_LIBREADLINE}")
|
||||||
ENDIF(NOT PCRE_SUPPORT_LIBREADLINE)
|
ELSE(READLINE_FOUND)
|
||||||
|
MESSAGE(STATUS " Link pcretest with libreadline .. : Library not found" )
|
||||||
|
ENDIF(READLINE_FOUND)
|
||||||
|
MESSAGE(STATUS " Support Valgrind .................: ${PCRE_SUPPORT_VALGRIND}")
|
||||||
|
MESSAGE(STATUS " Support coverage .................: ${PCRE_SUPPORT_COVERAGE}")
|
||||||
|
|
||||||
IF(MINGW AND NOT PCRE_STATIC)
|
IF(MINGW AND NOT PCRE_STATIC)
|
||||||
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
|
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
|
||||||
MESSAGE(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
|
MESSAGE(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
|
||||||
|
1473
tools/pcre/ChangeLog
1473
tools/pcre/ChangeLog
File diff suppressed because it is too large
Load Diff
67
tools/pcre/CheckMan
Normal file
67
tools/pcre/CheckMan
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
#! /usr/bin/perl
|
||||||
|
|
||||||
|
# A script to scan PCRE's man pages to check for typos in the control
|
||||||
|
# sequences. I use only a small set of the available repertoire, so it is
|
||||||
|
# straightforward to check that nothing else has slipped in by mistake. This
|
||||||
|
# script should be called in the doc directory.
|
||||||
|
|
||||||
|
$yield = 0;
|
||||||
|
|
||||||
|
while (scalar(@ARGV) > 0)
|
||||||
|
{
|
||||||
|
$line = 0;
|
||||||
|
$file = shift @ARGV;
|
||||||
|
|
||||||
|
open (IN, $file) || die "Failed to open $file\n";
|
||||||
|
|
||||||
|
while (<IN>)
|
||||||
|
{
|
||||||
|
$line++;
|
||||||
|
if (/^\s*$/)
|
||||||
|
{
|
||||||
|
printf "Empty line $line of $file\n";
|
||||||
|
$yield = 1;
|
||||||
|
}
|
||||||
|
elsif (/^\./)
|
||||||
|
{
|
||||||
|
if (!/^\.\s*$|
|
||||||
|
^\.B\s+\S|
|
||||||
|
^\.TH\s\S|
|
||||||
|
^\.SH\s\S|
|
||||||
|
^\.SS\s\S|
|
||||||
|
^\.TP(?:\s\d+)?\s*$|
|
||||||
|
^\.ti\s\S|
|
||||||
|
^\.SM\s*$|
|
||||||
|
^\.rs\s*$|
|
||||||
|
^\.sp\s*$|
|
||||||
|
^\.nf\s*$|
|
||||||
|
^\.fi\s*$|
|
||||||
|
^\.P\s*$|
|
||||||
|
^\.PP\s*$|
|
||||||
|
^\.\\"(?:\ HREF)?\s*$|
|
||||||
|
^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
|
||||||
|
^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
|
||||||
|
^\.\\"\s<\/a>\s*$|
|
||||||
|
^\.\\"\sJOINSH\s*$|
|
||||||
|
^\.\\"\sJOIN\s*$/x
|
||||||
|
)
|
||||||
|
{
|
||||||
|
printf "Bad control line $line of $file\n";
|
||||||
|
$yield = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (/\\[^ef]|\\f[^IBP]/)
|
||||||
|
{
|
||||||
|
printf "Bad backslash in line $line of $file\n";
|
||||||
|
$yield = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(IN);
|
||||||
|
}
|
||||||
|
|
||||||
|
exit $yield;
|
||||||
|
# End
|
@ -2,7 +2,9 @@ Technical Notes about PCRE
|
|||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
These are very rough technical notes that record potentially useful information
|
These are very rough technical notes that record potentially useful information
|
||||||
about PCRE internals.
|
about PCRE internals. For information about testing PCRE, see the pcretest
|
||||||
|
documentation and the comment at the head of the RunTest file.
|
||||||
|
|
||||||
|
|
||||||
Historical note 1
|
Historical note 1
|
||||||
-----------------
|
-----------------
|
||||||
@ -22,6 +24,7 @@ the one matching the longest subset of the subject string was chosen. This did
|
|||||||
not necessarily maximize the individual wild portions of the pattern, as is
|
not necessarily maximize the individual wild portions of the pattern, as is
|
||||||
expected in Unix and Perl-style regular expressions.
|
expected in Unix and Perl-style regular expressions.
|
||||||
|
|
||||||
|
|
||||||
Historical note 2
|
Historical note 2
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
@ -34,6 +37,7 @@ maximizing (or, optionally, minimizing in Perl) the amount of the subject that
|
|||||||
matches individual wild portions of the pattern. This is an "NFA algorithm" in
|
matches individual wild portions of the pattern. This is an "NFA algorithm" in
|
||||||
Friedl's terminology.
|
Friedl's terminology.
|
||||||
|
|
||||||
|
|
||||||
OK, here's the real stuff
|
OK, here's the real stuff
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
@ -44,6 +48,20 @@ in the pattern, to save on compiling time. However, because of the greater
|
|||||||
complexity in Perl regular expressions, I couldn't do this. In any case, a
|
complexity in Perl regular expressions, I couldn't do this. In any case, a
|
||||||
first pass through the pattern is helpful for other reasons.
|
first pass through the pattern is helpful for other reasons.
|
||||||
|
|
||||||
|
|
||||||
|
Support for 16-bit and 32-bit data strings
|
||||||
|
-------------------------------------------
|
||||||
|
|
||||||
|
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
|
||||||
|
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
|
||||||
|
in any combination of 8-bit, 16-bit or 32-bit modes, creating different
|
||||||
|
libraries. In the description that follows, the word "short" is
|
||||||
|
used for a 16-bit data quantity, and the word "unit" is used for a quantity
|
||||||
|
that is a byte in 8-bit mode, a short in 16-bit mode and a 32-bit unsigned
|
||||||
|
integer in 32-bit mode. However, so as not to over-complicate the text, the
|
||||||
|
names of PCRE functions are given in 8-bit form only.
|
||||||
|
|
||||||
|
|
||||||
Computing the memory requirement: how it was
|
Computing the memory requirement: how it was
|
||||||
--------------------------------------------
|
--------------------------------------------
|
||||||
|
|
||||||
@ -54,6 +72,7 @@ idea was that this would turn out faster than the Henry Spencer code because
|
|||||||
the first pass is degenerate and the second pass can just store stuff straight
|
the first pass is degenerate and the second pass can just store stuff straight
|
||||||
into the vector, which it knows is big enough.
|
into the vector, which it knows is big enough.
|
||||||
|
|
||||||
|
|
||||||
Computing the memory requirement: how it is
|
Computing the memory requirement: how it is
|
||||||
-------------------------------------------
|
-------------------------------------------
|
||||||
|
|
||||||
@ -63,26 +82,31 @@ things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
|
|||||||
I had a flash of inspiration as to how I could run the real compile function in
|
I had a flash of inspiration as to how I could run the real compile function in
|
||||||
a "fake" mode that enables it to compute how much memory it would need, while
|
a "fake" mode that enables it to compute how much memory it would need, while
|
||||||
actually only ever using a few hundred bytes of working memory, and without too
|
actually only ever using a few hundred bytes of working memory, and without too
|
||||||
many tests of the mode that might slow it down. So I re-factored the compiling
|
many tests of the mode that might slow it down. So I refactored the compiling
|
||||||
functions to work this way. This got rid of about 600 lines of source. It
|
functions to work this way. This got rid of about 600 lines of source. It
|
||||||
should make future maintenance and development easier. As this was such a major
|
should make future maintenance and development easier. As this was such a major
|
||||||
change, I never released 6.8, instead upping the number to 7.0 (other quite
|
change, I never released 6.8, instead upping the number to 7.0 (other quite
|
||||||
major changes are also present in the 7.0 release).
|
major changes were also present in the 7.0 release).
|
||||||
|
|
||||||
A side effect of this work is that the previous limit of 200 on the nesting
|
A side effect of this work was that the previous limit of 200 on the nesting
|
||||||
depth of parentheses was removed. However, there is a downside: pcre_compile()
|
depth of parentheses was removed. However, there is a downside: pcre_compile()
|
||||||
runs more slowly than before (30% or more, depending on the pattern) because it
|
runs more slowly than before (30% or more, depending on the pattern) because it
|
||||||
is doing a full analysis of the pattern. My hope is that this is not a big
|
is doing a full analysis of the pattern. My hope was that this would not be a
|
||||||
issue.
|
big issue, and in the event, nobody has commented on it.
|
||||||
|
|
||||||
|
|
||||||
Traditional matching function
|
Traditional matching function
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
||||||
The "traditional", and original, matching function is called pcre_exec(), and
|
The "traditional", and original, matching function is called pcre_exec(), and
|
||||||
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
|
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
|
||||||
and the way that Perl works. Not surprising, since it is intended to be as
|
and the way that Perl works. This is not surprising, since it is intended to be
|
||||||
compatible with Perl as possible. This is the function most users of PCRE will
|
as compatible with Perl as possible. This is the function most users of PCRE
|
||||||
use most of the time.
|
will use most of the time. From release 8.20, if PCRE is compiled with
|
||||||
|
just-in-time (JIT) support, and studying a compiled pattern with JIT is
|
||||||
|
successful, the JIT code is run instead of the normal pcre_exec() code, but the
|
||||||
|
result is the same.
|
||||||
|
|
||||||
|
|
||||||
Supplementary matching function
|
Supplementary matching function
|
||||||
-------------------------------
|
-------------------------------
|
||||||
@ -101,28 +125,39 @@ needed at compile time to produce a traditional FSM where only one state is
|
|||||||
ever active at once. I believe some other regex matchers work this way.
|
ever active at once. I believe some other regex matchers work this way.
|
||||||
|
|
||||||
|
|
||||||
|
Changeable options
|
||||||
|
------------------
|
||||||
|
|
||||||
|
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL) may
|
||||||
|
change in the middle of patterns. From PCRE 8.13, their processing is handled
|
||||||
|
entirely at compile time by generating different opcodes for the different
|
||||||
|
settings. The runtime functions do not need to keep track of an options state
|
||||||
|
any more.
|
||||||
|
|
||||||
|
|
||||||
Format of compiled patterns
|
Format of compiled patterns
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
The compiled form of a pattern is a vector of bytes, containing items of
|
The compiled form of a pattern is a vector of units (bytes in 8-bit mode, or
|
||||||
variable length. The first byte in an item is an opcode, and the length of the
|
shorts in 16-bit mode, 32-bit unsigned integers in 32-bit mode), containing
|
||||||
item is either implicit in the opcode or contained in the data bytes that
|
items of variable length. The first unit in an item contains an opcode, and
|
||||||
follow it.
|
the length of the item is either implicit in the opcode or contained in the
|
||||||
|
data that follows it.
|
||||||
|
|
||||||
In many cases below LINK_SIZE data values are specified for offsets within the
|
In many cases listed below, LINK_SIZE data values are specified for offsets
|
||||||
compiled pattern. The default value for LINK_SIZE is 2, but PCRE can be
|
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
||||||
compiled to use 3-byte or 4-byte values for these offsets (impairing the
|
default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
|
||||||
performance). This is necessary only when patterns whose compiled length is
|
4-byte values for these offsets, although this impairs the performance. (3-byte
|
||||||
greater than 64K are going to be processed. In this description, we assume the
|
LINK_SIZE values are available only in 8-bit mode.) Specifying a LINK_SIZE
|
||||||
"normal" compilation options. Data values that are counts (e.g. for
|
larger than 2 is necessary only when patterns whose compiled length is greater
|
||||||
quantifiers) are always just two bytes long.
|
than 64K are going to be processed. In this description, we assume the "normal"
|
||||||
|
compilation options. Data values that are counts (e.g. for quantifiers) are
|
||||||
A list of the opcodes follows:
|
always just two bytes long (one short in 16-bit mode).
|
||||||
|
|
||||||
Opcodes with no following data
|
Opcodes with no following data
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
||||||
These items are all just one byte long
|
These items are all just one unit long
|
||||||
|
|
||||||
OP_END end of pattern
|
OP_END end of pattern
|
||||||
OP_ANY match any one character other than newline
|
OP_ANY match any one character other than newline
|
||||||
@ -131,7 +166,8 @@ These items are all just one byte long
|
|||||||
OP_SOD match start of data: \A
|
OP_SOD match start of data: \A
|
||||||
OP_SOM, start of match (subject + offset): \G
|
OP_SOM, start of match (subject + offset): \G
|
||||||
OP_SET_SOM, set start of match (\K)
|
OP_SET_SOM, set start of match (\K)
|
||||||
OP_CIRC ^ (start of data, or after \n in multiline)
|
OP_CIRC ^ (start of data)
|
||||||
|
OP_CIRCM ^ multiline mode (start of data or after newline)
|
||||||
OP_NOT_WORD_BOUNDARY \B
|
OP_NOT_WORD_BOUNDARY \B
|
||||||
OP_WORD_BOUNDARY \b
|
OP_WORD_BOUNDARY \b
|
||||||
OP_NOT_DIGIT \D
|
OP_NOT_DIGIT \D
|
||||||
@ -146,48 +182,71 @@ These items are all just one byte long
|
|||||||
OP_WORDCHAR \w
|
OP_WORDCHAR \w
|
||||||
OP_EODN match end of data or \n at end: \Z
|
OP_EODN match end of data or \n at end: \Z
|
||||||
OP_EOD match end of data: \z
|
OP_EOD match end of data: \z
|
||||||
OP_DOLL $ (end of data, or before \n in multiline)
|
OP_DOLL $ (end of data, or before final newline)
|
||||||
|
OP_DOLLM $ multiline mode (end of data or before newline)
|
||||||
OP_EXTUNI match an extended Unicode character
|
OP_EXTUNI match an extended Unicode character
|
||||||
OP_ANYNL match any Unicode newline sequence
|
OP_ANYNL match any Unicode newline sequence
|
||||||
|
|
||||||
OP_ACCEPT )
|
OP_ACCEPT ) These are Perl 5.10's "backtracking control
|
||||||
OP_COMMIT )
|
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
|
||||||
OP_FAIL ) These are Perl 5.10's "backtracking
|
OP_FAIL ) parentheses, it may be preceded by one or more
|
||||||
OP_PRUNE ) control verbs".
|
OP_PRUNE ) OP_CLOSE, followed by a 2-byte number,
|
||||||
OP_SKIP )
|
OP_SKIP ) indicating which parentheses must be closed.
|
||||||
OP_THEN )
|
|
||||||
|
|
||||||
|
|
||||||
|
Backtracking control verbs with (optional) data
|
||||||
|
-----------------------------------------------
|
||||||
|
|
||||||
|
(*THEN) without an argument generates the opcode OP_THEN and no following data.
|
||||||
|
OP_MARK is followed by the mark name, preceded by a one-unit length, and
|
||||||
|
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
|
||||||
|
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
|
||||||
|
following in the same format.
|
||||||
|
|
||||||
|
|
||||||
|
Matching literal characters
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
The OP_CHAR opcode is followed by a single character that is to be matched
|
||||||
|
casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
|
||||||
|
the character may be more than one unit long. In UTF-32 mode, characters
|
||||||
|
are always exactly one unit long.
|
||||||
|
|
||||||
|
|
||||||
Repeating single characters
|
Repeating single characters
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
The common repeats (*, +, ?) when applied to a single character use the
|
The common repeats (*, +, ?), when applied to a single character, use the
|
||||||
following opcodes:
|
following opcodes, which come in caseful and caseless versions:
|
||||||
|
|
||||||
OP_STAR
|
Caseful Caseless
|
||||||
OP_MINSTAR
|
OP_STAR OP_STARI
|
||||||
OP_POSSTAR
|
OP_MINSTAR OP_MINSTARI
|
||||||
OP_PLUS
|
OP_POSSTAR OP_POSSTARI
|
||||||
OP_MINPLUS
|
OP_PLUS OP_PLUSI
|
||||||
OP_POSPLUS
|
OP_MINPLUS OP_MINPLUSI
|
||||||
OP_QUERY
|
OP_POSPLUS OP_POSPLUSI
|
||||||
OP_MINQUERY
|
OP_QUERY OP_QUERYI
|
||||||
OP_POSQUERY
|
OP_MINQUERY OP_MINQUERYI
|
||||||
|
OP_POSQUERY OP_POSQUERYI
|
||||||
|
|
||||||
In ASCII mode, these are two-byte items; in UTF-8 mode, the length is variable.
|
Each opcode is followed by the character that is to be repeated. In ASCII mode,
|
||||||
Those with "MIN" in their name are the minimizing versions. Those with "POS" in
|
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
|
||||||
their names are possessive versions. Each is followed by the character that is
|
UTF-32 mode these are two-unit items.
|
||||||
to be repeated. Other repeats make use of
|
Those with "MIN" in their names are the minimizing versions. Those with "POS"
|
||||||
|
in their names are possessive versions. Other repeats make use of these
|
||||||
|
opcodes:
|
||||||
|
|
||||||
OP_UPTO
|
Caseful Caseless
|
||||||
OP_MINUPTO
|
OP_UPTO OP_UPTOI
|
||||||
OP_POSUPTO
|
OP_MINUPTO OP_MINUPTOI
|
||||||
OP_EXACT
|
OP_POSUPTO OP_POSUPTOI
|
||||||
|
OP_EXACT OP_EXACTI
|
||||||
|
|
||||||
which are followed by a two-byte count (most significant first) and the
|
Each of these is followed by a two-byte (one short) count (most significant
|
||||||
repeated character. OP_UPTO matches from 0 to the given number. A repeat with a
|
byte first in 8-bit mode) and then the repeated character. OP_UPTO matches from
|
||||||
non-zero minimum and a fixed maximum is coded as an OP_EXACT followed by an
|
0 to the given number. A repeat with a non-zero minimum and a fixed maximum is
|
||||||
OP_UPTO (or OP_MINUPTO or OPT_POSUPTO).
|
coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or OP_POSUPTO).
|
||||||
|
|
||||||
|
|
||||||
Repeating character types
|
Repeating character types
|
||||||
@ -195,7 +254,7 @@ Repeating character types
|
|||||||
|
|
||||||
Repeats of things like \d are done exactly as for single characters, except
|
Repeats of things like \d are done exactly as for single characters, except
|
||||||
that instead of a character, the opcode for the type is stored in the data
|
that instead of a character, the opcode for the type is stored in the data
|
||||||
byte. The opcodes are:
|
unit. The opcodes are:
|
||||||
|
|
||||||
OP_TYPESTAR
|
OP_TYPESTAR
|
||||||
OP_TYPEMINSTAR
|
OP_TYPEMINSTAR
|
||||||
@ -217,65 +276,58 @@ Match by Unicode property
|
|||||||
|
|
||||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||||
character by testing its Unicode property (the \p and \P escape sequences).
|
character by testing its Unicode property (the \p and \P escape sequences).
|
||||||
Each is followed by two bytes that encode the desired property as a type and a
|
Each is followed by two units that encode the desired property as a type and a
|
||||||
value.
|
value.
|
||||||
|
|
||||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||||
three bytes: OP_PROP or OP_NOTPROP and then the desired property type and
|
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
||||||
value.
|
value.
|
||||||
|
|
||||||
|
|
||||||
Matching literal characters
|
|
||||||
---------------------------
|
|
||||||
|
|
||||||
The OP_CHAR opcode is followed by a single character that is to be matched
|
|
||||||
casefully. For caseless matching, OP_CHARNC is used. In UTF-8 mode, the
|
|
||||||
character may be more than one byte long. (Earlier versions of PCRE used
|
|
||||||
multi-character strings, but this was changed to allow some new features to be
|
|
||||||
added.)
|
|
||||||
|
|
||||||
|
|
||||||
Character classes
|
Character classes
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
If there is only one character, OP_CHAR or OP_CHARNC is used for a positive
|
If there is only one character in the class, OP_CHAR or OP_CHARI is used for a
|
||||||
class, and OP_NOT for a negative one (that is, for something like [^a]).
|
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
|
||||||
However, in UTF-8 mode, the use of OP_NOT applies only to characters with
|
something like [^a]).
|
||||||
values < 128, because OP_NOT is confined to single bytes.
|
|
||||||
|
|
||||||
Another set of repeating opcodes (OP_NOTSTAR etc.) are used for a repeated,
|
Another set of 13 repeating opcodes (called OP_NOTSTAR etc.) are used for
|
||||||
negated, single-character class. The normal ones (OP_STAR etc.) are used for a
|
repeated, negated, single-character classes. The normal single-character
|
||||||
repeated positive single-character class.
|
opcodes (OP_STAR, etc.) are used for repeated positive single-character
|
||||||
|
classes.
|
||||||
|
|
||||||
When there's more than one character in a class and all the characters are less
|
When there is more than one character in a class and all the characters are
|
||||||
than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a negative
|
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
|
||||||
one. In either case, the opcode is followed by a 32-byte bit map containing a 1
|
negative one. In either case, the opcode is followed by a 32-byte (16-short)
|
||||||
bit for every character that is acceptable. The bits are counted from the least
|
bit map containing a 1 bit for every character that is acceptable. The bits are
|
||||||
significant end of each byte.
|
counted from the least significant end of each unit. In caseless mode, bits for
|
||||||
|
both cases are set.
|
||||||
|
|
||||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 mode,
|
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32 mode,
|
||||||
subject characters with values greater than 256 can be handled correctly. For
|
subject characters with values greater than 255 can be handled correctly. For
|
||||||
OP_CLASS they don't match, whereas for OP_NCLASS they do.
|
OP_CLASS they do not match, whereas for OP_NCLASS they do.
|
||||||
|
|
||||||
For classes containing characters with values > 255, OP_XCLASS is used. It
|
For classes containing characters with values greater than 255, OP_XCLASS is
|
||||||
optionally uses a bit map (if any characters lie within it), followed by a list
|
used. It optionally uses a bit map (if any characters lie within it), followed
|
||||||
of pairs and single characters. There is a flag character than indicates
|
by a list of pairs (for a range) and single characters. In caseless mode, both
|
||||||
whether it's a positive or a negative class.
|
cases are explicitly listed. There is a flag character that indicates whether
|
||||||
|
it is a positive or a negative class.
|
||||||
|
|
||||||
|
|
||||||
Back references
|
Back references
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
OP_REF is followed by two bytes containing the reference number.
|
OP_REF (caseful) or OP_REFI (caseless) is followed by two bytes (one short)
|
||||||
|
containing the reference number.
|
||||||
|
|
||||||
|
|
||||||
Repeating character classes and back references
|
Repeating character classes and back references
|
||||||
-----------------------------------------------
|
-----------------------------------------------
|
||||||
|
|
||||||
Single-character classes are handled specially (see above). This section
|
Single-character classes are handled specially (see above). This section
|
||||||
applies to OP_CLASS and OP_REF. In both cases, the repeat information follows
|
applies to OP_CLASS and OP_REF[I]. In both cases, the repeat information
|
||||||
the base item. The matching code looks at the following opcode to see if it is
|
follows the base item. The matching code looks at the following opcode to see
|
||||||
one of
|
if it is one of
|
||||||
|
|
||||||
OP_CRSTAR
|
OP_CRSTAR
|
||||||
OP_CRMINSTAR
|
OP_CRMINSTAR
|
||||||
@ -286,10 +338,10 @@ one of
|
|||||||
OP_CRRANGE
|
OP_CRRANGE
|
||||||
OP_CRMINRANGE
|
OP_CRMINRANGE
|
||||||
|
|
||||||
All but the last two are just single-byte items. The others are followed by
|
All but the last two are just single-unit items. The others are followed by
|
||||||
four bytes of data, comprising the minimum and maximum repeat counts. There are
|
four bytes (two shorts) of data, comprising the minimum and maximum repeat
|
||||||
no special possessive opcodes for these repeats; a possessive repeat is
|
counts. There are no special possessive opcodes for these repeats; a possessive
|
||||||
compiled into an atomic group.
|
repeat is compiled into an atomic group.
|
||||||
|
|
||||||
|
|
||||||
Brackets and alternation
|
Brackets and alternation
|
||||||
@ -299,7 +351,8 @@ A pair of non-capturing (round) brackets is wrapped round each expression at
|
|||||||
compile time, so alternation always happens in the context of brackets.
|
compile time, so alternation always happens in the context of brackets.
|
||||||
|
|
||||||
[Note for North Americans: "bracket" to some English speakers, including
|
[Note for North Americans: "bracket" to some English speakers, including
|
||||||
myself, can be round, square, curly, or pointy. Hence this usage.]
|
myself, can be round, square, curly, or pointy. Hence this usage rather than
|
||||||
|
"parentheses".]
|
||||||
|
|
||||||
Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99
|
Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99
|
||||||
capturing brackets and it used a different opcode for each one. From release
|
capturing brackets and it used a different opcode for each one. From release
|
||||||
@ -311,16 +364,17 @@ A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
|
|||||||
next alternative OP_ALT or, if there aren't any branches, to the matching
|
next alternative OP_ALT or, if there aren't any branches, to the matching
|
||||||
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
|
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
|
||||||
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
||||||
number immediately follows the offset, always as a 2-byte item.
|
number immediately follows the offset, always as a 2-byte (one short) item.
|
||||||
|
|
||||||
OP_KET is used for subpatterns that do not repeat indefinitely, while
|
OP_KET is used for subpatterns that do not repeat indefinitely, and
|
||||||
OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
|
OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
|
||||||
maximally respectively. All three are followed by LINK_SIZE bytes giving (as a
|
maximally respectively (see below for possessive repetitions). All three are
|
||||||
positive number) the offset back to the matching bracket opcode.
|
followed by LINK_SIZE bytes giving (as a positive number) the offset back to
|
||||||
|
the matching bracket opcode.
|
||||||
|
|
||||||
If a subpattern is quantified such that it is permitted to match zero times, it
|
If a subpattern is quantified such that it is permitted to match zero times, it
|
||||||
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
||||||
single-byte opcodes that tell the matcher that skipping the following
|
single-unit opcodes that tell the matcher that skipping the following
|
||||||
subpattern entirely is a valid branch. In the case of the first two, not
|
subpattern entirely is a valid branch. In the case of the first two, not
|
||||||
skipping the pattern is also valid (greedy and non-greedy). The third is used
|
skipping the pattern is also valid (greedy and non-greedy). The third is used
|
||||||
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
|
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
|
||||||
@ -343,6 +397,15 @@ final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
|
|||||||
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
||||||
OP_KETRMAX, and if so, to break the loop.
|
OP_KETRMAX, and if so, to break the loop.
|
||||||
|
|
||||||
|
Possessive brackets
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
When a repeated group (capturing or non-capturing) is marked as possessive by
|
||||||
|
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
|
||||||
|
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead
|
||||||
|
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
|
||||||
|
repetition is zero, the group is preceded by OP_BRAPOSZERO.
|
||||||
|
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
----------
|
----------
|
||||||
@ -350,11 +413,12 @@ Assertions
|
|||||||
Forward assertions are just like other subpatterns, but starting with one of
|
Forward assertions are just like other subpatterns, but starting with one of
|
||||||
the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||||
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
||||||
is OP_REVERSE, followed by a two byte count of the number of characters to move
|
is OP_REVERSE, followed by a two-byte (one short) count of the number of
|
||||||
back the pointer in the subject string. When operating in UTF-8 mode, the count
|
characters to move back the pointer in the subject string. In ASCII mode, the
|
||||||
is a character count rather than a byte count. A separate count is present in
|
count is a number of units, but in UTF-8/16 mode each character may occupy more
|
||||||
each alternative of a lookbehind assertion, allowing them to have different
|
than one unit; in UTF-32 mode each character occupies exactly one unit.
|
||||||
fixed lengths.
|
A separate count is present in each alternative of a lookbehind
|
||||||
|
assertion, allowing them to have different fixed lengths.
|
||||||
|
|
||||||
|
|
||||||
Once-only (atomic) subpatterns
|
Once-only (atomic) subpatterns
|
||||||
@ -371,13 +435,17 @@ Conditional subpatterns
|
|||||||
These are like other subpatterns, but they start with the opcode OP_COND, or
|
These are like other subpatterns, but they start with the opcode OP_COND, or
|
||||||
OP_SCOND for one that might match an empty string in an unbounded repeat. If
|
OP_SCOND for one that might match an empty string in an unbounded repeat. If
|
||||||
the condition is a back reference, this is stored at the start of the
|
the condition is a back reference, this is stored at the start of the
|
||||||
subpattern using the opcode OP_CREF followed by two bytes containing the
|
subpattern using the opcode OP_CREF followed by two bytes (one short)
|
||||||
reference number. If the condition is "in recursion" (coded as "(?(R)"), or "in
|
containing the reference number. OP_NCREF is used instead if the reference was
|
||||||
recursion of group x" (coded as "(?(Rx)"), the group number is stored at the
|
generated by name (so that the runtime code knows to check for duplicate
|
||||||
start of the subpattern using the opcode OP_RREF, and a value of zero for "the
|
names).
|
||||||
whole pattern". For a DEFINE condition, just the single byte OP_DEF is used (it
|
|
||||||
has no associated data). Otherwise, a conditional subpattern always starts with
|
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
|
||||||
one of the assertions.
|
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
|
||||||
|
subpattern using the opcode OP_RREF or OP_NRREF (cf OP_NCREF), and a value of
|
||||||
|
zero for "the whole pattern". For a DEFINE condition, just the single unit
|
||||||
|
OP_DEF is used (it has no associated data). Otherwise, a conditional subpattern
|
||||||
|
always starts with one of the assertions.
|
||||||
|
|
||||||
|
|
||||||
Recursion
|
Recursion
|
||||||
@ -394,25 +462,12 @@ are not strictly a recursion.
|
|||||||
Callout
|
Callout
|
||||||
-------
|
-------
|
||||||
|
|
||||||
OP_CALLOUT is followed by one byte of data that holds a callout number in the
|
OP_CALLOUT is followed by one unit of data that holds a callout number in the
|
||||||
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
|
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
|
||||||
cases there follows a two-byte value giving the offset in the pattern to the
|
cases there follows a two-byte (one short) value giving the offset in the
|
||||||
start of the following item, and another two-byte item giving the length of the
|
pattern to the start of the following item, and another two-byte (one short)
|
||||||
next item.
|
item giving the length of the next item.
|
||||||
|
|
||||||
|
|
||||||
Changing options
|
|
||||||
----------------
|
|
||||||
|
|
||||||
If any of the /i, /m, or /s options are changed within a pattern, an OP_OPT
|
|
||||||
opcode is compiled, followed by one byte containing the new settings of these
|
|
||||||
flags. If there are several alternatives, there is an occurrence of OP_OPT at
|
|
||||||
the start of all those following the first options change, to set appropriate
|
|
||||||
options for the start of the alternative. Immediately after the end of the
|
|
||||||
group there is another such item to reset the flags to their previous values. A
|
|
||||||
change of flag right at the very start of the pattern can be handled entirely
|
|
||||||
at compile time, and so does not cause anything to be put into the compiled
|
|
||||||
data.
|
|
||||||
|
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
April 2008
|
February 2012
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
Installation Instructions
|
Installation Instructions
|
||||||
*************************
|
*************************
|
||||||
|
|
||||||
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
|
Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation,
|
||||||
2006, 2007, 2008 Free Software Foundation, Inc.
|
Inc.
|
||||||
|
|
||||||
This file is free documentation; the Free Software Foundation gives
|
Copying and distribution of this file, with or without modification,
|
||||||
unlimited permission to copy, distribute and modify it.
|
are permitted in any medium without royalty provided the copyright
|
||||||
|
notice and this notice are preserved. This file is offered as-is,
|
||||||
|
without warranty of any kind.
|
||||||
|
|
||||||
Basic Installation
|
Basic Installation
|
||||||
==================
|
==================
|
||||||
@ -13,7 +15,11 @@ Basic Installation
|
|||||||
Briefly, the shell commands `./configure; make; make install' should
|
Briefly, the shell commands `./configure; make; make install' should
|
||||||
configure, build, and install this package. The following
|
configure, build, and install this package. The following
|
||||||
more-detailed instructions are generic; see the `README' file for
|
more-detailed instructions are generic; see the `README' file for
|
||||||
instructions specific to this package.
|
instructions specific to this package. Some packages provide this
|
||||||
|
`INSTALL' file but do not implement all of the features documented
|
||||||
|
below. The lack of an optional feature in a given package is not
|
||||||
|
necessarily a bug. More recommendations for GNU packages can be found
|
||||||
|
in *note Makefile Conventions: (standards)Makefile Conventions.
|
||||||
|
|
||||||
The `configure' shell script attempts to guess correct values for
|
The `configure' shell script attempts to guess correct values for
|
||||||
various system-dependent variables used during compilation. It uses
|
various system-dependent variables used during compilation. It uses
|
||||||
@ -42,7 +48,7 @@ may remove or edit it.
|
|||||||
you want to change it or regenerate `configure' using a newer version
|
you want to change it or regenerate `configure' using a newer version
|
||||||
of `autoconf'.
|
of `autoconf'.
|
||||||
|
|
||||||
The simplest way to compile this package is:
|
The simplest way to compile this package is:
|
||||||
|
|
||||||
1. `cd' to the directory containing the package's source code and type
|
1. `cd' to the directory containing the package's source code and type
|
||||||
`./configure' to configure the package for your system.
|
`./configure' to configure the package for your system.
|
||||||
@ -53,12 +59,22 @@ The simplest way to compile this package is:
|
|||||||
2. Type `make' to compile the package.
|
2. Type `make' to compile the package.
|
||||||
|
|
||||||
3. Optionally, type `make check' to run any self-tests that come with
|
3. Optionally, type `make check' to run any self-tests that come with
|
||||||
the package.
|
the package, generally using the just-built uninstalled binaries.
|
||||||
|
|
||||||
4. Type `make install' to install the programs and any data files and
|
4. Type `make install' to install the programs and any data files and
|
||||||
documentation.
|
documentation. When installing into a prefix owned by root, it is
|
||||||
|
recommended that the package be configured and built as a regular
|
||||||
|
user, and only the `make install' phase executed with root
|
||||||
|
privileges.
|
||||||
|
|
||||||
5. You can remove the program binaries and object files from the
|
5. Optionally, type `make installcheck' to repeat any self-tests, but
|
||||||
|
this time using the binaries in their final installed location.
|
||||||
|
This target does not install anything. Running this target as a
|
||||||
|
regular user, particularly if the prior `make install' required
|
||||||
|
root privileges, verifies that the installation completed
|
||||||
|
correctly.
|
||||||
|
|
||||||
|
6. You can remove the program binaries and object files from the
|
||||||
source code directory by typing `make clean'. To also remove the
|
source code directory by typing `make clean'. To also remove the
|
||||||
files that `configure' created (so you can compile the package for
|
files that `configure' created (so you can compile the package for
|
||||||
a different kind of computer), type `make distclean'. There is
|
a different kind of computer), type `make distclean'. There is
|
||||||
@ -67,8 +83,15 @@ The simplest way to compile this package is:
|
|||||||
all sorts of other programs in order to regenerate files that came
|
all sorts of other programs in order to regenerate files that came
|
||||||
with the distribution.
|
with the distribution.
|
||||||
|
|
||||||
6. Often, you can also type `make uninstall' to remove the installed
|
7. Often, you can also type `make uninstall' to remove the installed
|
||||||
files again.
|
files again. In practice, not all packages have tested that
|
||||||
|
uninstallation works correctly, even though it is required by the
|
||||||
|
GNU Coding Standards.
|
||||||
|
|
||||||
|
8. Some packages, particularly those that use Automake, provide `make
|
||||||
|
distcheck', which can be used by developers to test that all other
|
||||||
|
targets like `make install' and `make uninstall' work correctly.
|
||||||
|
This target is generally not run by end users.
|
||||||
|
|
||||||
Compilers and Options
|
Compilers and Options
|
||||||
=====================
|
=====================
|
||||||
@ -93,7 +116,8 @@ same time, by placing the object files for each architecture in their
|
|||||||
own directory. To do this, you can use GNU `make'. `cd' to the
|
own directory. To do this, you can use GNU `make'. `cd' to the
|
||||||
directory where you want the object files and executables to go and run
|
directory where you want the object files and executables to go and run
|
||||||
the `configure' script. `configure' automatically checks for the
|
the `configure' script. `configure' automatically checks for the
|
||||||
source code in the directory that `configure' is in and in `..'.
|
source code in the directory that `configure' is in and in `..'. This
|
||||||
|
is known as a "VPATH" build.
|
||||||
|
|
||||||
With a non-GNU `make', it is safer to compile the package for one
|
With a non-GNU `make', it is safer to compile the package for one
|
||||||
architecture at a time in the source code directory. After you have
|
architecture at a time in the source code directory. After you have
|
||||||
@ -120,7 +144,8 @@ Installation Names
|
|||||||
By default, `make install' installs the package's commands under
|
By default, `make install' installs the package's commands under
|
||||||
`/usr/local/bin', include files under `/usr/local/include', etc. You
|
`/usr/local/bin', include files under `/usr/local/include', etc. You
|
||||||
can specify an installation prefix other than `/usr/local' by giving
|
can specify an installation prefix other than `/usr/local' by giving
|
||||||
`configure' the option `--prefix=PREFIX'.
|
`configure' the option `--prefix=PREFIX', where PREFIX must be an
|
||||||
|
absolute file name.
|
||||||
|
|
||||||
You can specify separate installation prefixes for
|
You can specify separate installation prefixes for
|
||||||
architecture-specific files and architecture-independent files. If you
|
architecture-specific files and architecture-independent files. If you
|
||||||
@ -131,15 +156,46 @@ Documentation and other data files still use the regular prefix.
|
|||||||
In addition, if you use an unusual directory layout you can give
|
In addition, if you use an unusual directory layout you can give
|
||||||
options like `--bindir=DIR' to specify different values for particular
|
options like `--bindir=DIR' to specify different values for particular
|
||||||
kinds of files. Run `configure --help' for a list of the directories
|
kinds of files. Run `configure --help' for a list of the directories
|
||||||
you can set and what kinds of files go in them.
|
you can set and what kinds of files go in them. In general, the
|
||||||
|
default for these options is expressed in terms of `${prefix}', so that
|
||||||
|
specifying just `--prefix' will affect all of the other directory
|
||||||
|
specifications that were not explicitly provided.
|
||||||
|
|
||||||
|
The most portable way to affect installation locations is to pass the
|
||||||
|
correct locations to `configure'; however, many packages provide one or
|
||||||
|
both of the following shortcuts of passing variable assignments to the
|
||||||
|
`make install' command line to change installation locations without
|
||||||
|
having to reconfigure or recompile.
|
||||||
|
|
||||||
|
The first method involves providing an override variable for each
|
||||||
|
affected directory. For example, `make install
|
||||||
|
prefix=/alternate/directory' will choose an alternate location for all
|
||||||
|
directory configuration variables that were expressed in terms of
|
||||||
|
`${prefix}'. Any directories that were specified during `configure',
|
||||||
|
but not in terms of `${prefix}', must each be overridden at install
|
||||||
|
time for the entire installation to be relocated. The approach of
|
||||||
|
makefile variable overrides for each directory variable is required by
|
||||||
|
the GNU Coding Standards, and ideally causes no recompilation.
|
||||||
|
However, some platforms have known limitations with the semantics of
|
||||||
|
shared libraries that end up requiring recompilation when using this
|
||||||
|
method, particularly noticeable in packages that use GNU Libtool.
|
||||||
|
|
||||||
|
The second method involves providing the `DESTDIR' variable. For
|
||||||
|
example, `make install DESTDIR=/alternate/directory' will prepend
|
||||||
|
`/alternate/directory' before all installation names. The approach of
|
||||||
|
`DESTDIR' overrides is not required by the GNU Coding Standards, and
|
||||||
|
does not work on platforms that have drive letters. On the other hand,
|
||||||
|
it does better at avoiding recompilation issues, and works well even
|
||||||
|
when some directory options were not specified in terms of `${prefix}'
|
||||||
|
at `configure' time.
|
||||||
|
|
||||||
|
Optional Features
|
||||||
|
=================
|
||||||
|
|
||||||
If the package supports it, you can cause programs to be installed
|
If the package supports it, you can cause programs to be installed
|
||||||
with an extra prefix or suffix on their names by giving `configure' the
|
with an extra prefix or suffix on their names by giving `configure' the
|
||||||
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
|
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
|
||||||
|
|
||||||
Optional Features
|
|
||||||
=================
|
|
||||||
|
|
||||||
Some packages pay attention to `--enable-FEATURE' options to
|
Some packages pay attention to `--enable-FEATURE' options to
|
||||||
`configure', where FEATURE indicates an optional part of the package.
|
`configure', where FEATURE indicates an optional part of the package.
|
||||||
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
|
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
|
||||||
@ -152,6 +208,13 @@ find the X include and library files automatically, but if it doesn't,
|
|||||||
you can use the `configure' options `--x-includes=DIR' and
|
you can use the `configure' options `--x-includes=DIR' and
|
||||||
`--x-libraries=DIR' to specify their locations.
|
`--x-libraries=DIR' to specify their locations.
|
||||||
|
|
||||||
|
Some packages offer the ability to configure how verbose the
|
||||||
|
execution of `make' will be. For these packages, running `./configure
|
||||||
|
--enable-silent-rules' sets the default to minimal output, which can be
|
||||||
|
overridden with `make V=1'; while running `./configure
|
||||||
|
--disable-silent-rules' sets the default to verbose, which can be
|
||||||
|
overridden with `make V=0'.
|
||||||
|
|
||||||
Particular systems
|
Particular systems
|
||||||
==================
|
==================
|
||||||
|
|
||||||
@ -159,10 +222,15 @@ Particular systems
|
|||||||
CC is not installed, it is recommended to use the following options in
|
CC is not installed, it is recommended to use the following options in
|
||||||
order to use an ANSI C compiler:
|
order to use an ANSI C compiler:
|
||||||
|
|
||||||
./configure CC="cc -Ae"
|
./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
|
||||||
|
|
||||||
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
|
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
|
||||||
|
|
||||||
|
HP-UX `make' updates targets which have the same time stamps as
|
||||||
|
their prerequisites, which makes it generally unusable when shipped
|
||||||
|
generated files such as `configure' are involved. Use GNU `make'
|
||||||
|
instead.
|
||||||
|
|
||||||
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
|
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
|
||||||
parse its `<wchar.h>' header file. The option `-nodtk' can be used as
|
parse its `<wchar.h>' header file. The option `-nodtk' can be used as
|
||||||
a workaround. If GNU CC is not installed, it is therefore recommended
|
a workaround. If GNU CC is not installed, it is therefore recommended
|
||||||
@ -174,6 +242,16 @@ and if that doesn't work, try
|
|||||||
|
|
||||||
./configure CC="cc -nodtk"
|
./configure CC="cc -nodtk"
|
||||||
|
|
||||||
|
On Solaris, don't put `/usr/ucb' early in your `PATH'. This
|
||||||
|
directory contains several dysfunctional programs; working variants of
|
||||||
|
these programs are available in `/usr/bin'. So, if you need `/usr/ucb'
|
||||||
|
in your `PATH', put it _after_ `/usr/bin'.
|
||||||
|
|
||||||
|
On Haiku, software installed for all users goes in `/boot/common',
|
||||||
|
not `/usr/local'. It is recommended to use the following options:
|
||||||
|
|
||||||
|
./configure --prefix=/boot/common
|
||||||
|
|
||||||
Specifying the System Type
|
Specifying the System Type
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
@ -189,7 +267,8 @@ type, such as `sun4', or a canonical name which has the form:
|
|||||||
|
|
||||||
where SYSTEM can have one of these forms:
|
where SYSTEM can have one of these forms:
|
||||||
|
|
||||||
OS KERNEL-OS
|
OS
|
||||||
|
KERNEL-OS
|
||||||
|
|
||||||
See the file `config.sub' for the possible values of each field. If
|
See the file `config.sub' for the possible values of each field. If
|
||||||
`config.sub' isn't included in this package, then this package doesn't
|
`config.sub' isn't included in this package, then this package doesn't
|
||||||
@ -277,7 +356,7 @@ operates.
|
|||||||
`configure' can determine that directory automatically.
|
`configure' can determine that directory automatically.
|
||||||
|
|
||||||
`--prefix=DIR'
|
`--prefix=DIR'
|
||||||
Use DIR as the installation prefix. *Note Installation Names::
|
Use DIR as the installation prefix. *note Installation Names::
|
||||||
for more details, including other options available for fine-tuning
|
for more details, including other options available for fine-tuning
|
||||||
the installation locations.
|
the installation locations.
|
||||||
|
|
||||||
|
@ -4,12 +4,14 @@ PCRE LICENCE
|
|||||||
PCRE is a library of functions to support regular expressions whose syntax
|
PCRE is a library of functions to support regular expressions whose syntax
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
|
Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
|
||||||
specified below. The documentation for PCRE, supplied in the "doc"
|
specified below. The documentation for PCRE, supplied in the "doc"
|
||||||
directory, is distributed under the same terms as the software itself.
|
directory, is distributed under the same terms as the software itself.
|
||||||
|
|
||||||
The basic library functions are written in C and are freestanding. Also
|
The basic library functions are written in C and are freestanding. Also
|
||||||
included in the distribution is a set of C++ wrapper functions.
|
included in the distribution is a set of C++ wrapper functions, and a
|
||||||
|
just-in-time compiler that can be used to optimize pattern matching. These
|
||||||
|
are both optional features that can be omitted when the library is built.
|
||||||
|
|
||||||
|
|
||||||
THE BASIC LIBRARY FUNCTIONS
|
THE BASIC LIBRARY FUNCTIONS
|
||||||
@ -22,7 +24,29 @@ Email domain: cam.ac.uk
|
|||||||
University of Cambridge Computing Service,
|
University of Cambridge Computing Service,
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
Copyright (c) 1997-2009 University of Cambridge
|
Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
PCRE JUST-IN-TIME COMPILATION SUPPORT
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
Written by: Zoltan Herczeg
|
||||||
|
Email local part: hzmester
|
||||||
|
Email domain: freemail.hu
|
||||||
|
|
||||||
|
Copyright(c) 2010-2012 Zoltan Herczeg
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
STACK-LESS JUST-IN-TIME COMPILER
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
|
Written by: Zoltan Herczeg
|
||||||
|
Email local part: hzmester
|
||||||
|
Email domain: freemail.hu
|
||||||
|
|
||||||
|
Copyright(c) 2009-2012 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
@ -31,7 +55,7 @@ THE C++ WRAPPER FUNCTIONS
|
|||||||
|
|
||||||
Contributed by: Google Inc.
|
Contributed by: Google Inc.
|
||||||
|
|
||||||
Copyright (c) 2007-2008, Google Inc.
|
Copyright (c) 2007-2012, Google Inc.
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
## Process this file with automake to produce Makefile.in.
|
## Process this file with automake to produce Makefile.in.
|
||||||
|
|
||||||
|
ACLOCAL_AMFLAGS = -I m4
|
||||||
|
|
||||||
dist_doc_DATA = \
|
dist_doc_DATA = \
|
||||||
doc/pcre.txt \
|
doc/pcre.txt \
|
||||||
doc/pcre-config.txt \
|
doc/pcre-config.txt \
|
||||||
@ -15,7 +17,9 @@ dist_doc_DATA = \
|
|||||||
dist_html_DATA = \
|
dist_html_DATA = \
|
||||||
doc/html/index.html \
|
doc/html/index.html \
|
||||||
doc/html/pcre.html \
|
doc/html/pcre.html \
|
||||||
|
doc/html/pcre16.html \
|
||||||
doc/html/pcre-config.html \
|
doc/html/pcre-config.html \
|
||||||
|
doc/html/pcre_assign_jit_stack.html \
|
||||||
doc/html/pcre_compile.html \
|
doc/html/pcre_compile.html \
|
||||||
doc/html/pcre_compile2.html \
|
doc/html/pcre_compile2.html \
|
||||||
doc/html/pcre_config.html \
|
doc/html/pcre_config.html \
|
||||||
@ -23,6 +27,7 @@ dist_html_DATA = \
|
|||||||
doc/html/pcre_copy_substring.html \
|
doc/html/pcre_copy_substring.html \
|
||||||
doc/html/pcre_dfa_exec.html \
|
doc/html/pcre_dfa_exec.html \
|
||||||
doc/html/pcre_exec.html \
|
doc/html/pcre_exec.html \
|
||||||
|
doc/html/pcre_free_study.html \
|
||||||
doc/html/pcre_free_substring.html \
|
doc/html/pcre_free_substring.html \
|
||||||
doc/html/pcre_free_substring_list.html \
|
doc/html/pcre_free_substring_list.html \
|
||||||
doc/html/pcre_fullinfo.html \
|
doc/html/pcre_fullinfo.html \
|
||||||
@ -31,16 +36,23 @@ dist_html_DATA = \
|
|||||||
doc/html/pcre_get_stringtable_entries.html \
|
doc/html/pcre_get_stringtable_entries.html \
|
||||||
doc/html/pcre_get_substring.html \
|
doc/html/pcre_get_substring.html \
|
||||||
doc/html/pcre_get_substring_list.html \
|
doc/html/pcre_get_substring_list.html \
|
||||||
doc/html/pcre_info.html \
|
doc/html/pcre_jit_exec.html \
|
||||||
|
doc/html/pcre_jit_stack_alloc.html \
|
||||||
|
doc/html/pcre_jit_stack_free.html \
|
||||||
doc/html/pcre_maketables.html \
|
doc/html/pcre_maketables.html \
|
||||||
|
doc/html/pcre_pattern_to_host_byte_order.html \
|
||||||
doc/html/pcre_refcount.html \
|
doc/html/pcre_refcount.html \
|
||||||
doc/html/pcre_study.html \
|
doc/html/pcre_study.html \
|
||||||
|
doc/html/pcre_utf16_to_host_byte_order.html \
|
||||||
doc/html/pcre_version.html \
|
doc/html/pcre_version.html \
|
||||||
doc/html/pcreapi.html \
|
doc/html/pcreapi.html \
|
||||||
doc/html/pcrebuild.html \
|
doc/html/pcrebuild.html \
|
||||||
doc/html/pcrecallout.html \
|
doc/html/pcrecallout.html \
|
||||||
doc/html/pcrecompat.html \
|
doc/html/pcrecompat.html \
|
||||||
|
doc/html/pcredemo.html \
|
||||||
doc/html/pcregrep.html \
|
doc/html/pcregrep.html \
|
||||||
|
doc/html/pcrejit.html \
|
||||||
|
doc/html/pcrelimits.html \
|
||||||
doc/html/pcrematching.html \
|
doc/html/pcrematching.html \
|
||||||
doc/html/pcrepartial.html \
|
doc/html/pcrepartial.html \
|
||||||
doc/html/pcrepattern.html \
|
doc/html/pcrepattern.html \
|
||||||
@ -50,7 +62,12 @@ dist_html_DATA = \
|
|||||||
doc/html/pcresample.html \
|
doc/html/pcresample.html \
|
||||||
doc/html/pcrestack.html \
|
doc/html/pcrestack.html \
|
||||||
doc/html/pcresyntax.html \
|
doc/html/pcresyntax.html \
|
||||||
doc/html/pcretest.html
|
doc/html/pcretest.html \
|
||||||
|
doc/html/pcreunicode.html
|
||||||
|
|
||||||
|
# doc/html/pcre32.html \
|
||||||
|
# doc/html/pcre_utf32_to_host_byte_order.html \
|
||||||
|
#
|
||||||
|
|
||||||
pcrecpp_html = doc/html/pcrecpp.html
|
pcrecpp_html = doc/html/pcrecpp.html
|
||||||
dist_noinst_DATA = $(pcrecpp_html)
|
dist_noinst_DATA = $(pcrecpp_html)
|
||||||
@ -69,7 +86,8 @@ check_SCRIPTS =
|
|||||||
dist_noinst_SCRIPTS =
|
dist_noinst_SCRIPTS =
|
||||||
|
|
||||||
# Some of the binaries we make are to be installed, and others are
|
# Some of the binaries we make are to be installed, and others are
|
||||||
# (non-user-visible) helper programs needed to build libpcre.
|
# (non-user-visible) helper programs needed to build libpcre, libpcre16
|
||||||
|
# or libpcre32.
|
||||||
bin_PROGRAMS =
|
bin_PROGRAMS =
|
||||||
noinst_PROGRAMS =
|
noinst_PROGRAMS =
|
||||||
|
|
||||||
@ -81,15 +99,21 @@ MAINTAINERCLEANFILES =
|
|||||||
# the Autotools include by default.
|
# the Autotools include by default.
|
||||||
EXTRA_DIST =
|
EXTRA_DIST =
|
||||||
|
|
||||||
|
# These files contain additional m4 macros that are used by autoconf.
|
||||||
|
EXTRA_DIST += \
|
||||||
|
m4/ax_pthread.m4 m4/pcre_visibility.m4
|
||||||
|
|
||||||
# These files contain maintenance information
|
# These files contain maintenance information
|
||||||
EXTRA_DIST += \
|
EXTRA_DIST += \
|
||||||
doc/perltest.txt \
|
doc/perltest.txt \
|
||||||
NON-UNIX-USE \
|
NON-UNIX-USE \
|
||||||
|
NON-AUTOTOOLS-BUILD \
|
||||||
HACKING
|
HACKING
|
||||||
|
|
||||||
# These files are used in the preparation of a release
|
# These files are used in the preparation of a release
|
||||||
EXTRA_DIST += \
|
EXTRA_DIST += \
|
||||||
PrepareRelease \
|
PrepareRelease \
|
||||||
|
CheckMan \
|
||||||
CleanTxt \
|
CleanTxt \
|
||||||
Detrail \
|
Detrail \
|
||||||
132html \
|
132html \
|
||||||
@ -109,11 +133,39 @@ EXTRA_DIST += \
|
|||||||
pcre.h.generic \
|
pcre.h.generic \
|
||||||
config.h.generic
|
config.h.generic
|
||||||
|
|
||||||
pcre.h.generic: configure.ac
|
# The only difference between pcre.h.in and pcre.h is the setting of the PCRE
|
||||||
|
# version number. Therefore, we can create the generic version just by copying.
|
||||||
|
pcre.h.generic: pcre.h.in configure.ac
|
||||||
rm -f $@
|
rm -f $@
|
||||||
cp -p pcre.h $@
|
cp -p pcre.h $@
|
||||||
|
|
||||||
MAINTAINERCLEANFILES += pcre.h.generic
|
# It is more complicated for config.h.generic. We need the version that results
|
||||||
|
# from a default configuration. We can get this by doing a configure in a
|
||||||
|
# temporary directory. However, some trickery is needed,
|
||||||
|
# because the source directory may already be configured. If you
|
||||||
|
# just try running configure in a new directory, it complains. For this reason,
|
||||||
|
# we move config.status out of the way while doing the default configuration.
|
||||||
|
# The resulting config.h is munged by perl to put #ifdefs round any #defines
|
||||||
|
# and to get rid of any gcc-specific visibility settings. Make sure that
|
||||||
|
# PCRE_EXP_DEFN is unset (in case it has visibility settings).
|
||||||
|
config.h.generic: configure.ac
|
||||||
|
rm -rf $@ _generic
|
||||||
|
mkdir _generic
|
||||||
|
cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside
|
||||||
|
cd _generic && $(abs_top_srcdir)/configure || :
|
||||||
|
cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs
|
||||||
|
test -f _generic/config.h
|
||||||
|
perl -n \
|
||||||
|
-e 'BEGIN{$$blank=0;}' \
|
||||||
|
-e 'if(/PCRE_EXP_DEFN/){print"/* #undef PCRE_EXP_DEFN */\n";$$blank=0;next;}' \
|
||||||
|
-e 'if(/to make a symbol visible/){next;}' \
|
||||||
|
-e 'if(/__attribute__ \(\(visibility/){next;}' \
|
||||||
|
-e 'if(/^#define\s(?!PACKAGE)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;}' \
|
||||||
|
-e 'else {if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}}' \
|
||||||
|
_generic/config.h >$@
|
||||||
|
rm -rf _generic
|
||||||
|
|
||||||
|
MAINTAINERCLEANFILES += pcre.h.generic config.h.generic
|
||||||
|
|
||||||
# These are the header files we'll install. We do not distribute pcre.h because
|
# These are the header files we'll install. We do not distribute pcre.h because
|
||||||
# it is generated from pcre.h.in.
|
# it is generated from pcre.h.in.
|
||||||
@ -158,10 +210,16 @@ pcre_chartables.c: $(srcdir)/pcre_chartables.c.dist
|
|||||||
|
|
||||||
endif # WITH_REBUILD_CHARTABLES
|
endif # WITH_REBUILD_CHARTABLES
|
||||||
|
|
||||||
|
BUILT_SOURCES = pcre_chartables.c
|
||||||
|
|
||||||
## The main pcre library
|
## The main pcre library
|
||||||
|
|
||||||
|
# Build the 8 bit library if it is enabled.
|
||||||
|
if WITH_PCRE8
|
||||||
lib_LTLIBRARIES += libpcre.la
|
lib_LTLIBRARIES += libpcre.la
|
||||||
|
|
||||||
libpcre_la_SOURCES = \
|
libpcre_la_SOURCES = \
|
||||||
|
pcre_byte_order.c \
|
||||||
pcre_compile.c \
|
pcre_compile.c \
|
||||||
pcre_config.c \
|
pcre_config.c \
|
||||||
pcre_dfa_exec.c \
|
pcre_dfa_exec.c \
|
||||||
@ -169,41 +227,211 @@ libpcre_la_SOURCES = \
|
|||||||
pcre_fullinfo.c \
|
pcre_fullinfo.c \
|
||||||
pcre_get.c \
|
pcre_get.c \
|
||||||
pcre_globals.c \
|
pcre_globals.c \
|
||||||
pcre_info.c \
|
|
||||||
pcre_internal.h \
|
pcre_internal.h \
|
||||||
|
pcre_jit_compile.c \
|
||||||
pcre_maketables.c \
|
pcre_maketables.c \
|
||||||
pcre_newline.c \
|
pcre_newline.c \
|
||||||
pcre_ord2utf8.c \
|
pcre_ord2utf8.c \
|
||||||
pcre_refcount.c \
|
pcre_refcount.c \
|
||||||
|
pcre_string_utils.c \
|
||||||
pcre_study.c \
|
pcre_study.c \
|
||||||
pcre_tables.c \
|
pcre_tables.c \
|
||||||
pcre_try_flipped.c \
|
|
||||||
pcre_ucd.c \
|
pcre_ucd.c \
|
||||||
pcre_valid_utf8.c \
|
pcre_valid_utf8.c \
|
||||||
pcre_version.c \
|
pcre_version.c \
|
||||||
pcre_xclass.c \
|
pcre_xclass.c \
|
||||||
ucp.h
|
ucp.h
|
||||||
|
|
||||||
|
libpcre_la_CFLAGS = \
|
||||||
|
$(VISIBILITY_CFLAGS) \
|
||||||
|
$(AM_CFLAGS)
|
||||||
|
|
||||||
|
libpcre_la_LIBADD =
|
||||||
|
|
||||||
## This file is generated as part of the building process, so don't distribute.
|
## This file is generated as part of the building process, so don't distribute.
|
||||||
nodist_libpcre_la_SOURCES = \
|
nodist_libpcre_la_SOURCES = \
|
||||||
pcre_chartables.c
|
pcre_chartables.c
|
||||||
|
|
||||||
# The pcre_printint.src file is #included by some source files, so it must be
|
endif # WITH_PCRE8
|
||||||
# distributed. The pcre_chartables.c.dist file is the default version of
|
|
||||||
# pcre_chartables.c, used unless --enable-rebuild-chartables is specified.
|
|
||||||
EXTRA_DIST += pcre_printint.src pcre_chartables.c.dist
|
|
||||||
|
|
||||||
|
# Build the 16 bit library if it is enabled.
|
||||||
|
if WITH_PCRE16
|
||||||
|
lib_LTLIBRARIES += libpcre16.la
|
||||||
|
libpcre16_la_SOURCES = \
|
||||||
|
pcre16_byte_order.c \
|
||||||
|
pcre16_chartables.c \
|
||||||
|
pcre16_compile.c \
|
||||||
|
pcre16_config.c \
|
||||||
|
pcre16_dfa_exec.c \
|
||||||
|
pcre16_exec.c \
|
||||||
|
pcre16_fullinfo.c \
|
||||||
|
pcre16_get.c \
|
||||||
|
pcre16_globals.c \
|
||||||
|
pcre16_jit_compile.c \
|
||||||
|
pcre16_maketables.c \
|
||||||
|
pcre16_newline.c \
|
||||||
|
pcre16_ord2utf16.c \
|
||||||
|
pcre16_refcount.c \
|
||||||
|
pcre16_string_utils.c \
|
||||||
|
pcre16_study.c \
|
||||||
|
pcre16_tables.c \
|
||||||
|
pcre16_ucd.c \
|
||||||
|
pcre16_utf16_utils.c \
|
||||||
|
pcre16_valid_utf16.c \
|
||||||
|
pcre16_version.c \
|
||||||
|
pcre16_xclass.c
|
||||||
|
|
||||||
|
libpcre16_la_CFLAGS = \
|
||||||
|
$(VISIBILITY_CFLAGS) \
|
||||||
|
$(AM_CFLAGS)
|
||||||
|
|
||||||
|
libpcre16_la_LIBADD =
|
||||||
|
|
||||||
|
## This file is generated as part of the building process, so don't distribute.
|
||||||
|
nodist_libpcre16_la_SOURCES = \
|
||||||
|
pcre_chartables.c
|
||||||
|
|
||||||
|
endif # WITH_PCRE16
|
||||||
|
|
||||||
|
# Build the 32 bit library if it is enabled.
|
||||||
|
if WITH_PCRE32
|
||||||
|
lib_LTLIBRARIES += libpcre32.la
|
||||||
|
libpcre32_la_SOURCES = \
|
||||||
|
pcre32_byte_order.c \
|
||||||
|
pcre32_chartables.c \
|
||||||
|
pcre32_compile.c \
|
||||||
|
pcre32_config.c \
|
||||||
|
pcre32_dfa_exec.c \
|
||||||
|
pcre32_exec.c \
|
||||||
|
pcre32_fullinfo.c \
|
||||||
|
pcre32_get.c \
|
||||||
|
pcre32_globals.c \
|
||||||
|
pcre32_jit_compile.c \
|
||||||
|
pcre32_maketables.c \
|
||||||
|
pcre32_newline.c \
|
||||||
|
pcre32_ord2utf32.c \
|
||||||
|
pcre32_refcount.c \
|
||||||
|
pcre32_string_utils.c \
|
||||||
|
pcre32_study.c \
|
||||||
|
pcre32_tables.c \
|
||||||
|
pcre32_ucd.c \
|
||||||
|
pcre32_utf32_utils.c \
|
||||||
|
pcre32_valid_utf32.c \
|
||||||
|
pcre32_version.c \
|
||||||
|
pcre32_xclass.c
|
||||||
|
|
||||||
|
libpcre32_la_CFLAGS = \
|
||||||
|
$(VISIBILITY_CFLAGS) \
|
||||||
|
$(AM_CFLAGS)
|
||||||
|
|
||||||
|
libpcre32_la_LIBADD =
|
||||||
|
|
||||||
|
## This file is generated as part of the building process, so don't distribute.
|
||||||
|
nodist_libpcre32_la_SOURCES = \
|
||||||
|
pcre_chartables.c
|
||||||
|
|
||||||
|
endif # WITH_PCRE32
|
||||||
|
|
||||||
|
# The pcre_chartables.c.dist file is the default version of pcre_chartables.c,
|
||||||
|
# used unless --enable-rebuild-chartables is specified.
|
||||||
|
EXTRA_DIST += pcre_chartables.c.dist
|
||||||
|
|
||||||
|
# The JIT compiler lives in a separate directory, but its files are #included
|
||||||
|
# when pcre_jit_compile.c is processed, so they must be distributed.
|
||||||
|
EXTRA_DIST += \
|
||||||
|
sljit/sljitConfig.h \
|
||||||
|
sljit/sljitConfigInternal.h \
|
||||||
|
sljit/sljitExecAllocator.c \
|
||||||
|
sljit/sljitLir.c \
|
||||||
|
sljit/sljitLir.h \
|
||||||
|
sljit/sljitNativeARM_Thumb2.c \
|
||||||
|
sljit/sljitNativeARM_v5.c \
|
||||||
|
sljit/sljitNativeMIPS_32.c \
|
||||||
|
sljit/sljitNativeMIPS_common.c \
|
||||||
|
sljit/sljitNativePPC_32.c \
|
||||||
|
sljit/sljitNativePPC_64.c \
|
||||||
|
sljit/sljitNativePPC_common.c \
|
||||||
|
sljit/sljitNativeSPARC_32.c \
|
||||||
|
sljit/sljitNativeSPARC_common.c \
|
||||||
|
sljit/sljitNativeX86_32.c \
|
||||||
|
sljit/sljitNativeX86_64.c \
|
||||||
|
sljit/sljitNativeX86_common.c \
|
||||||
|
sljit/sljitUtils.c
|
||||||
|
|
||||||
|
if WITH_PCRE8
|
||||||
libpcre_la_LDFLAGS = $(EXTRA_LIBPCRE_LDFLAGS)
|
libpcre_la_LDFLAGS = $(EXTRA_LIBPCRE_LDFLAGS)
|
||||||
|
endif # WITH_PCRE8
|
||||||
|
if WITH_PCRE16
|
||||||
|
libpcre16_la_LDFLAGS = $(EXTRA_LIBPCRE16_LDFLAGS)
|
||||||
|
endif # WITH_PCRE16
|
||||||
|
if WITH_PCRE32
|
||||||
|
libpcre32_la_LDFLAGS = $(EXTRA_LIBPCRE32_LDFLAGS)
|
||||||
|
endif # WITH_PCRE32
|
||||||
|
|
||||||
|
if WITH_VALGRIND
|
||||||
|
if WITH_PCRE8
|
||||||
|
libpcre_la_CFLAGS += $(VALGRIND_CFLAGS)
|
||||||
|
endif # WITH_PCRE8
|
||||||
|
if WITH_PCRE16
|
||||||
|
libpcre16_la_CFLAGS += $(VALGRIND_CFLAGS)
|
||||||
|
endif # WITH_PCRE16
|
||||||
|
if WITH_PCRE32
|
||||||
|
libpcre32_la_CFLAGS += $(VALGRIND_CFLAGS)
|
||||||
|
endif # WITH_PCRE32
|
||||||
|
endif # WITH_VALGRIND
|
||||||
|
|
||||||
|
if WITH_GCOV
|
||||||
|
if WITH_PCRE8
|
||||||
|
libpcre_la_CFLAGS += $(GCOV_CFLAGS)
|
||||||
|
endif # WITH_PCRE8
|
||||||
|
if WITH_PCRE16
|
||||||
|
libpcre16_la_CFLAGS += $(GCOV_CFLAGS)
|
||||||
|
endif # WITH_PCRE16
|
||||||
|
if WITH_PCRE32
|
||||||
|
libpcre32_la_CFLAGS += $(GCOV_CFLAGS)
|
||||||
|
endif # WITH_PCRE32
|
||||||
|
endif # WITH_GCOV
|
||||||
|
|
||||||
CLEANFILES += pcre_chartables.c
|
CLEANFILES += pcre_chartables.c
|
||||||
|
|
||||||
|
## If JIT support is enabled, arrange for the JIT test program to run.
|
||||||
|
if WITH_JIT
|
||||||
|
TESTS += pcre_jit_test
|
||||||
|
noinst_PROGRAMS += pcre_jit_test
|
||||||
|
pcre_jit_test_SOURCES = pcre_jit_test.c
|
||||||
|
pcre_jit_test_CFLAGS = $(AM_CFLAGS)
|
||||||
|
pcre_jit_test_LDADD =
|
||||||
|
if WITH_PCRE8
|
||||||
|
pcre_jit_test_LDADD += libpcre.la
|
||||||
|
endif # WITH_PCRE8
|
||||||
|
if WITH_PCRE16
|
||||||
|
pcre_jit_test_LDADD += libpcre16.la
|
||||||
|
endif # WITH_PCRE16
|
||||||
|
if WITH_PCRE32
|
||||||
|
pcre_jit_test_LDADD += libpcre32.la
|
||||||
|
endif # WITH_PCRE32
|
||||||
|
if WITH_GCOV
|
||||||
|
pcre_jit_test_CFLAGS += $(GCOV_CFLAGS)
|
||||||
|
pcre_jit_test_LDADD += $(GCOV_LIBS)
|
||||||
|
endif # WITH_GCOV
|
||||||
|
endif # WITH_JIT
|
||||||
|
|
||||||
## A version of the main pcre library that has a posix re API.
|
## A version of the main pcre library that has a posix re API.
|
||||||
|
if WITH_PCRE8
|
||||||
|
|
||||||
lib_LTLIBRARIES += libpcreposix.la
|
lib_LTLIBRARIES += libpcreposix.la
|
||||||
libpcreposix_la_SOURCES = \
|
libpcreposix_la_SOURCES = \
|
||||||
pcreposix.c
|
pcreposix.c
|
||||||
|
libpcreposix_la_CFLAGS = $(VISIBILITY_CFLAGS) $(AM_CFLAGS)
|
||||||
libpcreposix_la_LDFLAGS = $(EXTRA_LIBPCREPOSIX_LDFLAGS)
|
libpcreposix_la_LDFLAGS = $(EXTRA_LIBPCREPOSIX_LDFLAGS)
|
||||||
libpcreposix_la_LIBADD = libpcre.la
|
libpcreposix_la_LIBADD = libpcre.la
|
||||||
|
|
||||||
|
if WITH_GCOV
|
||||||
|
libpcreposix_la_CFLAGS += $(GCOV_CFLAGS)
|
||||||
|
endif # WITH_GCOV
|
||||||
|
|
||||||
|
endif # WITH_PCRE8
|
||||||
|
|
||||||
## There's a C++ library as well.
|
## There's a C++ library as well.
|
||||||
if WITH_PCRE_CPP
|
if WITH_PCRE_CPP
|
||||||
|
|
||||||
@ -213,24 +441,35 @@ libpcrecpp_la_SOURCES = \
|
|||||||
pcrecpp.cc \
|
pcrecpp.cc \
|
||||||
pcre_scanner.cc \
|
pcre_scanner.cc \
|
||||||
pcre_stringpiece.cc
|
pcre_stringpiece.cc
|
||||||
|
libpcrecpp_la_CXXFLAGS = $(VISIBILITY_CXXFLAGS) $(AM_CXXFLAGS)
|
||||||
libpcrecpp_la_LDFLAGS = $(EXTRA_LIBPCRECPP_LDFLAGS)
|
libpcrecpp_la_LDFLAGS = $(EXTRA_LIBPCRECPP_LDFLAGS)
|
||||||
libpcrecpp_la_LIBADD = libpcre.la
|
libpcrecpp_la_LIBADD = libpcre.la
|
||||||
|
|
||||||
TESTS += pcrecpp_unittest
|
TESTS += pcrecpp_unittest
|
||||||
noinst_PROGRAMS += pcrecpp_unittest
|
noinst_PROGRAMS += pcrecpp_unittest
|
||||||
pcrecpp_unittest_SOURCES = pcrecpp_unittest.cc
|
pcrecpp_unittest_SOURCES = pcrecpp_unittest.cc
|
||||||
|
pcrecpp_unittest_CXXFLAGS = $(AM_CXXFLAGS)
|
||||||
pcrecpp_unittest_LDADD = libpcrecpp.la
|
pcrecpp_unittest_LDADD = libpcrecpp.la
|
||||||
|
|
||||||
TESTS += pcre_scanner_unittest
|
TESTS += pcre_scanner_unittest
|
||||||
noinst_PROGRAMS += pcre_scanner_unittest
|
noinst_PROGRAMS += pcre_scanner_unittest
|
||||||
pcre_scanner_unittest_SOURCES = pcre_scanner_unittest.cc
|
pcre_scanner_unittest_SOURCES = pcre_scanner_unittest.cc
|
||||||
|
pcre_scanner_unittest_CXXFLAGS = $(AM_CXXFLAGS)
|
||||||
pcre_scanner_unittest_LDADD = libpcrecpp.la
|
pcre_scanner_unittest_LDADD = libpcrecpp.la
|
||||||
|
|
||||||
TESTS += pcre_stringpiece_unittest
|
TESTS += pcre_stringpiece_unittest
|
||||||
noinst_PROGRAMS += pcre_stringpiece_unittest
|
noinst_PROGRAMS += pcre_stringpiece_unittest
|
||||||
pcre_stringpiece_unittest_SOURCES = pcre_stringpiece_unittest.cc
|
pcre_stringpiece_unittest_SOURCES = pcre_stringpiece_unittest.cc
|
||||||
|
pcre_stringpiece_unittest_CXXFLAGS = $(AM_CXXFLAGS)
|
||||||
pcre_stringpiece_unittest_LDADD = libpcrecpp.la
|
pcre_stringpiece_unittest_LDADD = libpcrecpp.la
|
||||||
|
|
||||||
|
if WITH_GCOV
|
||||||
|
libpcrecpp_la_CXXFLAGS += $(GCOV_CXXFLAGS)
|
||||||
|
pcrecpp_unittest_LDADD += $(GCOV_LIBS)
|
||||||
|
pcre_scanner_unittest_LDADD += $(GCOV_LIBS)
|
||||||
|
pcre_stringpiece_unittest_LDADD += $(GCOV_LIBS)
|
||||||
|
endif # WITH_GCOV
|
||||||
|
|
||||||
endif # WITH_PCRE_CPP
|
endif # WITH_PCRE_CPP
|
||||||
|
|
||||||
## The main unit tests
|
## The main unit tests
|
||||||
@ -243,16 +482,47 @@ dist_noinst_SCRIPTS += RunTest
|
|||||||
EXTRA_DIST += RunTest.bat
|
EXTRA_DIST += RunTest.bat
|
||||||
bin_PROGRAMS += pcretest
|
bin_PROGRAMS += pcretest
|
||||||
pcretest_SOURCES = pcretest.c
|
pcretest_SOURCES = pcretest.c
|
||||||
pcretest_LDADD = libpcreposix.la $(LIBREADLINE)
|
pcretest_CFLAGS = $(AM_CFLAGS)
|
||||||
|
pcretest_LDADD = $(LIBREADLINE)
|
||||||
|
if WITH_PCRE8
|
||||||
|
pcretest_SOURCES += pcre_printint.c
|
||||||
|
pcretest_LDADD += libpcre.la libpcreposix.la
|
||||||
|
endif # WITH_PCRE8
|
||||||
|
if WITH_PCRE16
|
||||||
|
pcretest_SOURCES += pcre16_printint.c
|
||||||
|
pcretest_LDADD += libpcre16.la
|
||||||
|
endif # WITH_PCRE16
|
||||||
|
if WITH_PCRE32
|
||||||
|
pcretest_SOURCES += pcre32_printint.c
|
||||||
|
pcretest_LDADD += libpcre32.la
|
||||||
|
endif # WITH_PCRE32
|
||||||
|
if WITH_VALGRIND
|
||||||
|
pcretest_CFLAGS += $(VALGRIND_CFLAGS)
|
||||||
|
endif # WITH_VALGRIND
|
||||||
|
if WITH_GCOV
|
||||||
|
pcretest_CFLAGS += $(GCOV_CFLAGS)
|
||||||
|
pcretest_LDADD += $(GCOV_LIBS)
|
||||||
|
endif # WITH_GCOV
|
||||||
|
|
||||||
|
if WITH_PCRE8
|
||||||
TESTS += RunGrepTest
|
TESTS += RunGrepTest
|
||||||
dist_noinst_SCRIPTS += RunGrepTest
|
dist_noinst_SCRIPTS += RunGrepTest
|
||||||
bin_PROGRAMS += pcregrep
|
bin_PROGRAMS += pcregrep
|
||||||
pcregrep_SOURCES = pcregrep.c
|
pcregrep_SOURCES = pcregrep.c
|
||||||
pcregrep_LDADD = libpcreposix.la $(LIBZ) $(LIBBZ2)
|
pcregrep_CFLAGS = $(AM_CFLAGS)
|
||||||
|
pcregrep_LDADD = $(LIBZ) $(LIBBZ2)
|
||||||
|
pcregrep_LDADD += libpcre.la libpcreposix.la
|
||||||
|
if WITH_GCOV
|
||||||
|
pcregrep_CFLAGS += $(GCOV_CFLAGS)
|
||||||
|
pcregrep_LDADD += $(GCOV_LIBS)
|
||||||
|
endif # WITH_GCOV
|
||||||
|
endif # WITH_PCRE8
|
||||||
|
|
||||||
EXTRA_DIST += \
|
EXTRA_DIST += \
|
||||||
|
testdata/grepbinary \
|
||||||
|
testdata/grepfilelist \
|
||||||
testdata/grepinput \
|
testdata/grepinput \
|
||||||
|
testdata/grepinput3 \
|
||||||
testdata/grepinput8 \
|
testdata/grepinput8 \
|
||||||
testdata/grepinputv \
|
testdata/grepinputv \
|
||||||
testdata/grepinputx \
|
testdata/grepinputx \
|
||||||
@ -260,6 +530,18 @@ EXTRA_DIST += \
|
|||||||
testdata/grepoutput \
|
testdata/grepoutput \
|
||||||
testdata/grepoutput8 \
|
testdata/grepoutput8 \
|
||||||
testdata/grepoutputN \
|
testdata/grepoutputN \
|
||||||
|
testdata/greppatN4 \
|
||||||
|
testdata/saved16 \
|
||||||
|
testdata/saved16BE-1 \
|
||||||
|
testdata/saved16BE-2 \
|
||||||
|
testdata/saved16LE-1 \
|
||||||
|
testdata/saved16LE-2 \
|
||||||
|
testdata/saved32 \
|
||||||
|
testdata/saved32BE-1 \
|
||||||
|
testdata/saved32BE-2 \
|
||||||
|
testdata/saved32LE-1 \
|
||||||
|
testdata/saved32LE-2 \
|
||||||
|
testdata/saved8 \
|
||||||
testdata/testinput1 \
|
testdata/testinput1 \
|
||||||
testdata/testinput2 \
|
testdata/testinput2 \
|
||||||
testdata/testinput3 \
|
testdata/testinput3 \
|
||||||
@ -270,6 +552,23 @@ EXTRA_DIST += \
|
|||||||
testdata/testinput8 \
|
testdata/testinput8 \
|
||||||
testdata/testinput9 \
|
testdata/testinput9 \
|
||||||
testdata/testinput10 \
|
testdata/testinput10 \
|
||||||
|
testdata/testinput11 \
|
||||||
|
testdata/testinput12 \
|
||||||
|
testdata/testinput13 \
|
||||||
|
testdata/testinput14 \
|
||||||
|
testdata/testinput15 \
|
||||||
|
testdata/testinput16 \
|
||||||
|
testdata/testinput17 \
|
||||||
|
testdata/testinput18 \
|
||||||
|
testdata/testinput19 \
|
||||||
|
testdata/testinput20 \
|
||||||
|
testdata/testinput21 \
|
||||||
|
testdata/testinput22 \
|
||||||
|
testdata/testinput23 \
|
||||||
|
testdata/testinput24 \
|
||||||
|
testdata/testinput25 \
|
||||||
|
testdata/testinput26 \
|
||||||
|
testdata/testinputEBC \
|
||||||
testdata/testoutput1 \
|
testdata/testoutput1 \
|
||||||
testdata/testoutput2 \
|
testdata/testoutput2 \
|
||||||
testdata/testoutput3 \
|
testdata/testoutput3 \
|
||||||
@ -280,6 +579,28 @@ EXTRA_DIST += \
|
|||||||
testdata/testoutput8 \
|
testdata/testoutput8 \
|
||||||
testdata/testoutput9 \
|
testdata/testoutput9 \
|
||||||
testdata/testoutput10 \
|
testdata/testoutput10 \
|
||||||
|
testdata/testoutput11-8 \
|
||||||
|
testdata/testoutput11-16 \
|
||||||
|
testdata/testoutput11-32 \
|
||||||
|
testdata/testoutput12 \
|
||||||
|
testdata/testoutput13 \
|
||||||
|
testdata/testoutput14 \
|
||||||
|
testdata/testoutput15 \
|
||||||
|
testdata/testoutput16 \
|
||||||
|
testdata/testoutput17 \
|
||||||
|
testdata/testoutput18-16 \
|
||||||
|
testdata/testoutput18-32 \
|
||||||
|
testdata/testoutput19 \
|
||||||
|
testdata/testoutput20 \
|
||||||
|
testdata/testoutput21-16 \
|
||||||
|
testdata/testoutput21-32 \
|
||||||
|
testdata/testoutput22-16 \
|
||||||
|
testdata/testoutput22-32 \
|
||||||
|
testdata/testoutput23 \
|
||||||
|
testdata/testoutput24 \
|
||||||
|
testdata/testoutput25 \
|
||||||
|
testdata/testoutput26 \
|
||||||
|
testdata/testoutputEBC \
|
||||||
testdata/wintestinput3 \
|
testdata/wintestinput3 \
|
||||||
testdata/wintestoutput3 \
|
testdata/wintestoutput3 \
|
||||||
perltest.pl
|
perltest.pl
|
||||||
@ -287,6 +608,7 @@ EXTRA_DIST += \
|
|||||||
CLEANFILES += \
|
CLEANFILES += \
|
||||||
testsavedregex \
|
testsavedregex \
|
||||||
teststderr \
|
teststderr \
|
||||||
|
testtemp* \
|
||||||
testtry \
|
testtry \
|
||||||
testNinput
|
testNinput
|
||||||
|
|
||||||
@ -309,13 +631,12 @@ test: check ;
|
|||||||
# A PCRE user submitted the following addition, saying that it "will allow
|
# A PCRE user submitted the following addition, saying that it "will allow
|
||||||
# anyone using the 'mingw32' compiler to simply type 'make pcre.dll' and get a
|
# anyone using the 'mingw32' compiler to simply type 'make pcre.dll' and get a
|
||||||
# nice DLL for Windows use". (It is used by the pcre.dll target.)
|
# nice DLL for Windows use". (It is used by the pcre.dll target.)
|
||||||
DLL_OBJS= pcre_compile.o pcre_config.o \
|
DLL_OBJS= pcre_byte_order.o pcre_compile.o pcre_config.o \
|
||||||
pcre_dfa_exec.o pcre_exec.o pcre_fullinfo.o pcre_get.o \
|
pcre_dfa_exec.o pcre_exec.o pcre_fullinfo.o pcre_get.o \
|
||||||
pcre_globals.o pcre_info.o pcre_maketables.o \
|
pcre_globals.o pcre_jit_compile.o pcre_maketables.o \
|
||||||
pcre_newline.o pcre_ord2utf8.o pcre_refcount.o \
|
pcre_newline.o pcre_ord2utf8.o pcre_refcount.o \
|
||||||
pcre_study.o pcre_tables.o pcre_try_flipped.o \
|
pcre_study.o pcre_tables.o pcre_ucd.o \
|
||||||
pcre_ucd.o pcre_valid_utf8.o pcre_version.o \
|
pcre_valid_utf8.o pcre_version.o pcre_chartables.o \
|
||||||
pcre_chartables.o \
|
|
||||||
pcre_xclass.o
|
pcre_xclass.o
|
||||||
|
|
||||||
# A PCRE user submitted the following addition, saying that it "will allow
|
# A PCRE user submitted the following addition, saying that it "will allow
|
||||||
@ -327,14 +648,23 @@ pcre.dll: $(DLL_OBJS)
|
|||||||
|
|
||||||
# We have .pc files for pkg-config users.
|
# We have .pc files for pkg-config users.
|
||||||
pkgconfigdir = $(libdir)/pkgconfig
|
pkgconfigdir = $(libdir)/pkgconfig
|
||||||
pkgconfig_DATA = libpcre.pc
|
pkgconfig_DATA = libpcre.pc libpcreposix.pc
|
||||||
|
if WITH_PCRE16
|
||||||
|
pkgconfig_DATA += libpcre16.pc
|
||||||
|
endif
|
||||||
|
if WITH_PCRE32
|
||||||
|
pkgconfig_DATA += libpcre32.pc
|
||||||
|
endif
|
||||||
if WITH_PCRE_CPP
|
if WITH_PCRE_CPP
|
||||||
pkgconfig_DATA += libpcrecpp.pc
|
pkgconfig_DATA += libpcrecpp.pc
|
||||||
endif
|
endif
|
||||||
|
|
||||||
dist_man_MANS = \
|
dist_man_MANS = \
|
||||||
doc/pcre.3 \
|
doc/pcre.3 \
|
||||||
|
doc/pcre16.3 \
|
||||||
|
doc/pcre32.3 \
|
||||||
doc/pcre-config.1 \
|
doc/pcre-config.1 \
|
||||||
|
doc/pcre_assign_jit_stack.3 \
|
||||||
doc/pcre_compile.3 \
|
doc/pcre_compile.3 \
|
||||||
doc/pcre_compile2.3 \
|
doc/pcre_compile2.3 \
|
||||||
doc/pcre_config.3 \
|
doc/pcre_config.3 \
|
||||||
@ -342,6 +672,7 @@ dist_man_MANS = \
|
|||||||
doc/pcre_copy_substring.3 \
|
doc/pcre_copy_substring.3 \
|
||||||
doc/pcre_dfa_exec.3 \
|
doc/pcre_dfa_exec.3 \
|
||||||
doc/pcre_exec.3 \
|
doc/pcre_exec.3 \
|
||||||
|
doc/pcre_free_study.3 \
|
||||||
doc/pcre_free_substring.3 \
|
doc/pcre_free_substring.3 \
|
||||||
doc/pcre_free_substring_list.3 \
|
doc/pcre_free_substring_list.3 \
|
||||||
doc/pcre_fullinfo.3 \
|
doc/pcre_fullinfo.3 \
|
||||||
@ -350,16 +681,23 @@ dist_man_MANS = \
|
|||||||
doc/pcre_get_stringtable_entries.3 \
|
doc/pcre_get_stringtable_entries.3 \
|
||||||
doc/pcre_get_substring.3 \
|
doc/pcre_get_substring.3 \
|
||||||
doc/pcre_get_substring_list.3 \
|
doc/pcre_get_substring_list.3 \
|
||||||
doc/pcre_info.3 \
|
doc/pcre_jit_exec.3 \
|
||||||
|
doc/pcre_jit_stack_alloc.3 \
|
||||||
|
doc/pcre_jit_stack_free.3 \
|
||||||
doc/pcre_maketables.3 \
|
doc/pcre_maketables.3 \
|
||||||
|
doc/pcre_pattern_to_host_byte_order.3 \
|
||||||
doc/pcre_refcount.3 \
|
doc/pcre_refcount.3 \
|
||||||
doc/pcre_study.3 \
|
doc/pcre_study.3 \
|
||||||
|
doc/pcre_utf16_to_host_byte_order.3 \
|
||||||
|
doc/pcre_utf32_to_host_byte_order.3 \
|
||||||
doc/pcre_version.3 \
|
doc/pcre_version.3 \
|
||||||
doc/pcreapi.3 \
|
doc/pcreapi.3 \
|
||||||
doc/pcrebuild.3 \
|
doc/pcrebuild.3 \
|
||||||
doc/pcrecallout.3 \
|
doc/pcrecallout.3 \
|
||||||
doc/pcrecompat.3 \
|
doc/pcrecompat.3 \
|
||||||
doc/pcregrep.1 \
|
doc/pcregrep.1 \
|
||||||
|
doc/pcrejit.3 \
|
||||||
|
doc/pcrelimits.3 \
|
||||||
doc/pcrematching.3 \
|
doc/pcrematching.3 \
|
||||||
doc/pcrepartial.3 \
|
doc/pcrepartial.3 \
|
||||||
doc/pcrepattern.3 \
|
doc/pcrepattern.3 \
|
||||||
@ -369,7 +707,63 @@ dist_man_MANS = \
|
|||||||
doc/pcresample.3 \
|
doc/pcresample.3 \
|
||||||
doc/pcrestack.3 \
|
doc/pcrestack.3 \
|
||||||
doc/pcresyntax.3 \
|
doc/pcresyntax.3 \
|
||||||
doc/pcretest.1
|
doc/pcretest.1 \
|
||||||
|
doc/pcreunicode.3
|
||||||
|
|
||||||
|
# Arrange for the per-function man pages to have 16- and 32-bit names as well.
|
||||||
|
install-data-hook:
|
||||||
|
ln -sf pcre_assign_jit_stack.3 $(DESTDIR)$(man3dir)/pcre16_assign_jit_stack.3
|
||||||
|
ln -sf pcre_compile.3 $(DESTDIR)$(man3dir)/pcre16_compile.3
|
||||||
|
ln -sf pcre_compile2.3 $(DESTDIR)$(man3dir)/pcre16_compile2.3
|
||||||
|
ln -sf pcre_config.3 $(DESTDIR)$(man3dir)/pcre16_config.3
|
||||||
|
ln -sf pcre_copy_named_substring.3 $(DESTDIR)$(man3dir)/pcre16_copy_named_substring.3
|
||||||
|
ln -sf pcre_copy_substring.3 $(DESTDIR)$(man3dir)/pcre16_copy_substring.3
|
||||||
|
ln -sf pcre_dfa_exec.3 $(DESTDIR)$(man3dir)/pcre16_dfa_exec.3
|
||||||
|
ln -sf pcre_exec.3 $(DESTDIR)$(man3dir)/pcre16_exec.3
|
||||||
|
ln -sf pcre_free_study.3 $(DESTDIR)$(man3dir)/pcre16_free_study.3
|
||||||
|
ln -sf pcre_free_substring.3 $(DESTDIR)$(man3dir)/pcre16_free_substring.3
|
||||||
|
ln -sf pcre_free_substring_list.3 $(DESTDIR)$(man3dir)/pcre16_free_substring_list.3
|
||||||
|
ln -sf pcre_fullinfo.3 $(DESTDIR)$(man3dir)/pcre16_fullinfo.3
|
||||||
|
ln -sf pcre_get_named_substring.3 $(DESTDIR)$(man3dir)/pcre16_get_named_substring.3
|
||||||
|
ln -sf pcre_get_stringnumber.3 $(DESTDIR)$(man3dir)/pcre16_get_stringnumber.3
|
||||||
|
ln -sf pcre_get_stringtable_entries.3 $(DESTDIR)$(man3dir)/pcre16_get_stringtable_entries.3
|
||||||
|
ln -sf pcre_get_substring.3 $(DESTDIR)$(man3dir)/pcre16_get_substring.3
|
||||||
|
ln -sf pcre_get_substring_list.3 $(DESTDIR)$(man3dir)/pcre16_get_substring_list.3
|
||||||
|
ln -sf pcre_jit_exec.3 $(DESTDIR)$(man3dir)/pcre16_jit_exec.3
|
||||||
|
ln -sf pcre_jit_stack_alloc.3 $(DESTDIR)$(man3dir)/pcre16_jit_stack_alloc.3
|
||||||
|
ln -sf pcre_jit_stack_free.3 $(DESTDIR)$(man3dir)/pcre16_jit_stack_free.3
|
||||||
|
ln -sf pcre_maketables.3 $(DESTDIR)$(man3dir)/pcre16_maketables.3
|
||||||
|
ln -sf pcre_pattern_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre16_pattern_to_host_byte_order.3
|
||||||
|
ln -sf pcre_refcount.3 $(DESTDIR)$(man3dir)/pcre16_refcount.3
|
||||||
|
ln -sf pcre_study.3 $(DESTDIR)$(man3dir)/pcre16_study.3
|
||||||
|
ln -sf pcre_utf16_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre16_utf16_to_host_byte_order.3
|
||||||
|
ln -sf pcre_version.3 $(DESTDIR)$(man3dir)/pcre16_version.3
|
||||||
|
ln -sf pcre_assign_jit_stack.3 $(DESTDIR)$(man3dir)/pcre32_assign_jit_stack.3
|
||||||
|
ln -sf pcre_compile.3 $(DESTDIR)$(man3dir)/pcre32_compile.3
|
||||||
|
ln -sf pcre_compile2.3 $(DESTDIR)$(man3dir)/pcre32_compile2.3
|
||||||
|
ln -sf pcre_config.3 $(DESTDIR)$(man3dir)/pcre32_config.3
|
||||||
|
ln -sf pcre_copy_named_substring.3 $(DESTDIR)$(man3dir)/pcre32_copy_named_substring.3
|
||||||
|
ln -sf pcre_copy_substring.3 $(DESTDIR)$(man3dir)/pcre32_copy_substring.3
|
||||||
|
ln -sf pcre_dfa_exec.3 $(DESTDIR)$(man3dir)/pcre32_dfa_exec.3
|
||||||
|
ln -sf pcre_exec.3 $(DESTDIR)$(man3dir)/pcre32_exec.3
|
||||||
|
ln -sf pcre_free_study.3 $(DESTDIR)$(man3dir)/pcre32_free_study.3
|
||||||
|
ln -sf pcre_free_substring.3 $(DESTDIR)$(man3dir)/pcre32_free_substring.3
|
||||||
|
ln -sf pcre_free_substring_list.3 $(DESTDIR)$(man3dir)/pcre32_free_substring_list.3
|
||||||
|
ln -sf pcre_fullinfo.3 $(DESTDIR)$(man3dir)/pcre32_fullinfo.3
|
||||||
|
ln -sf pcre_get_named_substring.3 $(DESTDIR)$(man3dir)/pcre32_get_named_substring.3
|
||||||
|
ln -sf pcre_get_stringnumber.3 $(DESTDIR)$(man3dir)/pcre32_get_stringnumber.3
|
||||||
|
ln -sf pcre_get_stringtable_entries.3 $(DESTDIR)$(man3dir)/pcre32_get_stringtable_entries.3
|
||||||
|
ln -sf pcre_get_substring.3 $(DESTDIR)$(man3dir)/pcre32_get_substring.3
|
||||||
|
ln -sf pcre_get_substring_list.3 $(DESTDIR)$(man3dir)/pcre32_get_substring_list.3
|
||||||
|
ln -sf pcre_jit_exec.3 $(DESTDIR)$(man3dir)/pcre32_jit_exec.3
|
||||||
|
ln -sf pcre_jit_stack_alloc.3 $(DESTDIR)$(man3dir)/pcre32_jit_stack_alloc.3
|
||||||
|
ln -sf pcre_jit_stack_free.3 $(DESTDIR)$(man3dir)/pcre32_jit_stack_free.3
|
||||||
|
ln -sf pcre_maketables.3 $(DESTDIR)$(man3dir)/pcre32_maketables.3
|
||||||
|
ln -sf pcre_pattern_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre32_pattern_to_host_byte_order.3
|
||||||
|
ln -sf pcre_refcount.3 $(DESTDIR)$(man3dir)/pcre32_refcount.3
|
||||||
|
ln -sf pcre_study.3 $(DESTDIR)$(man3dir)/pcre32_study.3
|
||||||
|
ln -sf pcre_utf32_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre32_utf32_to_host_byte_order.3
|
||||||
|
ln -sf pcre_version.3 $(DESTDIR)$(man3dir)/pcre32_version.3
|
||||||
|
|
||||||
pcrecpp_man = doc/pcrecpp.3
|
pcrecpp_man = doc/pcrecpp.3
|
||||||
EXTRA_DIST += $(pcrecpp_man)
|
EXTRA_DIST += $(pcrecpp_man)
|
||||||
@ -378,12 +772,105 @@ if WITH_PCRE_CPP
|
|||||||
man_MANS = $(pcrecpp_man)
|
man_MANS = $(pcrecpp_man)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# gcov/lcov code coverage reporting
|
||||||
|
|
||||||
|
if WITH_GCOV
|
||||||
|
|
||||||
|
# Coverage reporting targets:
|
||||||
|
#
|
||||||
|
# coverage: Create a coverage report from 'make check'
|
||||||
|
# coverage-baseline: Capture baseline coverage information
|
||||||
|
# coverage-reset: This zeros the coverage counters only
|
||||||
|
# coverage-report: This creates the coverage report only
|
||||||
|
# coverage-clean-report: This removes the generated coverage report
|
||||||
|
# without cleaning the coverage data itself
|
||||||
|
# coverage-clean-data: This removes the captured coverage data without
|
||||||
|
# removing the coverage files created at compile time (*.gcno)
|
||||||
|
# coverage-clean: This cleans all coverage data including the generated
|
||||||
|
# coverage report.
|
||||||
|
|
||||||
|
COVERAGE_TEST_NAME = $(PACKAGE)
|
||||||
|
COVERAGE_NAME = $(PACKAGE)-$(VERSION)
|
||||||
|
COVERAGE_OUTPUT_FILE = $(COVERAGE_NAME)-coverage.info
|
||||||
|
COVERAGE_OUTPUT_DIR = $(COVERAGE_NAME)-coverage
|
||||||
|
COVERAGE_LCOV_EXTRA_FLAGS =
|
||||||
|
COVERAGE_GENHTML_EXTRA_FLAGS =
|
||||||
|
|
||||||
|
coverage_quiet = $(coverage_quiet_$(V))
|
||||||
|
coverage_quiet_ = $(coverage_quiet_$(AM_DEFAULT_VERBOSITY))
|
||||||
|
coverage_quiet_0 = --quiet
|
||||||
|
|
||||||
|
coverage-check: all
|
||||||
|
-$(MAKE) $(AM_MAKEFLAGS) -k check
|
||||||
|
|
||||||
|
coverage-baseline:
|
||||||
|
$(LCOV) $(coverage_quiet) \
|
||||||
|
--directory $(top_builddir) \
|
||||||
|
--output-file "$(COVERAGE_OUTPUT_FILE)" \
|
||||||
|
--capture \
|
||||||
|
--initial
|
||||||
|
|
||||||
|
coverage-report:
|
||||||
|
$(LCOV) $(coverage_quiet) \
|
||||||
|
--directory $(top_builddir) \
|
||||||
|
--capture \
|
||||||
|
--output-file "$(COVERAGE_OUTPUT_FILE).tmp" \
|
||||||
|
--test-name "$(COVERAGE_TEST_NAME)" \
|
||||||
|
--no-checksum \
|
||||||
|
--compat-libtool \
|
||||||
|
$(COVERAGE_LCOV_EXTRA_FLAGS)
|
||||||
|
$(LCOV) $(coverage_quiet) \
|
||||||
|
--directory $(top_builddir) \
|
||||||
|
--output-file "$(COVERAGE_OUTPUT_FILE)" \
|
||||||
|
--remove "$(COVERAGE_OUTPUT_FILE).tmp" \
|
||||||
|
"/tmp/*" \
|
||||||
|
"/usr/include/*" \
|
||||||
|
"$(includedir)/*"
|
||||||
|
-@rm -f "$(COVERAGE_OUTPUT_FILE).tmp"
|
||||||
|
LANG=C $(GENHTML) $(coverage_quiet) \
|
||||||
|
--prefix $(top_builddir) \
|
||||||
|
--output-directory "$(COVERAGE_OUTPUT_DIR)" \
|
||||||
|
--title "$(PACKAGE) $(VERSION) Code Coverage Report" \
|
||||||
|
--show-details "$(COVERAGE_OUTPUT_FILE)" \
|
||||||
|
--legend \
|
||||||
|
$(COVERAGE_GENHTML_EXTRA_FLAGS)
|
||||||
|
@echo "Code coverage report written to file://$(abs_builddir)/$(COVERAGE_OUTPUT_DIR)/index.html"
|
||||||
|
|
||||||
|
coverage-reset:
|
||||||
|
-$(LCOV) $(coverage_quiet) --zerocounters --directory $(top_builddir)
|
||||||
|
|
||||||
|
coverage-clean-report:
|
||||||
|
-rm -f "$(COVERAGE_OUTPUT_FILE)" "$(COVERAGE_OUTPUT_FILE).tmp"
|
||||||
|
-rm -rf "$(COVERAGE_OUTPUT_DIR)"
|
||||||
|
|
||||||
|
coverage-clean-data:
|
||||||
|
-find $(top_builddir) -name "*.gcda" -delete
|
||||||
|
|
||||||
|
coverage-clean: coverage-reset coverage-clean-report coverage-clean-data
|
||||||
|
-find $(top_builddir) -name "*.gcno" -delete
|
||||||
|
|
||||||
|
coverage-distclean: coverage-clean
|
||||||
|
|
||||||
|
coverage: coverage-reset coverage-baseline coverage-check coverage-report
|
||||||
|
clean-local: coverage-clean
|
||||||
|
distclean-local: coverage-distclean
|
||||||
|
|
||||||
|
.PHONY: coverage coverage-baseline coverage-check coverage-report coverage-reset coverage-clean-report coverage-clean-data coverage-clean coverage-distclean
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
coverage:
|
||||||
|
@echo "Configuring with --enable-coverage required to generate code coverage report."
|
||||||
|
|
||||||
|
endif # WITH_GCOV
|
||||||
|
|
||||||
## CMake support
|
## CMake support
|
||||||
|
|
||||||
EXTRA_DIST += \
|
EXTRA_DIST += \
|
||||||
cmake/COPYING-CMAKE-SCRIPTS \
|
cmake/COPYING-CMAKE-SCRIPTS \
|
||||||
cmake/FindPackageHandleStandardArgs.cmake \
|
cmake/FindPackageHandleStandardArgs.cmake \
|
||||||
cmake/FindReadline.cmake \
|
cmake/FindReadline.cmake \
|
||||||
|
cmake/FindEditline.cmake \
|
||||||
CMakeLists.txt \
|
CMakeLists.txt \
|
||||||
config-cmake.h.in
|
config-cmake.h.in
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
183
tools/pcre/NEWS
183
tools/pcre/NEWS
@ -1,6 +1,189 @@
|
|||||||
News about PCRE releases
|
News about PCRE releases
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
|
Release 8.32 30-November-2012
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
This release fixes a number of bugs, but also has some new features. These are
|
||||||
|
the highlights:
|
||||||
|
|
||||||
|
. There is now support for 32-bit character strings and UTF-32. Like the
|
||||||
|
16-bit support, this is done by compiling a separate 32-bit library.
|
||||||
|
|
||||||
|
. \X now matches a Unicode extended grapheme cluster.
|
||||||
|
|
||||||
|
. Case-independent matching of Unicode characters that have more than one
|
||||||
|
"other case" now makes all three (or more) characters equivalent. This
|
||||||
|
applies, for example, to Greek Sigma, which has two lowercase versions.
|
||||||
|
|
||||||
|
. Unicode character properties are updated to Unicode 6.2.0.
|
||||||
|
|
||||||
|
. The EBCDIC support, which had decayed, has had a spring clean.
|
||||||
|
|
||||||
|
. A number of JIT optimizations have been added, which give faster JIT
|
||||||
|
execution speed. In addition, a new direct interface to JIT execution is
|
||||||
|
available. This bypasses some of the sanity checks of pcre_exec() to give a
|
||||||
|
noticeable speed-up.
|
||||||
|
|
||||||
|
. A number of issues in pcregrep have been fixed, making it more compatible
|
||||||
|
with GNU grep. In particular, --exclude and --include (and variants) apply
|
||||||
|
to all files now, not just those obtained from scanning a directory
|
||||||
|
recursively. In Windows environments, the default action for directories is
|
||||||
|
now "skip" instead of "read" (which provokes an error).
|
||||||
|
|
||||||
|
. If the --only-matching (-o) option in pcregrep is specified multiple
|
||||||
|
times, each one causes appropriate output. For example, -o1 -o2 outputs the
|
||||||
|
substrings matched by the 1st and 2nd capturing parentheses. A separating
|
||||||
|
string can be specified by --om-separator (default empty).
|
||||||
|
|
||||||
|
. When PCRE is built via Autotools using a version of gcc that has the
|
||||||
|
"visibility" feature, it is used to hide internal library functions that are
|
||||||
|
not part of the public API.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.31 06-July-2012
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
This is mainly a bug-fixing release, with a small number of developments:
|
||||||
|
|
||||||
|
. The JIT compiler now supports partial matching and the (*MARK) and
|
||||||
|
(*COMMIT) verbs.
|
||||||
|
|
||||||
|
. PCRE_INFO_MAXLOOKBEHIND can be used to find the longest lookbehind in a
|
||||||
|
pattern.
|
||||||
|
|
||||||
|
. There should be a performance improvement when using the heap instead of the
|
||||||
|
stack for recursion.
|
||||||
|
|
||||||
|
. pcregrep can now be linked with libedit as an alternative to libreadline.
|
||||||
|
|
||||||
|
. pcregrep now has a --file-list option where the list of files to scan is
|
||||||
|
given as a file.
|
||||||
|
|
||||||
|
. pcregrep now recognizes binary files and there are related options.
|
||||||
|
|
||||||
|
. The Unicode tables have been updated to 6.1.0.
|
||||||
|
|
||||||
|
As always, the full list of changes is in the ChangeLog file.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.30 04-February-2012
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
Release 8.30 introduces a major new feature: support for 16-bit character
|
||||||
|
strings, compiled as a separate library. There are a few changes to the
|
||||||
|
8-bit library, in addition to some bug fixes.
|
||||||
|
|
||||||
|
. The pcre_info() function, which has been obsolete for over 10 years, has
|
||||||
|
been removed.
|
||||||
|
|
||||||
|
. When a compiled pattern was saved to a file and later reloaded on a host
|
||||||
|
with different endianness, PCRE used automatically to swap the bytes in some
|
||||||
|
of the data fields. With the advent of the 16-bit library, where more of this
|
||||||
|
swapping is needed, it is no longer done automatically. Instead, the bad
|
||||||
|
endianness is detected and a specific error is given. The user can then call
|
||||||
|
a new function called pcre_pattern_to_host_byte_order() (or an equivalent
|
||||||
|
16-bit function) to do the swap.
|
||||||
|
|
||||||
|
. In UTF-8 mode, the values 0xd800 to 0xdfff are not legal Unicode
|
||||||
|
code points and are now faulted. (They are the so-called "surrogates"
|
||||||
|
that are reserved for coding high values in UTF-16.)
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.21 12-Dec-2011
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
This is almost entirely a bug-fix release. The only new feature is the ability
|
||||||
|
to obtain the size of the memory used by the JIT compiler.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.20 21-Oct-2011
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
The main change in this release is the inclusion of Zoltan Herczeg's
|
||||||
|
just-in-time compiler support, which can be accessed by building PCRE with
|
||||||
|
--enable-jit. Large performance benefits can be had in many situations. 8.20
|
||||||
|
also fixes an unfortunate bug that was introduced in 8.13 as well as tidying up
|
||||||
|
a number of infelicities and differences from Perl.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.13 16-Aug-2011
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
This is mainly a bug-fix release. There has been a lot of internal refactoring.
|
||||||
|
The Unicode tables have been updated. The only new feature in the library is
|
||||||
|
the passing of *MARK information to callouts. Some additions have been made to
|
||||||
|
pcretest to make testing easier and more comprehensive. There is a new option
|
||||||
|
for pcregrep to adjust its internal buffer size.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.12 15-Jan-2011
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
This release fixes some bugs in pcregrep, one of which caused the tests to fail
|
||||||
|
on 64-bit big-endian systems. There are no changes to the code of the library.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.11 10-Dec-2010
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
A number of bugs in the library and in pcregrep have been fixed. As always, see
|
||||||
|
ChangeLog for details. The following are the non-bug-fix changes:
|
||||||
|
|
||||||
|
. Added --match-limit and --recursion-limit to pcregrep.
|
||||||
|
|
||||||
|
. Added an optional parentheses number to the -o and --only-matching options
|
||||||
|
of pcregrep.
|
||||||
|
|
||||||
|
. Changed the way PCRE_PARTIAL_HARD affects the matching of $, \z, \Z, \b, and
|
||||||
|
\B.
|
||||||
|
|
||||||
|
. Added PCRE_ERROR_SHORTUTF8 to make it possible to distinguish between a
|
||||||
|
bad UTF-8 sequence and one that is incomplete when using PCRE_PARTIAL_HARD.
|
||||||
|
|
||||||
|
. Recognize (*NO_START_OPT) at the start of a pattern to set the PCRE_NO_
|
||||||
|
START_OPTIMIZE option, which is now allowed at compile time.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.10 25-Jun-2010
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
There are two major additions: support for (*MARK) and friends, and the option
|
||||||
|
PCRE_UCP, which changes the behaviour of \b, \d, \s, and \w (and their
|
||||||
|
opposites) so that they make use of Unicode properties. There are also a number
|
||||||
|
of lesser new features, and several bugs have been fixed. A new option,
|
||||||
|
--line-buffered, has been added to pcregrep, for use when it is connected to
|
||||||
|
pipes.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.02 19-Mar-2010
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
Another bug-fix release.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.01 19-Jan-2010
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
This is a bug-fix release. Several bugs in the code itself and some bugs and
|
||||||
|
infelicities in the build system have been fixed.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.00 19-Oct-09
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
Bugs have been fixed in the library and in pcregrep. There are also some
|
||||||
|
enhancements. Restrictions on patterns used for partial matching have been
|
||||||
|
removed, extra information is given for partial matches, the partial matching
|
||||||
|
process has been improved, and an option to make a partial match override a
|
||||||
|
full match is available. The "study" process has been enhanced by finding a
|
||||||
|
lower bound matching length. Groups with duplicate numbers may now have
|
||||||
|
duplicated names without the use of PCRE_DUPNAMES. However, they may not have
|
||||||
|
different names. The documentation has been revised to reflect these changes.
|
||||||
|
The version number has been expanded to 3 digits as it is clear that the rate
|
||||||
|
of change is not slowing down.
|
||||||
|
|
||||||
|
|
||||||
Release 7.9 11-Apr-09
|
Release 7.9 11-Apr-09
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
639
tools/pcre/NON-AUTOTOOLS-BUILD
Normal file
639
tools/pcre/NON-AUTOTOOLS-BUILD
Normal file
@ -0,0 +1,639 @@
|
|||||||
|
Building PCRE without using autotools
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
This document contains the following sections:
|
||||||
|
|
||||||
|
General
|
||||||
|
Generic instructions for the PCRE C library
|
||||||
|
The C++ wrapper functions
|
||||||
|
Building for virtual Pascal
|
||||||
|
Stack size in Windows environments
|
||||||
|
Linking programs in Windows environments
|
||||||
|
Comments about Win32 builds
|
||||||
|
Building PCRE on Windows with CMake
|
||||||
|
Use of relative paths with CMake on Windows
|
||||||
|
Testing with RunTest.bat
|
||||||
|
Building under Windows with BCC5.5
|
||||||
|
Building PCRE on OpenVMS
|
||||||
|
Building PCRE on Stratus OpenVOS
|
||||||
|
Building PCRE on native z/OS and z/VM
|
||||||
|
|
||||||
|
|
||||||
|
GENERAL
|
||||||
|
|
||||||
|
I (Philip Hazel) have no experience of Windows or VMS systems and how their
|
||||||
|
libraries work. The items in the PCRE distribution and Makefile that relate to
|
||||||
|
anything other than Linux systems are untested by me.
|
||||||
|
|
||||||
|
There are some other comments and files (including some documentation in CHM
|
||||||
|
format) in the Contrib directory on the FTP site:
|
||||||
|
|
||||||
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
|
||||||
|
|
||||||
|
The basic PCRE library consists entirely of code written in Standard C, and so
|
||||||
|
should compile successfully on any system that has a Standard C compiler and
|
||||||
|
library. The C++ wrapper functions are a separate issue (see below).
|
||||||
|
|
||||||
|
The PCRE distribution includes a "configure" file for use by the configure/make
|
||||||
|
(autotools) build system, as found in many Unix-like environments. The README
|
||||||
|
file contains information about the options for "configure".
|
||||||
|
|
||||||
|
There is also support for CMake, which some users prefer, especially in Windows
|
||||||
|
environments, though it can also be run in Unix-like environments. See the
|
||||||
|
section entitled "Building PCRE on Windows with CMake" below.
|
||||||
|
|
||||||
|
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
|
||||||
|
names config.h.generic and pcre.h.generic. These are provided for those who
|
||||||
|
build PCRE without using "configure" or CMake. If you use "configure" or CMake,
|
||||||
|
the .generic versions are not used.
|
||||||
|
|
||||||
|
|
||||||
|
GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
|
||||||
|
|
||||||
|
The following are generic instructions for building the PCRE C library "by
|
||||||
|
hand". If you are going to use CMake, this section does not apply to you; you
|
||||||
|
can skip ahead to the CMake section.
|
||||||
|
|
||||||
|
(1) Copy or rename the file config.h.generic as config.h, and edit the macro
|
||||||
|
settings that it contains to whatever is appropriate for your environment.
|
||||||
|
|
||||||
|
In particular, you can alter the definition of the NEWLINE macro to
|
||||||
|
specify what character(s) you want to be interpreted as line terminators.
|
||||||
|
In an EBCDIC environment, you MUST change NEWLINE, because its default
|
||||||
|
value is 10, an ASCII LF. The usual EBCDIC newline character is 21 (0x15,
|
||||||
|
NL), though in some cases it may be 37 (0x25).
|
||||||
|
|
||||||
|
When you compile any of the PCRE modules, you must specify -DHAVE_CONFIG_H
|
||||||
|
to your compiler so that config.h is included in the sources.
|
||||||
|
|
||||||
|
An alternative approach is not to edit config.h, but to use -D on the
|
||||||
|
compiler command line to make any changes that you need to the
|
||||||
|
configuration options. In this case -DHAVE_CONFIG_H must not be set.
|
||||||
|
|
||||||
|
NOTE: There have been occasions when the way in which certain parameters
|
||||||
|
in config.h are used has changed between releases. (In the configure/make
|
||||||
|
world, this is handled automatically.) When upgrading to a new release,
|
||||||
|
you are strongly advised to review config.h.generic before re-using what
|
||||||
|
you had previously.
|
||||||
|
|
||||||
|
(2) Copy or rename the file pcre.h.generic as pcre.h.
|
||||||
|
|
||||||
|
(3) EITHER:
|
||||||
|
Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
|
||||||
|
|
||||||
|
OR:
|
||||||
|
Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if
|
||||||
|
you have set up config.h), and then run it with the single argument
|
||||||
|
"pcre_chartables.c". This generates a set of standard character tables
|
||||||
|
and writes them to that file. The tables are generated using the default
|
||||||
|
C locale for your system. If you want to use a locale that is specified
|
||||||
|
by LC_xxx environment variables, add the -L option to the dftables
|
||||||
|
command. You must use this method if you are building on a system that
|
||||||
|
uses EBCDIC code.
|
||||||
|
|
||||||
|
The tables in pcre_chartables.c are defaults. The caller of PCRE can
|
||||||
|
specify alternative tables at run time.
|
||||||
|
|
||||||
|
(4) Ensure that you have the following header files:
|
||||||
|
|
||||||
|
pcre_internal.h
|
||||||
|
ucp.h
|
||||||
|
|
||||||
|
(5) For an 8-bit library, compile the following source files, setting
|
||||||
|
-DHAVE_CONFIG_H as a compiler option if you have set up config.h with your
|
||||||
|
configuration, or else use other -D settings to change the configuration
|
||||||
|
as required.
|
||||||
|
|
||||||
|
pcre_byte_order.c
|
||||||
|
pcre_chartables.c
|
||||||
|
pcre_compile.c
|
||||||
|
pcre_config.c
|
||||||
|
pcre_dfa_exec.c
|
||||||
|
pcre_exec.c
|
||||||
|
pcre_fullinfo.c
|
||||||
|
pcre_get.c
|
||||||
|
pcre_globals.c
|
||||||
|
pcre_jit_compile.c
|
||||||
|
pcre_maketables.c
|
||||||
|
pcre_newline.c
|
||||||
|
pcre_ord2utf8.c
|
||||||
|
pcre_refcount.c
|
||||||
|
pcre_string_utils.c
|
||||||
|
pcre_study.c
|
||||||
|
pcre_tables.c
|
||||||
|
pcre_ucd.c
|
||||||
|
pcre_valid_utf8.c
|
||||||
|
pcre_version.c
|
||||||
|
pcre_xclass.c
|
||||||
|
|
||||||
|
Make sure that you include -I. in the compiler command (or equivalent for
|
||||||
|
an unusual compiler) so that all included PCRE header files are first
|
||||||
|
sought in the current directory. Otherwise you run the risk of picking up
|
||||||
|
a previously-installed file from somewhere else.
|
||||||
|
|
||||||
|
Note that you must still compile pcre_jit_compile.c, even if you have not
|
||||||
|
defined SUPPORT_JIT in config.h, because when JIT support is not
|
||||||
|
configured, dummy functions are compiled. When JIT support IS configured,
|
||||||
|
pcre_jit_compile.c #includes sources from the sljit subdirectory, where
|
||||||
|
there should be 16 files, all of whose names begin with "sljit".
|
||||||
|
|
||||||
|
(6) Now link all the compiled code into an object library in whichever form
|
||||||
|
your system keeps such libraries. This is the basic PCRE C 8-bit library.
|
||||||
|
If your system has static and shared libraries, you may have to do this
|
||||||
|
once for each type.
|
||||||
|
|
||||||
|
(7) If you want to build a 16-bit library (as well as, or instead of the 8-bit
|
||||||
|
or 32-bit libraries) repeat steps 5-6 with the following files:
|
||||||
|
|
||||||
|
pcre16_byte_order.c
|
||||||
|
pcre16_chartables.c
|
||||||
|
pcre16_compile.c
|
||||||
|
pcre16_config.c
|
||||||
|
pcre16_dfa_exec.c
|
||||||
|
pcre16_exec.c
|
||||||
|
pcre16_fullinfo.c
|
||||||
|
pcre16_get.c
|
||||||
|
pcre16_globals.c
|
||||||
|
pcre16_jit_compile.c
|
||||||
|
pcre16_maketables.c
|
||||||
|
pcre16_newline.c
|
||||||
|
pcre16_ord2utf16.c
|
||||||
|
pcre16_refcount.c
|
||||||
|
pcre16_string_utils.c
|
||||||
|
pcre16_study.c
|
||||||
|
pcre16_tables.c
|
||||||
|
pcre16_ucd.c
|
||||||
|
pcre16_utf16_utils.c
|
||||||
|
pcre16_valid_utf16.c
|
||||||
|
pcre16_version.c
|
||||||
|
pcre16_xclass.c
|
||||||
|
|
||||||
|
(7') If you want to build a 32-bit library (as well as, or instead of the 8-bit
|
||||||
|
or 16-bit libraries) repeat steps 5-6 with the following files:
|
||||||
|
|
||||||
|
pcre32_byte_order.c
|
||||||
|
pcre32_chartables.c
|
||||||
|
pcre32_compile.c
|
||||||
|
pcre32_config.c
|
||||||
|
pcre32_dfa_exec.c
|
||||||
|
pcre32_exec.c
|
||||||
|
pcre32_fullinfo.c
|
||||||
|
pcre32_get.c
|
||||||
|
pcre32_globals.c
|
||||||
|
pcre32_jit_compile.c
|
||||||
|
pcre32_maketables.c
|
||||||
|
pcre32_newline.c
|
||||||
|
pcre32_ord2utf32.c
|
||||||
|
pcre32_refcount.c
|
||||||
|
pcre32_string_utils.c
|
||||||
|
pcre32_study.c
|
||||||
|
pcre32_tables.c
|
||||||
|
pcre32_ucd.c
|
||||||
|
pcre32_utf32_utils.c
|
||||||
|
pcre32_valid_utf32.c
|
||||||
|
pcre32_version.c
|
||||||
|
pcre32_xclass.c
|
||||||
|
|
||||||
|
(8) If you want to build the POSIX wrapper functions (which apply only to the
|
||||||
|
8-bit library), ensure that you have the pcreposix.h file and then compile
|
||||||
|
pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result
|
||||||
|
(on its own) as the pcreposix library.
|
||||||
|
|
||||||
|
(9) The pcretest program can be linked with any combination of the 8-bit, 16-bit
|
||||||
|
and 32-bit libraries (depending on what you selected in config.h). Compile
|
||||||
|
pcretest.c and pcre_printint.c (again, don't forget -DHAVE_CONFIG_H) and
|
||||||
|
link them together with the appropriate library/ies. If you compiled an
|
||||||
|
8-bit library, pcretest also needs the pcreposix wrapper library unless
|
||||||
|
you compiled it with -DNOPOSIX.
|
||||||
|
|
||||||
|
(10) Run pcretest on the testinput files in the testdata directory, and check
|
||||||
|
that the output matches the corresponding testoutput files. There are
|
||||||
|
comments about what each test does in the section entitled "Testing PCRE"
|
||||||
|
in the README file. If you compiled more than one of the 8-bit, 16-bit and
|
||||||
|
32-bit libraries, you need to run pcretest with the -16 option to do 16-bit
|
||||||
|
tests and with the -32 option to do 32-bit tests.
|
||||||
|
|
||||||
|
Some tests are relevant only when certain build-time options are selected.
|
||||||
|
For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run if
|
||||||
|
you have built PCRE without it. See the comments at the start of each
|
||||||
|
testinput file. If you have a suitable Unix-like shell, the RunTest script
|
||||||
|
will run the appropriate tests for you.
|
||||||
|
|
||||||
|
Note that the supplied files are in Unix format, with just LF characters
|
||||||
|
as line terminators. You may need to edit them to change this if your
|
||||||
|
system uses a different convention. If you are using Windows, you probably
|
||||||
|
should use the wintestinput3 file instead of testinput3 (and the
|
||||||
|
corresponding output file). This is a locale test; wintestinput3 sets the
|
||||||
|
locale to "french" rather than "fr_FR", and there are some minor output
|
||||||
|
differences.
|
||||||
|
|
||||||
|
(11) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
|
||||||
|
by the testdata files. However, you might also like to build and run
|
||||||
|
the JIT test program, pcre_jit_test.c.
|
||||||
|
|
||||||
|
(12) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
||||||
|
uses only the basic 8-bit PCRE library (it does not need the pcreposix
|
||||||
|
library).
|
||||||
|
|
||||||
|
|
||||||
|
THE C++ WRAPPER FUNCTIONS
|
||||||
|
|
||||||
|
The PCRE distribution also contains some C++ wrapper functions and tests,
|
||||||
|
applicable to the 8-bit library, which were contributed by Google Inc. On a
|
||||||
|
system that can use "configure" and "make", the functions are automatically
|
||||||
|
built into a library called pcrecpp. It should be straightforward to compile
|
||||||
|
the .cc files manually on other systems. The files called xxx_unittest.cc are
|
||||||
|
test programs for each of the corresponding xxx.cc files.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING FOR VIRTUAL PASCAL
|
||||||
|
|
||||||
|
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
|
||||||
|
was contributed by Alexander Tokarev. Stefan Weber updated the script and added
|
||||||
|
additional files. The following files in the distribution are for building PCRE
|
||||||
|
for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
|
||||||
|
|
||||||
|
|
||||||
|
STACK SIZE IN WINDOWS ENVIRONMENTS
|
||||||
|
|
||||||
|
The default processor stack size of 1Mb in some Windows environments is too
|
||||||
|
small for matching patterns that need much recursion. In particular, test 2 may
|
||||||
|
fail because of this. Normally, running out of stack causes a crash, but there
|
||||||
|
have been cases where the test program has just died silently. See your linker
|
||||||
|
documentation for how to increase stack size if you experience problems. The
|
||||||
|
Linux default of 8Mb is a reasonable choice for the stack, though even that can
|
||||||
|
be too small for some pattern/subject combinations.
|
||||||
|
|
||||||
|
PCRE has a compile configuration option to disable the use of stack for
|
||||||
|
recursion so that heap is used instead. However, pattern matching is
|
||||||
|
significantly slower when this is done. There is more about stack usage in the
|
||||||
|
"pcrestack" documentation.
|
||||||
|
|
||||||
|
|
||||||
|
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
|
||||||
|
|
||||||
|
If you want to statically link a program against a PCRE library in the form of
|
||||||
|
a non-dll .a file, you must define PCRE_STATIC before including pcre.h or
|
||||||
|
pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will
|
||||||
|
be declared __declspec(dllimport), with unwanted results.
|
||||||
|
|
||||||
|
|
||||||
|
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
|
||||||
|
|
||||||
|
It is possible to compile programs to use different calling conventions using
|
||||||
|
MSVC. Search the web for "calling conventions" for more information. To make it
|
||||||
|
easier to change the calling convention for the exported functions in the
|
||||||
|
PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external
|
||||||
|
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
|
||||||
|
not set, it defaults to empty; the default calling convention is then used
|
||||||
|
(which is what is wanted most of the time).
|
||||||
|
|
||||||
|
|
||||||
|
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE ON WINDOWS WITH CMAKE")
|
||||||
|
|
||||||
|
There are two ways of building PCRE using the "configure, make, make install"
|
||||||
|
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
|
||||||
|
the same thing; they are completely different from each other. There is also
|
||||||
|
support for building using CMake, which some users find a more straightforward
|
||||||
|
way of building PCRE under Windows.
|
||||||
|
|
||||||
|
The MinGW home page (http://www.mingw.org/) says this:
|
||||||
|
|
||||||
|
MinGW: A collection of freely available and freely distributable Windows
|
||||||
|
specific header files and import libraries combined with GNU toolsets that
|
||||||
|
allow one to produce native Windows programs that do not rely on any
|
||||||
|
3rd-party C runtime DLLs.
|
||||||
|
|
||||||
|
The Cygwin home page (http://www.cygwin.com/) says this:
|
||||||
|
|
||||||
|
Cygwin is a Linux-like environment for Windows. It consists of two parts:
|
||||||
|
|
||||||
|
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
|
||||||
|
substantial Linux API functionality
|
||||||
|
|
||||||
|
. A collection of tools which provide Linux look and feel.
|
||||||
|
|
||||||
|
The Cygwin DLL currently works with all recent, commercially released x86 32
|
||||||
|
bit and 64 bit versions of Windows, with the exception of Windows CE.
|
||||||
|
|
||||||
|
On both MinGW and Cygwin, PCRE should build correctly using:
|
||||||
|
|
||||||
|
./configure && make && make install
|
||||||
|
|
||||||
|
This should create two libraries called libpcre and libpcreposix, and, if you
|
||||||
|
have enabled building the C++ wrapper, a third one called libpcrecpp. These are
|
||||||
|
independent libraries: when you link with libpcreposix or libpcrecpp you must
|
||||||
|
also link with libpcre, which contains the basic functions. (Some earlier
|
||||||
|
releases of PCRE included the basic libpcre functions in libpcreposix. This no
|
||||||
|
longer happens.)
|
||||||
|
|
||||||
|
A user submitted a special-purpose patch that makes it easy to create
|
||||||
|
"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll"
|
||||||
|
as a special target. If you use this target, no other files are built, and in
|
||||||
|
particular, the pcretest and pcregrep programs are not built. An example of how
|
||||||
|
this might be used is:
|
||||||
|
|
||||||
|
./configure --enable-utf --disable-cpp CFLAGS="-O3 -s"; make pcre.dll
|
||||||
|
|
||||||
|
Using Cygwin's compiler generates libraries and executables that depend on
|
||||||
|
cygwin1.dll. If a library that is generated this way is distributed,
|
||||||
|
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
|
||||||
|
licence, this forces not only PCRE to be under the GPL, but also the entire
|
||||||
|
application. A distributor who wants to keep their own code proprietary must
|
||||||
|
purchase an appropriate Cygwin licence.
|
||||||
|
|
||||||
|
MinGW has no such restrictions. The MinGW compiler generates a library or
|
||||||
|
executable that can run standalone on Windows without any third party dll or
|
||||||
|
licensing issues.
|
||||||
|
|
||||||
|
But there is more complication:
|
||||||
|
|
||||||
|
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
|
||||||
|
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
|
||||||
|
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
|
||||||
|
gcc and MinGW's gcc). So, a user can:
|
||||||
|
|
||||||
|
. Build native binaries by using MinGW or by getting Cygwin and using
|
||||||
|
-mno-cygwin.
|
||||||
|
|
||||||
|
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
|
||||||
|
compiler flags.
|
||||||
|
|
||||||
|
The test files that are supplied with PCRE are in UNIX format, with LF
|
||||||
|
characters as line terminators. Unless your PCRE library uses a default newline
|
||||||
|
option that includes LF as a valid newline, it may be necessary to change the
|
||||||
|
line terminators in the test files to get some of the tests to work.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING PCRE ON WINDOWS WITH CMAKE
|
||||||
|
|
||||||
|
CMake is an alternative configuration facility that can be used instead of
|
||||||
|
"configure". CMake creates project files (make files, solution files, etc.)
|
||||||
|
tailored to numerous development environments, including Visual Studio,
|
||||||
|
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
||||||
|
spaces in the names for your CMake installation and your PCRE source and build
|
||||||
|
directories.
|
||||||
|
|
||||||
|
The following instructions were contributed by a PCRE user. If they are not
|
||||||
|
followed exactly, errors may occur. In the event that errors do occur, it is
|
||||||
|
recommended that you delete the CMake cache before attempting to repeat the
|
||||||
|
CMake build process. In the CMake GUI, the cache can be deleted by selecting
|
||||||
|
"File > Delete Cache".
|
||||||
|
|
||||||
|
1. Install the latest CMake version available from http://www.cmake.org/, and
|
||||||
|
ensure that cmake\bin is on your path.
|
||||||
|
|
||||||
|
2. Unzip (retaining folder structure) the PCRE source tree into a source
|
||||||
|
directory such as C:\pcre. You should ensure your local date and time
|
||||||
|
is not earlier than the file dates in your source dir if the release is
|
||||||
|
very new.
|
||||||
|
|
||||||
|
3. Create a new, empty build directory, preferably a subdirectory of the
|
||||||
|
source dir. For example, C:\pcre\pcre-xx\build.
|
||||||
|
|
||||||
|
4. Run cmake-gui from the Shell environment of your build tool, for example,
|
||||||
|
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
|
||||||
|
to start CMake from the Windows Start menu, as this can lead to errors.
|
||||||
|
|
||||||
|
5. Enter C:\pcre\pcre-xx and C:\pcre\pcre-xx\build for the source and build
|
||||||
|
directories, respectively.
|
||||||
|
|
||||||
|
6. Hit the "Configure" button.
|
||||||
|
|
||||||
|
7. Select the particular IDE / build tool that you are using (Visual
|
||||||
|
Studio, MSYS makefiles, MinGW makefiles, etc.)
|
||||||
|
|
||||||
|
8. The GUI will then list several configuration options. This is where
|
||||||
|
you can enable UTF-8 support or other PCRE optional features.
|
||||||
|
|
||||||
|
9. Hit "Configure" again. The adjacent "Generate" button should now be
|
||||||
|
active.
|
||||||
|
|
||||||
|
10. Hit "Generate".
|
||||||
|
|
||||||
|
11. The build directory should now contain a usable build system, be it a
|
||||||
|
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
|
||||||
|
cmake-gui and use the generated build system with your compiler or IDE.
|
||||||
|
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE
|
||||||
|
solution, select the desired configuration (Debug, or Release, etc.) and
|
||||||
|
build the ALL_BUILD project.
|
||||||
|
|
||||||
|
12. If during configuration with cmake-gui you've elected to build the test
|
||||||
|
programs, you can execute them by building the test project. E.g., for
|
||||||
|
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
|
||||||
|
most recent build configuration is targeted by the tests. A summary of
|
||||||
|
test results is presented. Complete test output is subsequently
|
||||||
|
available for review in Testing\Temporary under your build dir.
|
||||||
|
|
||||||
|
|
||||||
|
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
|
||||||
|
|
||||||
|
A PCRE user comments as follows:
|
||||||
|
|
||||||
|
I thought that others may want to know the current state of
|
||||||
|
CMAKE_USE_RELATIVE_PATHS support on Windows.
|
||||||
|
|
||||||
|
Here it is:
|
||||||
|
-- AdditionalIncludeDirectories is only partially modified (only the
|
||||||
|
first path - see below)
|
||||||
|
-- Only some of the contained file paths are modified - shown below for
|
||||||
|
pcre.vcproj
|
||||||
|
-- It properly modifies
|
||||||
|
|
||||||
|
I am sure CMake people can fix that if they want to. Until then one will
|
||||||
|
need to replace existing absolute paths in project files with relative
|
||||||
|
paths manually (e.g. from VS) - relative to project file location. I did
|
||||||
|
just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big
|
||||||
|
deal.
|
||||||
|
|
||||||
|
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
|
||||||
|
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
|
||||||
|
|
||||||
|
RelativePath="pcre.h">
|
||||||
|
RelativePath="pcre_chartables.c">
|
||||||
|
RelativePath="pcre_chartables.c.rule">
|
||||||
|
|
||||||
|
|
||||||
|
TESTING WITH RUNTEST.BAT
|
||||||
|
|
||||||
|
If configured with CMake, building the test project ("make test" or building
|
||||||
|
RUN_TESTS in Visual Studio) creates (and runs) pcre_test.bat (and depending
|
||||||
|
on your configuration options, possibly other test programs) in the build
|
||||||
|
directory. Pcre_test.bat runs RunTest.Bat with correct source and exe paths.
|
||||||
|
|
||||||
|
For manual testing with RunTest.bat, provided the build dir is a subdirectory
|
||||||
|
of the source directory: Open command shell window. Chdir to the location
|
||||||
|
of your pcretest.exe and pcregrep.exe programs. Call RunTest.bat with
|
||||||
|
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
|
||||||
|
|
||||||
|
To run only a particular test with RunTest.Bat provide a test number argument.
|
||||||
|
|
||||||
|
Otherwise:
|
||||||
|
|
||||||
|
1. Copy RunTest.bat into the directory where pcretest.exe and pcregrep.exe
|
||||||
|
have been created.
|
||||||
|
|
||||||
|
2. Edit RunTest.bat to identify the full or relative location of
|
||||||
|
the pcre source (wherein the testdata folder resides), e.g.:
|
||||||
|
|
||||||
|
set srcdir=C:\pcre\pcre-8.20
|
||||||
|
|
||||||
|
3. In a Windows command environment, chdir to the location of your bat and
|
||||||
|
exe programs.
|
||||||
|
|
||||||
|
4. Run RunTest.bat. Test outputs will automatically be compared to expected
|
||||||
|
results, and discrepancies will be identified in the console output.
|
||||||
|
|
||||||
|
To independently test the just-in-time compiler, run pcre_jit_test.exe.
|
||||||
|
To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
|
||||||
|
pcre_scanner_unittest.exe.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING UNDER WINDOWS WITH BCC5.5
|
||||||
|
|
||||||
|
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
|
||||||
|
|
||||||
|
Some of the core BCC libraries have a version of PCRE from 1998 built in,
|
||||||
|
which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a
|
||||||
|
version mismatch. I'm including an easy workaround below, if you'd like to
|
||||||
|
include it in the non-unix instructions:
|
||||||
|
|
||||||
|
When linking a project with BCC5.5, pcre.lib must be included before any of
|
||||||
|
the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command
|
||||||
|
line.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
|
||||||
|
|
||||||
|
Vincent Richomme sent a zip archive of files to help with this process. They
|
||||||
|
can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
|
||||||
|
site.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING PCRE ON OPENVMS
|
||||||
|
|
||||||
|
Dan Mooney sent the following comments about building PCRE on OpenVMS. They
|
||||||
|
relate to an older version of PCRE that used fewer source files, so the exact
|
||||||
|
commands will need changing. See the current list of source files above.
|
||||||
|
|
||||||
|
"It was quite easy to compile and link the library. I don't have a formal
|
||||||
|
make file but the attached file [reproduced below] contains the OpenVMS DCL
|
||||||
|
commands I used to build the library. I had to add #define
|
||||||
|
POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere.
|
||||||
|
|
||||||
|
The library was built on:
|
||||||
|
O/S: HP OpenVMS v7.3-1
|
||||||
|
Compiler: Compaq C v6.5-001-48BCD
|
||||||
|
Linker: vA13-01
|
||||||
|
|
||||||
|
The test results did not match 100% due to the issues you mention in your
|
||||||
|
documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I
|
||||||
|
modified some of the character tables temporarily and was able to get the
|
||||||
|
results to match. Tests using the fr locale did not match since I don't have
|
||||||
|
that locale loaded. The study size was always reported to be 3 less than the
|
||||||
|
value in the standard test output files."
|
||||||
|
|
||||||
|
=========================
|
||||||
|
$! This DCL procedure builds PCRE on OpenVMS
|
||||||
|
$!
|
||||||
|
$! I followed the instructions in the non-unix-use file in the distribution.
|
||||||
|
$!
|
||||||
|
$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES
|
||||||
|
$ COMPILE DFTABLES.C
|
||||||
|
$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ
|
||||||
|
$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C
|
||||||
|
$ COMPILE MAKETABLES.C
|
||||||
|
$ COMPILE GET.C
|
||||||
|
$ COMPILE STUDY.C
|
||||||
|
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
|
||||||
|
$! did not seem to be defined anywhere.
|
||||||
|
$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support.
|
||||||
|
$ COMPILE PCRE.C
|
||||||
|
$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ
|
||||||
|
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
|
||||||
|
$! did not seem to be defined anywhere.
|
||||||
|
$ COMPILE PCREPOSIX.C
|
||||||
|
$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ
|
||||||
|
$ COMPILE PCRETEST.C
|
||||||
|
$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB
|
||||||
|
$! C programs that want access to command line arguments must be
|
||||||
|
$! defined as a symbol
|
||||||
|
$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE"
|
||||||
|
$! Arguments must be enclosed in quotes.
|
||||||
|
$ PCRETEST "-C"
|
||||||
|
$! Test results:
|
||||||
|
$!
|
||||||
|
$! The test results did not match 100%. The functions isprint(), iscntrl(),
|
||||||
|
$! isgraph() and ispunct() on OpenVMS must not produce the same results
|
||||||
|
$! as the system that built the test output files provided with the
|
||||||
|
$! distribution.
|
||||||
|
$!
|
||||||
|
$! The study size did not match and was always 3 less on OpenVMS.
|
||||||
|
$!
|
||||||
|
$! Locale could not be set to fr
|
||||||
|
$!
|
||||||
|
=========================
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING PCRE ON STRATUS OPENVOS
|
||||||
|
|
||||||
|
These notes on the port of PCRE to VOS (lightly edited) were supplied by
|
||||||
|
Ashutosh Warikoo, whose email address has the local part awarikoo and the
|
||||||
|
domain nse.co.in. The port was for version 7.9 in August 2009.
|
||||||
|
|
||||||
|
1. Building PCRE
|
||||||
|
|
||||||
|
I built pcre on OpenVOS Release 17.0.1at using GNU Tools 3.4a without any
|
||||||
|
problems. I used the following packages to build PCRE:
|
||||||
|
|
||||||
|
ftp://ftp.stratus.com/pub/vos/posix/ga/posix.save.evf.gz
|
||||||
|
|
||||||
|
Please read and follow the instructions that come with these packages. To start
|
||||||
|
the build of pcre, from the root of the package type:
|
||||||
|
|
||||||
|
./build.sh
|
||||||
|
|
||||||
|
2. Installing PCRE
|
||||||
|
|
||||||
|
Once you have successfully built PCRE, login to the SysAdmin group, switch to
|
||||||
|
the root user, and type
|
||||||
|
|
||||||
|
[ !create_dir (master_disk)>usr --if needed ]
|
||||||
|
[ !create_dir (master_disk)>usr>local --if needed ]
|
||||||
|
!gmake install
|
||||||
|
|
||||||
|
This installs PCRE and its man pages into /usr/local. You can add
|
||||||
|
(master_disk)>usr>local>bin to your command search paths, or if you are in
|
||||||
|
BASH, add /usr/local/bin to the PATH environment variable.
|
||||||
|
|
||||||
|
4. Restrictions
|
||||||
|
|
||||||
|
This port requires readline library optionally. However during the build I
|
||||||
|
faced some yet unexplored errors while linking with readline. As it was an
|
||||||
|
optional component I chose to disable it.
|
||||||
|
|
||||||
|
5. Known Problems
|
||||||
|
|
||||||
|
I ran the test suite, but you will have to be your own judge of whether this
|
||||||
|
command, and this port, suits your purposes. If you find any problems that
|
||||||
|
appear to be related to the port itself, please let me know. Please see the
|
||||||
|
build.log file in the root of the package also.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING PCRE ON NATIVE Z/OS AND Z/VM
|
||||||
|
|
||||||
|
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
|
||||||
|
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
|
||||||
|
applications can be supported through UNIX System Services, and in such an
|
||||||
|
environment PCRE can be built in the same way as in other systems. However, in
|
||||||
|
native z/OS (without UNIX System Services) and in z/VM, special ports are
|
||||||
|
required. For details, please see this web site:
|
||||||
|
|
||||||
|
http://www.zaconsultants.net
|
||||||
|
|
||||||
|
There is also a mirror here:
|
||||||
|
|
||||||
|
http://www.vsoft-software.com/downloads.html
|
||||||
|
|
||||||
|
==========================
|
||||||
|
Last Updated: 21 November 2012
|
@ -1,448 +1,7 @@
|
|||||||
Compiling PCRE on non-Unix systems
|
Compiling PCRE on non-Unix systems
|
||||||
----------------------------------
|
----------------------------------
|
||||||
|
|
||||||
This document contains the following sections:
|
This has been renamed to better reflect its contents. Please see the file
|
||||||
|
NON-AUTOTOOLS-BUILD for details of how to build PCRE without using autotools.
|
||||||
|
|
||||||
General
|
####
|
||||||
Generic instructions for the PCRE C library
|
|
||||||
The C++ wrapper functions
|
|
||||||
Building for virtual Pascal
|
|
||||||
Stack size in Windows environments
|
|
||||||
Linking programs in Windows environments
|
|
||||||
Comments about Win32 builds
|
|
||||||
Building PCRE on Windows with CMake
|
|
||||||
Use of relative paths with CMake on Windows
|
|
||||||
Testing with runtest.bat
|
|
||||||
Building under Windows with BCC5.5
|
|
||||||
Building PCRE on OpenVMS
|
|
||||||
|
|
||||||
|
|
||||||
GENERAL
|
|
||||||
|
|
||||||
I (Philip Hazel) have no experience of Windows or VMS systems and how their
|
|
||||||
libraries work. The items in the PCRE distribution and Makefile that relate to
|
|
||||||
anything other than Unix-like systems are untested by me.
|
|
||||||
|
|
||||||
There are some other comments and files (including some documentation in CHM
|
|
||||||
format) in the Contrib directory on the FTP site:
|
|
||||||
|
|
||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
|
|
||||||
|
|
||||||
If you want to compile PCRE for a non-Unix system (especially for a system that
|
|
||||||
does not support "configure" and "make" files), note that the basic PCRE
|
|
||||||
library consists entirely of code written in Standard C, and so should compile
|
|
||||||
successfully on any system that has a Standard C compiler and library. The C++
|
|
||||||
wrapper functions are a separate issue (see below).
|
|
||||||
|
|
||||||
The PCRE distribution includes a "configure" file for use by the Configure/Make
|
|
||||||
build system, as found in many Unix-like environments. There is also support
|
|
||||||
for CMake, which some users prefer, in particular in Windows
|
|
||||||
environments. There are some instructions for CMake under Windows in the
|
|
||||||
section entitled "Building PCRE with CMake" below. CMake can also be used to
|
|
||||||
build PCRE in Unix-like systems.
|
|
||||||
|
|
||||||
|
|
||||||
GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
|
|
||||||
|
|
||||||
The following are generic comments about building the PCRE C library "by hand".
|
|
||||||
|
|
||||||
(1) Copy or rename the file config.h.generic as config.h, and edit the macro
|
|
||||||
settings that it contains to whatever is appropriate for your environment.
|
|
||||||
In particular, if you want to force a specific value for newline, you can
|
|
||||||
define the NEWLINE macro. When you compile any of the PCRE modules, you
|
|
||||||
must specify -DHAVE_CONFIG_H to your compiler so that config.h is included
|
|
||||||
in the sources.
|
|
||||||
|
|
||||||
An alternative approach is not to edit config.h, but to use -D on the
|
|
||||||
compiler command line to make any changes that you need to the
|
|
||||||
configuration options. In this case -DHAVE_CONFIG_H must not be set.
|
|
||||||
|
|
||||||
NOTE: There have been occasions when the way in which certain parameters
|
|
||||||
in config.h are used has changed between releases. (In the configure/make
|
|
||||||
world, this is handled automatically.) When upgrading to a new release,
|
|
||||||
you are strongly advised to review config.h.generic before re-using what
|
|
||||||
you had previously.
|
|
||||||
|
|
||||||
(2) Copy or rename the file pcre.h.generic as pcre.h.
|
|
||||||
|
|
||||||
(3) EITHER:
|
|
||||||
Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
|
|
||||||
|
|
||||||
OR:
|
|
||||||
Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if
|
|
||||||
you have set up config.h), and then run it with the single argument
|
|
||||||
"pcre_chartables.c". This generates a set of standard character tables
|
|
||||||
and writes them to that file. The tables are generated using the default
|
|
||||||
C locale for your system. If you want to use a locale that is specified
|
|
||||||
by LC_xxx environment variables, add the -L option to the dftables
|
|
||||||
command. You must use this method if you are building on a system that
|
|
||||||
uses EBCDIC code.
|
|
||||||
|
|
||||||
The tables in pcre_chartables.c are defaults. The caller of PCRE can
|
|
||||||
specify alternative tables at run time.
|
|
||||||
|
|
||||||
(4) Ensure that you have the following header files:
|
|
||||||
|
|
||||||
pcre_internal.h
|
|
||||||
ucp.h
|
|
||||||
|
|
||||||
(5) Also ensure that you have the following file, which is #included as source
|
|
||||||
when building a debugging version of PCRE, and is also used by pcretest.
|
|
||||||
|
|
||||||
pcre_printint.src
|
|
||||||
|
|
||||||
(6) Compile the following source files, setting -DHAVE_CONFIG_H as a compiler
|
|
||||||
option if you have set up config.h with your configuration, or else use
|
|
||||||
other -D settings to change the configuration as required.
|
|
||||||
|
|
||||||
pcre_chartables.c
|
|
||||||
pcre_compile.c
|
|
||||||
pcre_config.c
|
|
||||||
pcre_dfa_exec.c
|
|
||||||
pcre_exec.c
|
|
||||||
pcre_fullinfo.c
|
|
||||||
pcre_get.c
|
|
||||||
pcre_globals.c
|
|
||||||
pcre_info.c
|
|
||||||
pcre_maketables.c
|
|
||||||
pcre_newline.c
|
|
||||||
pcre_ord2utf8.c
|
|
||||||
pcre_refcount.c
|
|
||||||
pcre_study.c
|
|
||||||
pcre_tables.c
|
|
||||||
pcre_try_flipped.c
|
|
||||||
pcre_ucd.c
|
|
||||||
pcre_valid_utf8.c
|
|
||||||
pcre_version.c
|
|
||||||
pcre_xclass.c
|
|
||||||
|
|
||||||
Make sure that you include -I. in the compiler command (or equivalent for
|
|
||||||
an unusual compiler) so that all included PCRE header files are first
|
|
||||||
sought in the current directory. Otherwise you run the risk of picking up
|
|
||||||
a previously-installed file from somewhere else.
|
|
||||||
|
|
||||||
(7) Now link all the compiled code into an object library in whichever form
|
|
||||||
your system keeps such libraries. This is the basic PCRE C library. If
|
|
||||||
your system has static and shared libraries, you may have to do this once
|
|
||||||
for each type.
|
|
||||||
|
|
||||||
(8) Similarly, compile pcreposix.c (remembering -DHAVE_CONFIG_H if necessary)
|
|
||||||
and link the result (on its own) as the pcreposix library.
|
|
||||||
|
|
||||||
(9) Compile the test program pcretest.c (again, don't forget -DHAVE_CONFIG_H).
|
|
||||||
This needs the functions in the pcre and pcreposix libraries when linking.
|
|
||||||
It also needs the pcre_printint.src source file, which it #includes.
|
|
||||||
|
|
||||||
(10) Run pcretest on the testinput files in the testdata directory, and check
|
|
||||||
that the output matches the corresponding testoutput files. Note that the
|
|
||||||
supplied files are in Unix format, with just LF characters as line
|
|
||||||
terminators. You may need to edit them to change this if your system uses
|
|
||||||
a different convention. If you are using Windows, you probably should use
|
|
||||||
the wintestinput3 file instead of testinput3 (and the corresponding output
|
|
||||||
file). This is a locale test; wintestinput3 sets the locale to "french"
|
|
||||||
rather than "fr_FR", and there are some minor output differences.
|
|
||||||
|
|
||||||
(11) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
|
||||||
uses only the basic PCRE library (it does not need the pcreposix library).
|
|
||||||
|
|
||||||
|
|
||||||
THE C++ WRAPPER FUNCTIONS
|
|
||||||
|
|
||||||
The PCRE distribution also contains some C++ wrapper functions and tests,
|
|
||||||
contributed by Google Inc. On a system that can use "configure" and "make",
|
|
||||||
the functions are automatically built into a library called pcrecpp. It should
|
|
||||||
be straightforward to compile the .cc files manually on other systems. The
|
|
||||||
files called xxx_unittest.cc are test programs for each of the corresponding
|
|
||||||
xxx.cc files.
|
|
||||||
|
|
||||||
|
|
||||||
BUILDING FOR VIRTUAL PASCAL
|
|
||||||
|
|
||||||
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
|
|
||||||
was contributed by Alexander Tokarev. Stefan Weber updated the script and added
|
|
||||||
additional files. The following files in the distribution are for building PCRE
|
|
||||||
for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
|
|
||||||
|
|
||||||
|
|
||||||
STACK SIZE IN WINDOWS ENVIRONMENTS
|
|
||||||
|
|
||||||
The default processor stack size of 1Mb in some Windows environments is too
|
|
||||||
small for matching patterns that need much recursion. In particular, test 2 may
|
|
||||||
fail because of this. Normally, running out of stack causes a crash, but there
|
|
||||||
have been cases where the test program has just died silently. See your linker
|
|
||||||
documentation for how to increase stack size if you experience problems. The
|
|
||||||
Linux default of 8Mb is a reasonable choice for the stack, though even that can
|
|
||||||
be too small for some pattern/subject combinations.
|
|
||||||
|
|
||||||
PCRE has a compile configuration option to disable the use of stack for
|
|
||||||
recursion so that heap is used instead. However, pattern matching is
|
|
||||||
significantly slower when this is done. There is more about stack usage in the
|
|
||||||
"pcrestack" documentation.
|
|
||||||
|
|
||||||
|
|
||||||
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
|
|
||||||
|
|
||||||
If you want to statically link a program against a PCRE library in the form of
|
|
||||||
a non-dll .a file, you must define PCRE_STATIC before including pcre.h,
|
|
||||||
otherwise the pcre_malloc() and pcre_free() exported functions will be declared
|
|
||||||
__declspec(dllimport), with unwanted results.
|
|
||||||
|
|
||||||
|
|
||||||
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
|
|
||||||
|
|
||||||
It is possible to compile programs to use different calling conventions using
|
|
||||||
MSVC. Search the web for "calling conventions" for more information. To make it
|
|
||||||
easier to change the calling convention for the exported functions in the
|
|
||||||
PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external
|
|
||||||
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
|
|
||||||
not set, it defaults to empty; the default calling convention is then used
|
|
||||||
(which is what is wanted most of the time).
|
|
||||||
|
|
||||||
|
|
||||||
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE WITH CMAKE" below)
|
|
||||||
|
|
||||||
There are two ways of building PCRE using the "configure, make, make install"
|
|
||||||
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
|
|
||||||
the same thing; they are completely different from each other. There is also
|
|
||||||
support for building using CMake, which some users find a more straightforward
|
|
||||||
way of building PCRE under Windows. However, the tests are not run
|
|
||||||
automatically when CMake is used.
|
|
||||||
|
|
||||||
The MinGW home page (http://www.mingw.org/) says this:
|
|
||||||
|
|
||||||
MinGW: A collection of freely available and freely distributable Windows
|
|
||||||
specific header files and import libraries combined with GNU toolsets that
|
|
||||||
allow one to produce native Windows programs that do not rely on any
|
|
||||||
3rd-party C runtime DLLs.
|
|
||||||
|
|
||||||
The Cygwin home page (http://www.cygwin.com/) says this:
|
|
||||||
|
|
||||||
Cygwin is a Linux-like environment for Windows. It consists of two parts:
|
|
||||||
|
|
||||||
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
|
|
||||||
substantial Linux API functionality
|
|
||||||
|
|
||||||
. A collection of tools which provide Linux look and feel.
|
|
||||||
|
|
||||||
The Cygwin DLL currently works with all recent, commercially released x86 32
|
|
||||||
bit and 64 bit versions of Windows, with the exception of Windows CE.
|
|
||||||
|
|
||||||
On both MinGW and Cygwin, PCRE should build correctly using:
|
|
||||||
|
|
||||||
./configure && make && make install
|
|
||||||
|
|
||||||
This should create two libraries called libpcre and libpcreposix, and, if you
|
|
||||||
have enabled building the C++ wrapper, a third one called libpcrecpp. These are
|
|
||||||
independent libraries: when you link with libpcreposix or libpcrecpp you must
|
|
||||||
also link with libpcre, which contains the basic functions. (Some earlier
|
|
||||||
releases of PCRE included the basic libpcre functions in libpcreposix. This no
|
|
||||||
longer happens.)
|
|
||||||
|
|
||||||
A user submitted a special-purpose patch that makes it easy to create
|
|
||||||
"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll"
|
|
||||||
as a special target. If you use this target, no other files are built, and in
|
|
||||||
particular, the pcretest and pcregrep programs are not built. An example of how
|
|
||||||
this might be used is:
|
|
||||||
|
|
||||||
./configure --enable-utf --disable-cpp CFLAGS="-O3 -s"; make pcre.dll
|
|
||||||
|
|
||||||
Using Cygwin's compiler generates libraries and executables that depend on
|
|
||||||
cygwin1.dll. If a library that is generated this way is distributed,
|
|
||||||
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
|
|
||||||
licence, this forces not only PCRE to be under the GPL, but also the entire
|
|
||||||
application. A distributor who wants to keep their own code proprietary must
|
|
||||||
purchase an appropriate Cygwin licence.
|
|
||||||
|
|
||||||
MinGW has no such restrictions. The MinGW compiler generates a library or
|
|
||||||
executable that can run standalone on Windows without any third party dll or
|
|
||||||
licensing issues.
|
|
||||||
|
|
||||||
But there is more complication:
|
|
||||||
|
|
||||||
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
|
|
||||||
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
|
|
||||||
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
|
|
||||||
gcc and MinGW's gcc). So, a user can:
|
|
||||||
|
|
||||||
. Build native binaries by using MinGW or by getting Cygwin and using
|
|
||||||
-mno-cygwin.
|
|
||||||
|
|
||||||
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
|
|
||||||
compiler flags.
|
|
||||||
|
|
||||||
The test files that are supplied with PCRE are in Unix format, with LF
|
|
||||||
characters as line terminators. It may be necessary to change the line
|
|
||||||
terminators in order to get some of the tests to work. We hope to improve
|
|
||||||
things in this area in future.
|
|
||||||
|
|
||||||
|
|
||||||
BUILDING PCRE ON WINDOWS WITH CMAKE
|
|
||||||
|
|
||||||
CMake is an alternative build facility that can be used instead of the
|
|
||||||
traditional Unix "configure". CMake version 2.4.7 supports Borland makefiles,
|
|
||||||
MinGW makefiles, MSYS makefiles, NMake makefiles, UNIX makefiles, Visual Studio
|
|
||||||
6, Visual Studio 7, Visual Studio 8, and Watcom W8. The following instructions
|
|
||||||
were contributed by a PCRE user.
|
|
||||||
|
|
||||||
1. Download CMake 2.4.7 or above from http://www.cmake.org/, install and ensure
|
|
||||||
that cmake\bin is on your path.
|
|
||||||
|
|
||||||
2. Unzip (retaining folder structure) the PCRE source tree into a source
|
|
||||||
directory such as C:\pcre.
|
|
||||||
|
|
||||||
3. Create a new, empty build directory: C:\pcre\build\
|
|
||||||
|
|
||||||
4. Run CMakeSetup from the Shell environment of your build tool, e.g., Msys
|
|
||||||
for Msys/MinGW or Visual Studio Command Prompt for VC/VC++
|
|
||||||
|
|
||||||
5. Enter C:\pcre\pcre-xx and C:\pcre\build for the source and build
|
|
||||||
directories, respectively
|
|
||||||
|
|
||||||
6. Hit the "Configure" button.
|
|
||||||
|
|
||||||
7. Select the particular IDE / build tool that you are using (Visual Studio,
|
|
||||||
MSYS makefiles, MinGW makefiles, etc.)
|
|
||||||
|
|
||||||
8. The GUI will then list several configuration options. This is where you can
|
|
||||||
enable UTF-8 support, etc.
|
|
||||||
|
|
||||||
9. Hit "Configure" again. The adjacent "OK" button should now be active.
|
|
||||||
|
|
||||||
10. Hit "OK".
|
|
||||||
|
|
||||||
11. The build directory should now contain a usable build system, be it a
|
|
||||||
solution file for Visual Studio, makefiles for MinGW, etc.
|
|
||||||
|
|
||||||
|
|
||||||
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
|
|
||||||
|
|
||||||
A PCRE user comments as follows:
|
|
||||||
|
|
||||||
I thought that others may want to know the current state of
|
|
||||||
CMAKE_USE_RELATIVE_PATHS support on Windows.
|
|
||||||
|
|
||||||
Here it is:
|
|
||||||
-- AdditionalIncludeDirectories is only partially modified (only the
|
|
||||||
first path - see below)
|
|
||||||
-- Only some of the contained file paths are modified - shown below for
|
|
||||||
pcre.vcproj
|
|
||||||
-- It properly modifies
|
|
||||||
|
|
||||||
I am sure CMake people can fix that if they want to. Until then one will
|
|
||||||
need to replace existing absolute paths in project files with relative
|
|
||||||
paths manually (e.g. from VS) - relative to project file location. I did
|
|
||||||
just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big
|
|
||||||
deal.
|
|
||||||
|
|
||||||
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
|
|
||||||
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
|
|
||||||
|
|
||||||
RelativePath="pcre.h">
|
|
||||||
RelativePath="pcre_chartables.c">
|
|
||||||
RelativePath="pcre_chartables.c.rule">
|
|
||||||
|
|
||||||
|
|
||||||
TESTING WITH RUNTEST.BAT
|
|
||||||
|
|
||||||
1. Copy RunTest.bat into the directory where pcretest.exe has been created.
|
|
||||||
|
|
||||||
2. Edit RunTest.bat and insert a line that identifies the relative location of
|
|
||||||
the pcre source, e.g.:
|
|
||||||
|
|
||||||
set srcdir=..\pcre-7.4-RC3
|
|
||||||
|
|
||||||
3. Run RunTest.bat from a command shell environment. Test outputs will
|
|
||||||
automatically be compared to expected results, and discrepancies will
|
|
||||||
be identified in the console output.
|
|
||||||
|
|
||||||
4. To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
|
|
||||||
pcre_scanner_unittest.exe.
|
|
||||||
|
|
||||||
|
|
||||||
BUILDING UNDER WINDOWS WITH BCC5.5
|
|
||||||
|
|
||||||
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
|
|
||||||
|
|
||||||
Some of the core BCC libraries have a version of PCRE from 1998 built in,
|
|
||||||
which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a
|
|
||||||
version mismatch. I'm including an easy workaround below, if you'd like to
|
|
||||||
include it in the non-unix instructions:
|
|
||||||
|
|
||||||
When linking a project with BCC5.5, pcre.lib must be included before any of
|
|
||||||
the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command
|
|
||||||
line.
|
|
||||||
|
|
||||||
|
|
||||||
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
|
|
||||||
|
|
||||||
Vincent Richomme sent a zip archive of files to help with this process. They
|
|
||||||
can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
|
|
||||||
site.
|
|
||||||
|
|
||||||
|
|
||||||
BUILDING PCRE ON OPENVMS
|
|
||||||
|
|
||||||
Dan Mooney sent the following comments about building PCRE on OpenVMS. They
|
|
||||||
relate to an older version of PCRE that used fewer source files, so the exact
|
|
||||||
commands will need changing. See the current list of source files above.
|
|
||||||
|
|
||||||
"It was quite easy to compile and link the library. I don't have a formal
|
|
||||||
make file but the attached file [reproduced below] contains the OpenVMS DCL
|
|
||||||
commands I used to build the library. I had to add #define
|
|
||||||
POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere.
|
|
||||||
|
|
||||||
The library was built on:
|
|
||||||
O/S: HP OpenVMS v7.3-1
|
|
||||||
Compiler: Compaq C v6.5-001-48BCD
|
|
||||||
Linker: vA13-01
|
|
||||||
|
|
||||||
The test results did not match 100% due to the issues you mention in your
|
|
||||||
documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I
|
|
||||||
modified some of the character tables temporarily and was able to get the
|
|
||||||
results to match. Tests using the fr locale did not match since I don't have
|
|
||||||
that locale loaded. The study size was always reported to be 3 less than the
|
|
||||||
value in the standard test output files."
|
|
||||||
|
|
||||||
=========================
|
|
||||||
$! This DCL procedure builds PCRE on OpenVMS
|
|
||||||
$!
|
|
||||||
$! I followed the instructions in the non-unix-use file in the distribution.
|
|
||||||
$!
|
|
||||||
$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES
|
|
||||||
$ COMPILE DFTABLES.C
|
|
||||||
$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ
|
|
||||||
$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C
|
|
||||||
$ COMPILE MAKETABLES.C
|
|
||||||
$ COMPILE GET.C
|
|
||||||
$ COMPILE STUDY.C
|
|
||||||
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
|
|
||||||
$! did not seem to be defined anywhere.
|
|
||||||
$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support.
|
|
||||||
$ COMPILE PCRE.C
|
|
||||||
$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ
|
|
||||||
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
|
|
||||||
$! did not seem to be defined anywhere.
|
|
||||||
$ COMPILE PCREPOSIX.C
|
|
||||||
$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ
|
|
||||||
$ COMPILE PCRETEST.C
|
|
||||||
$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB
|
|
||||||
$! C programs that want access to command line arguments must be
|
|
||||||
$! defined as a symbol
|
|
||||||
$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE"
|
|
||||||
$! Arguments must be enclosed in quotes.
|
|
||||||
$ PCRETEST "-C"
|
|
||||||
$! Test results:
|
|
||||||
$!
|
|
||||||
$! The test results did not match 100%. The functions isprint(), iscntrl(),
|
|
||||||
$! isgraph() and ispunct() on OpenVMS must not produce the same results
|
|
||||||
$! as the system that built the test output files provided with the
|
|
||||||
$! distribution.
|
|
||||||
$!
|
|
||||||
$! The study size did not match and was always 3 less on OpenVMS.
|
|
||||||
$!
|
|
||||||
$! Locale could not be set to fr
|
|
||||||
$!
|
|
||||||
=========================
|
|
||||||
|
|
||||||
Last Updated: 17 March 2009
|
|
||||||
****
|
|
||||||
|
@ -4,12 +4,15 @@
|
|||||||
# processing of the documentation, detrails files, and creates pcre.h.generic
|
# processing of the documentation, detrails files, and creates pcre.h.generic
|
||||||
# and config.h.generic (for use by builders who can't run ./configure).
|
# and config.h.generic (for use by builders who can't run ./configure).
|
||||||
|
|
||||||
# You must run this script before running "make dist". It makes use of the
|
# You must run this script before running "make dist". If its first argument
|
||||||
# following files:
|
# is "doc", it stops after preparing the documentation. There are no other
|
||||||
|
# arguments. The script makes use of the following files:
|
||||||
|
|
||||||
# 132html A Perl script that converts a .1 or .3 man page into HTML. It
|
# 132html A Perl script that converts a .1 or .3 man page into HTML. It
|
||||||
# is called from MakeRelease. It "knows" the relevant troff
|
# "knows" the relevant troff constructs that are used in the PCRE
|
||||||
# constructs that are used in the PCRE man pages.
|
# man pages.
|
||||||
|
|
||||||
|
# CheckMan A Perl script that checks man pages for typos in the mark up.
|
||||||
|
|
||||||
# CleanTxt A Perl script that cleans up the output of "nroff -man" by
|
# CleanTxt A Perl script that cleans up the output of "nroff -man" by
|
||||||
# removing backspaces and other redundant text so as to produce
|
# removing backspaces and other redundant text so as to produce
|
||||||
@ -23,11 +26,20 @@
|
|||||||
# doc/html can be deleted and re-created from scratch.
|
# doc/html can be deleted and re-created from scratch.
|
||||||
|
|
||||||
|
|
||||||
# First, sort out the documentation
|
# First, sort out the documentation. Remove pcredemo.3 first because it won't
|
||||||
|
# pass the markup check (it is created below, using markup that none of the
|
||||||
|
# other pages use).
|
||||||
|
|
||||||
cd doc
|
cd doc
|
||||||
echo Processing documentation
|
echo Processing documentation
|
||||||
|
|
||||||
|
/bin/rm -f pcredemo.3
|
||||||
|
|
||||||
|
# Check the remaining man pages
|
||||||
|
|
||||||
|
perl ../CheckMan *.1 *.3
|
||||||
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
|
|
||||||
# Make Text form of the documentation. It needs some mangling to make it
|
# Make Text form of the documentation. It needs some mangling to make it
|
||||||
# tidy for online reading. Concatenate all the .3 stuff, but omit the
|
# tidy for online reading. Concatenate all the .3 stuff, but omit the
|
||||||
# individual function pages.
|
# individual function pages.
|
||||||
@ -37,20 +49,22 @@ cat <<End >pcre.txt
|
|||||||
This file contains a concatenation of the PCRE man pages, converted to plain
|
This file contains a concatenation of the PCRE man pages, converted to plain
|
||||||
text format for ease of searching with a text editor, or for use on systems
|
text format for ease of searching with a text editor, or for use on systems
|
||||||
that do not have a man page processor. The small individual files that give
|
that do not have a man page processor. The small individual files that give
|
||||||
synopses of each function in the library have not been included. There are
|
synopses of each function in the library have not been included. Neither has
|
||||||
separate text files for the pcregrep and pcretest commands.
|
the pcredemo program. There are separate text files for the pcregrep and
|
||||||
|
pcretest commands.
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
End
|
End
|
||||||
|
|
||||||
echo "Making pcre.txt"
|
echo "Making pcre.txt"
|
||||||
for file in pcre pcrebuild pcrematching pcreapi pcrecallout pcrecompat \
|
for file in pcre pcre16 pcre32 pcrebuild pcrematching pcreapi pcrecallout \
|
||||||
pcrepattern pcresyntax pcrepartial pcreprecompile \
|
pcrecompat pcrepattern pcresyntax pcreunicode pcrejit pcrepartial \
|
||||||
pcreperform pcreposix pcrecpp pcresample pcrestack ; do
|
pcreprecompile pcreperform pcreposix pcrecpp pcresample \
|
||||||
|
pcrelimits pcrestack ; do
|
||||||
echo " Processing $file.3"
|
echo " Processing $file.3"
|
||||||
nroff -c -man $file.3 >$file.rawtxt
|
nroff -c -man $file.3 >$file.rawtxt
|
||||||
../CleanTxt <$file.rawtxt >>pcre.txt
|
perl ../CleanTxt <$file.rawtxt >>pcre.txt
|
||||||
/bin/rm $file.rawtxt
|
/bin/rm $file.rawtxt
|
||||||
echo "------------------------------------------------------------------------------" >>pcre.txt
|
echo "------------------------------------------------------------------------------" >>pcre.txt
|
||||||
if [ "$file" != "pcresample" ] ; then
|
if [ "$file" != "pcresample" ] ; then
|
||||||
@ -63,11 +77,46 @@ done
|
|||||||
for file in pcretest pcregrep pcre-config ; do
|
for file in pcretest pcregrep pcre-config ; do
|
||||||
echo Making $file.txt
|
echo Making $file.txt
|
||||||
nroff -c -man $file.1 >$file.rawtxt
|
nroff -c -man $file.1 >$file.rawtxt
|
||||||
../CleanTxt <$file.rawtxt >$file.txt
|
perl ../CleanTxt <$file.rawtxt >$file.txt
|
||||||
/bin/rm $file.rawtxt
|
/bin/rm $file.rawtxt
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
||||||
|
# Make pcredemo.3 from the pcredemo.c source file
|
||||||
|
|
||||||
|
echo "Making pcredemo.3"
|
||||||
|
perl <<"END" >pcredemo.3
|
||||||
|
open(IN, "../pcredemo.c") || die "Failed to open pcredemo.c\n";
|
||||||
|
open(OUT, ">pcredemo.3") || die "Failed to open pcredemo.3\n";
|
||||||
|
print OUT ".\\\" Start example.\n" .
|
||||||
|
".de EX\n" .
|
||||||
|
". nr mE \\\\n(.f\n" .
|
||||||
|
". nf\n" .
|
||||||
|
". nh\n" .
|
||||||
|
". ft CW\n" .
|
||||||
|
"..\n" .
|
||||||
|
".\n" .
|
||||||
|
".\n" .
|
||||||
|
".\\\" End example.\n" .
|
||||||
|
".de EE\n" .
|
||||||
|
". ft \\\\n(mE\n" .
|
||||||
|
". fi\n" .
|
||||||
|
". hy \\\\n(HY\n" .
|
||||||
|
"..\n" .
|
||||||
|
".\n" .
|
||||||
|
".EX\n" ;
|
||||||
|
while (<IN>)
|
||||||
|
{
|
||||||
|
s/\\/\\e/g;
|
||||||
|
print OUT;
|
||||||
|
}
|
||||||
|
print OUT ".EE\n";
|
||||||
|
close(IN);
|
||||||
|
close(OUT);
|
||||||
|
END
|
||||||
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
|
|
||||||
|
|
||||||
# Make HTML form of the documentation.
|
# Make HTML form of the documentation.
|
||||||
|
|
||||||
echo "Making HTML documentation"
|
echo "Making HTML documentation"
|
||||||
@ -77,35 +126,42 @@ cp index.html.src html/index.html
|
|||||||
for file in *.1 ; do
|
for file in *.1 ; do
|
||||||
base=`basename $file .1`
|
base=`basename $file .1`
|
||||||
echo " Making $base.html"
|
echo " Making $base.html"
|
||||||
../132html -toc $base <$file >html/$base.html
|
perl ../132html -toc $base <$file >html/$base.html
|
||||||
done
|
done
|
||||||
|
|
||||||
# Exclude table of contents for function summaries. It seems that expr
|
# Exclude table of contents for function summaries. It seems that expr
|
||||||
# forces an anchored regex. Also exclude them for small pages that have
|
# forces an anchored regex. Also exclude them for small pages that have
|
||||||
# only one section.
|
# only one section.
|
||||||
|
|
||||||
for file in *.3 ; do
|
for file in *.3 ; do
|
||||||
base=`basename $file .3`
|
base=`basename $file .3`
|
||||||
toc=-toc
|
toc=-toc
|
||||||
if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi
|
if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi
|
||||||
if [ "$base" = "pcresample" ] || \
|
if [ "$base" = "pcresample" ] || \
|
||||||
[ "$base" = "pcrestack" ] || \
|
[ "$base" = "pcrestack" ] || \
|
||||||
[ "$base" = "pcrecompat" ] || \
|
[ "$base" = "pcrecompat" ] || \
|
||||||
[ "$base" = "pcreperform" ] ; then
|
[ "$base" = "pcrelimits" ] || \
|
||||||
|
[ "$base" = "pcreperform" ] || \
|
||||||
|
[ "$base" = "pcreunicode" ] ; then
|
||||||
toc=""
|
toc=""
|
||||||
fi
|
fi
|
||||||
echo " Making $base.html"
|
echo " Making $base.html"
|
||||||
../132html $toc $base <$file >html/$base.html
|
perl ../132html $toc $base <$file >html/$base.html
|
||||||
if [ $? != 0 ] ; then exit 1; fi
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# End of documentation processing
|
# End of documentation processing; stop if only documentation required.
|
||||||
|
|
||||||
cd ..
|
cd ..
|
||||||
echo Documentation done
|
echo Documentation done
|
||||||
|
if [ "$1" = "doc" ] ; then exit; fi
|
||||||
|
|
||||||
# These files are detrailed; do not detrail the test data because there may be
|
# These files are detrailed; do not detrail the test data because there may be
|
||||||
# significant trailing spaces. The configure files are also omitted from the
|
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
|
||||||
# detrailing.
|
# line endings and the detrail script removes all trailing white space. The
|
||||||
|
# configure files are also omitted from the detrailing. We don't bother with
|
||||||
|
# those pcre[16|32]_xx files that just define COMPILE_PCRE16 and then #include the
|
||||||
|
# common file, because they aren't going to change.
|
||||||
|
|
||||||
files="\
|
files="\
|
||||||
Makefile.am \
|
Makefile.am \
|
||||||
@ -117,6 +173,7 @@ files="\
|
|||||||
AUTHORS \
|
AUTHORS \
|
||||||
NEWS \
|
NEWS \
|
||||||
NON-UNIX-USE \
|
NON-UNIX-USE \
|
||||||
|
NON-AUTOTOOLS-BUILD \
|
||||||
INSTALL \
|
INSTALL \
|
||||||
132html \
|
132html \
|
||||||
CleanTxt \
|
CleanTxt \
|
||||||
@ -125,12 +182,13 @@ files="\
|
|||||||
CMakeLists.txt \
|
CMakeLists.txt \
|
||||||
RunGrepTest \
|
RunGrepTest \
|
||||||
RunTest \
|
RunTest \
|
||||||
RunTest.bat \
|
|
||||||
pcre-config.in \
|
pcre-config.in \
|
||||||
libpcre.pc.in \
|
libpcre.pc.in \
|
||||||
|
libpcre16.pc.in \
|
||||||
|
libpcre32.pc.in \
|
||||||
|
libpcreposix.pc.in \
|
||||||
libpcrecpp.pc.in \
|
libpcrecpp.pc.in \
|
||||||
config.h.in \
|
config.h.in \
|
||||||
pcre_printint.src \
|
|
||||||
pcre_chartables.c.dist \
|
pcre_chartables.c.dist \
|
||||||
pcredemo.c \
|
pcredemo.c \
|
||||||
pcregrep.c \
|
pcregrep.c \
|
||||||
@ -139,7 +197,8 @@ files="\
|
|||||||
pcreposix.c \
|
pcreposix.c \
|
||||||
pcreposix.h \
|
pcreposix.h \
|
||||||
pcre.h.in \
|
pcre.h.in \
|
||||||
pcre_internal.h
|
pcre_internal.h \
|
||||||
|
pcre_byte_order.c \
|
||||||
pcre_compile.c \
|
pcre_compile.c \
|
||||||
pcre_config.c \
|
pcre_config.c \
|
||||||
pcre_dfa_exec.c \
|
pcre_dfa_exec.c \
|
||||||
@ -147,18 +206,26 @@ files="\
|
|||||||
pcre_fullinfo.c \
|
pcre_fullinfo.c \
|
||||||
pcre_get.c \
|
pcre_get.c \
|
||||||
pcre_globals.c \
|
pcre_globals.c \
|
||||||
pcre_info.c \
|
pcre_jit_compile.c \
|
||||||
|
pcre_jit_test.c \
|
||||||
pcre_maketables.c \
|
pcre_maketables.c \
|
||||||
pcre_newline.c \
|
pcre_newline.c \
|
||||||
pcre_ord2utf8.c \
|
pcre_ord2utf8.c \
|
||||||
|
pcre16_ord2utf16.c \
|
||||||
|
pcre32_ord2utf32.c \
|
||||||
|
pcre_printint.c \
|
||||||
pcre_refcount.c \
|
pcre_refcount.c \
|
||||||
|
pcre_string_utils.c \
|
||||||
pcre_study.c \
|
pcre_study.c \
|
||||||
pcre_tables.c \
|
pcre_tables.c \
|
||||||
pcre_try_flipped.c \
|
|
||||||
pcre_ucp_searchfuncs.c \
|
pcre_ucp_searchfuncs.c \
|
||||||
pcre_valid_utf8.c \
|
pcre_valid_utf8.c \
|
||||||
pcre_version.c \
|
pcre_version.c \
|
||||||
pcre_xclass.c \
|
pcre_xclass.c \
|
||||||
|
pcre16_utf16_utils.c \
|
||||||
|
pcre32_utf32_utils.c \
|
||||||
|
pcre16_valid_utf16.c \
|
||||||
|
pcre32_valid_utf32.c \
|
||||||
pcre_scanner.cc \
|
pcre_scanner.cc \
|
||||||
pcre_scanner.h \
|
pcre_scanner.h \
|
||||||
pcre_scanner_unittest.cc \
|
pcre_scanner_unittest.cc \
|
||||||
@ -179,35 +246,7 @@ files="\
|
|||||||
libpcreposix.def"
|
libpcreposix.def"
|
||||||
|
|
||||||
echo Detrailing
|
echo Detrailing
|
||||||
./Detrail $files doc/p* doc/html/*
|
perl ./Detrail $files doc/p* doc/html/*
|
||||||
|
|
||||||
echo Doing basic configure to get default pcre.h and config.h
|
|
||||||
# This is in case the caller has set aliases (as I do - PH)
|
|
||||||
unset cp ls mv rm
|
|
||||||
./configure >/dev/null
|
|
||||||
|
|
||||||
echo Converting pcre.h and config.h to generic forms
|
|
||||||
cp -f pcre.h pcre.h.generic
|
|
||||||
|
|
||||||
perl <<'END'
|
|
||||||
open(IN, "<config.h") || die "Can't open config.h: $!\n";
|
|
||||||
open(OUT, ">config.h.generic") || die "Can't open config.h.generic: $!\n";
|
|
||||||
while (<IN>)
|
|
||||||
{
|
|
||||||
if (/^#define\s(?!PACKAGE)(\w+)/)
|
|
||||||
{
|
|
||||||
print OUT "#ifndef $1\n";
|
|
||||||
print OUT;
|
|
||||||
print OUT "#endif\n";
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
print OUT;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close IN;
|
|
||||||
close OUT;
|
|
||||||
END
|
|
||||||
|
|
||||||
echo Done
|
echo Done
|
||||||
|
|
||||||
|
@ -18,12 +18,14 @@ The contents of this README file are:
|
|||||||
The PCRE APIs
|
The PCRE APIs
|
||||||
Documentation for PCRE
|
Documentation for PCRE
|
||||||
Contributions by users of PCRE
|
Contributions by users of PCRE
|
||||||
Building PCRE on non-Unix systems
|
Building PCRE on non-Unix-like systems
|
||||||
Building PCRE on Unix-like systems
|
Building PCRE without using autotools
|
||||||
Retrieving configuration information on Unix-like systems
|
Building PCRE using autotools
|
||||||
Shared libraries on Unix-like systems
|
Retrieving configuration information
|
||||||
Cross-compiling on Unix-like systems
|
Shared libraries
|
||||||
|
Cross-compiling using autotools
|
||||||
Using HP's ANSI C++ compiler (aCC)
|
Using HP's ANSI C++ compiler (aCC)
|
||||||
|
Using PCRE from MySQL
|
||||||
Making new tarballs
|
Making new tarballs
|
||||||
Testing PCRE
|
Testing PCRE
|
||||||
Character tables
|
Character tables
|
||||||
@ -33,16 +35,20 @@ The contents of this README file are:
|
|||||||
The PCRE APIs
|
The PCRE APIs
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
PCRE is written in C, and it has its own API. The distribution also includes a
|
PCRE is written in C, and it has its own API. There are three sets of functions,
|
||||||
set of C++ wrapper functions (see the pcrecpp man page for details), courtesy
|
one for the 8-bit library, which processes strings of bytes, one for the
|
||||||
of Google Inc.
|
16-bit library, which processes strings of 16-bit values, and one for the 32-bit
|
||||||
|
library, which processes strings of 32-bit values. The distribution also
|
||||||
|
includes a set of C++ wrapper functions (see the pcrecpp man page for details),
|
||||||
|
courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
|
||||||
|
C++.
|
||||||
|
|
||||||
In addition, there is a set of C wrapper functions that are based on the POSIX
|
In addition, there is a set of C wrapper functions (again, just for the 8-bit
|
||||||
regular expression API (see the pcreposix man page). These end up in the
|
library) that are based on the POSIX regular expression API (see the pcreposix
|
||||||
library called libpcreposix. Note that this just provides a POSIX calling
|
man page). These end up in the library called libpcreposix. Note that this just
|
||||||
interface to PCRE; the regular expressions themselves still follow Perl syntax
|
provides a POSIX calling interface to PCRE; the regular expressions themselves
|
||||||
and semantics. The POSIX API is restricted, and does not give full access to
|
still follow Perl syntax and semantics. The POSIX API is restricted, and does
|
||||||
all of PCRE's facilities.
|
not give full access to all of PCRE's facilities.
|
||||||
|
|
||||||
The header file for the POSIX-style functions is called pcreposix.h. The
|
The header file for the POSIX-style functions is called pcreposix.h. The
|
||||||
official POSIX name is regex.h, but I did not want to risk possible problems
|
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||||
@ -105,36 +111,45 @@ Windows (I myself do not use Windows). Nowadays there is more Windows support
|
|||||||
in the standard distribution, so these contributions have been archived.
|
in the standard distribution, so these contributions have been archived.
|
||||||
|
|
||||||
|
|
||||||
Building PCRE on non-Unix systems
|
Building PCRE on non-Unix-like systems
|
||||||
---------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
For a non-Unix system, please read the comments in the file NON-UNIX-USE,
|
For a non-Unix-like system, please read the comments in the file
|
||||||
though if your system supports the use of "configure" and "make" you may be
|
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
|
||||||
able to build PCRE in the same way as for Unix-like systems. PCRE can also be
|
"make" you may be able to build PCRE using autotools in the same way as for
|
||||||
configured in many platform environments using the GUI facility of CMake's
|
many Unix-like systems.
|
||||||
CMakeSetup. It creates Makefiles, solution files, etc.
|
|
||||||
|
PCRE can also be configured using the GUI facility provided by CMake's
|
||||||
|
cmake-gui command. This creates Makefiles, solution files, etc. The file
|
||||||
|
NON-AUTOTOOLS-BUILD has information about CMake.
|
||||||
|
|
||||||
PCRE has been compiled on many different operating systems. It should be
|
PCRE has been compiled on many different operating systems. It should be
|
||||||
straightforward to build PCRE on any system that has a Standard C compiler and
|
straightforward to build PCRE on any system that has a Standard C compiler and
|
||||||
library, because it uses only Standard C functions.
|
library, because it uses only Standard C functions.
|
||||||
|
|
||||||
|
|
||||||
Building PCRE on Unix-like systems
|
Building PCRE without using autotools
|
||||||
----------------------------------
|
-------------------------------------
|
||||||
|
|
||||||
|
The use of autotools (in particular, libtool) is problematic in some
|
||||||
|
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
|
||||||
|
file for ways of building PCRE without using autotools.
|
||||||
|
|
||||||
|
|
||||||
|
Building PCRE using autotools
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
If you are using HP's ANSI C++ compiler (aCC), please see the special note
|
If you are using HP's ANSI C++ compiler (aCC), please see the special note
|
||||||
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.
|
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.
|
||||||
|
|
||||||
The following instructions assume the use of the widely used "configure, make,
|
The following instructions assume the use of the widely used "configure; make;
|
||||||
make install" process. There is also support for CMake in the PCRE
|
make install" (autotools) process.
|
||||||
distribution; there are some comments about using CMake in the NON-UNIX-USE
|
|
||||||
file, though it can also be used in Unix-like systems.
|
|
||||||
|
|
||||||
To build PCRE on a Unix-like system, first run the "configure" command from the
|
To build PCRE on a system that supports autotools, first run the "configure"
|
||||||
PCRE distribution directory, with your current directory set to the directory
|
command from the PCRE distribution directory, with your current directory set
|
||||||
where you want the files to be created. This command is a standard GNU
|
to the directory where you want the files to be created. This command is a
|
||||||
"autoconf" configuration script, for which generic instructions are supplied in
|
standard GNU "autoconf" configuration script, for which generic instructions
|
||||||
the file INSTALL.
|
are supplied in the file INSTALL.
|
||||||
|
|
||||||
Most commonly, people build PCRE within its own distribution directory, and in
|
Most commonly, people build PCRE within its own distribution directory, and in
|
||||||
this case, on many systems, just running "./configure" is sufficient. However,
|
this case, on many systems, just running "./configure" is sufficient. However,
|
||||||
@ -142,9 +157,9 @@ the usual methods of changing standard defaults are available. For example:
|
|||||||
|
|
||||||
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
|
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
|
||||||
|
|
||||||
specifies that the C compiler should be run with the flags '-O2 -Wall' instead
|
This command specifies that the C compiler should be run with the flags '-O2
|
||||||
of the default, and that "make install" should install PCRE under /opt/local
|
-Wall' instead of the default, and that "make install" should install PCRE
|
||||||
instead of the default /usr/local.
|
under /opt/local instead of the default /usr/local.
|
||||||
|
|
||||||
If you want to build in a different directory, just run "configure" with that
|
If you want to build in a different directory, just run "configure" with that
|
||||||
directory as current. For example, suppose you have unpacked the PCRE source
|
directory as current. For example, suppose you have unpacked the PCRE source
|
||||||
@ -158,27 +173,62 @@ possible to build it as a C++ library, though the provided building apparatus
|
|||||||
does not have any features to support this.
|
does not have any features to support this.
|
||||||
|
|
||||||
There are some optional features that can be included or omitted from the PCRE
|
There are some optional features that can be included or omitted from the PCRE
|
||||||
library. You can read more about them in the pcrebuild man page.
|
library. They are also documented in the pcrebuild man page.
|
||||||
|
|
||||||
. If you want to suppress the building of the C++ wrapper library, you can add
|
. By default, both shared and static libraries are built. You can change this
|
||||||
--disable-cpp to the "configure" command. Otherwise, when "configure" is run,
|
by adding one of these options to the "configure" command:
|
||||||
it will try to find a C++ compiler and C++ header files, and if it succeeds,
|
|
||||||
it will try to build the C++ wrapper.
|
--disable-shared
|
||||||
|
--disable-static
|
||||||
|
|
||||||
|
(See also "Shared libraries" below.)
|
||||||
|
|
||||||
|
. By default, only the 8-bit library is built. If you add --enable-pcre16 to
|
||||||
|
the "configure" command, the 16-bit library is also built. If you add
|
||||||
|
--enable-pcre32 to the "configure" command, the 32-bit library is also built.
|
||||||
|
If you want only the 16-bit or 32-bit library, use --disable-pcre8 to disable
|
||||||
|
building the 8-bit library.
|
||||||
|
|
||||||
|
. If you are building the 8-bit library and want to suppress the building of
|
||||||
|
the C++ wrapper library, you can add --disable-cpp to the "configure"
|
||||||
|
command. Otherwise, when "configure" is run without --disable-pcre8, it will
|
||||||
|
try to find a C++ compiler and C++ header files, and if it succeeds, it will
|
||||||
|
try to build the C++ wrapper.
|
||||||
|
|
||||||
|
. If you want to include support for just-in-time compiling, which can give
|
||||||
|
large performance improvements on certain platforms, add --enable-jit to the
|
||||||
|
"configure" command. This support is available only for certain hardware
|
||||||
|
architectures. If you try to enable it on an unsupported architecture, there
|
||||||
|
will be a compile time error.
|
||||||
|
|
||||||
|
. When JIT support is enabled, pcregrep automatically makes use of it, unless
|
||||||
|
you add --disable-pcregrep-jit to the "configure" command.
|
||||||
|
|
||||||
. If you want to make use of the support for UTF-8 Unicode character strings in
|
. If you want to make use of the support for UTF-8 Unicode character strings in
|
||||||
PCRE, you must add --enable-utf8 to the "configure" command. Without it, the
|
the 8-bit library, or UTF-16 Unicode character strings in the 16-bit library,
|
||||||
code for handling UTF-8 is not included in the library. Even when included,
|
or UTF-32 Unicode character strings in the 32-bit library, you must add
|
||||||
it still has to be enabled by an option at run time. When PCRE is compiled
|
--enable-utf to the "configure" command. Without it, the code for handling
|
||||||
with this option, its input can only either be ASCII or UTF-8, even when
|
UTF-8, UTF-16 and UTF-32 is not included in the relevant library. Even
|
||||||
running on EBCDIC platforms. It is not possible to use both --enable-utf8 and
|
when --enable-utf is included, the use of a UTF encoding still has to be
|
||||||
--enable-ebcdic at the same time.
|
enabled by an option at run time. When PCRE is compiled with this option, its
|
||||||
|
input can only either be ASCII or UTF-8/16/32, even when running on EBCDIC
|
||||||
|
platforms. It is not possible to use both --enable-utf and --enable-ebcdic at
|
||||||
|
the same time.
|
||||||
|
|
||||||
. If, in addition to support for UTF-8 character strings, you want to include
|
. There are no separate options for enabling UTF-8, UTF-16 and UTF-32
|
||||||
support for the \P, \p, and \X sequences that recognize Unicode character
|
independently because that would allow ridiculous settings such as requesting
|
||||||
properties, you must add --enable-unicode-properties to the "configure"
|
UTF-16 support while building only the 8-bit library. However, the option
|
||||||
command. This adds about 30K to the size of the library (in the form of a
|
--enable-utf8 is retained for backwards compatibility with earlier releases
|
||||||
property table); only the basic two-letter properties such as Lu are
|
that did not support 16-bit or 32-bit character strings. It is synonymous with
|
||||||
supported.
|
--enable-utf. It is not possible to configure one library with UTF support
|
||||||
|
and the other without in the same configuration.
|
||||||
|
|
||||||
|
. If, in addition to support for UTF-8/16/32 character strings, you want to
|
||||||
|
include support for the \P, \p, and \X sequences that recognize Unicode
|
||||||
|
character properties, you must add --enable-unicode-properties to the
|
||||||
|
"configure" command. This adds about 30K to the size of the library (in the
|
||||||
|
form of a property table); only the basic two-letter properties such as Lu
|
||||||
|
are supported.
|
||||||
|
|
||||||
. You can build PCRE to recognize either CR or LF or the sequence CRLF or any
|
. You can build PCRE to recognize either CR or LF or the sequence CRLF or any
|
||||||
of the preceding, or any of the Unicode newline sequences as indicating the
|
of the preceding, or any of the Unicode newline sequences as indicating the
|
||||||
@ -231,10 +281,12 @@ library. You can read more about them in the pcrebuild man page.
|
|||||||
sizes in the pcrestack man page.
|
sizes in the pcrestack man page.
|
||||||
|
|
||||||
. The default maximum compiled pattern size is around 64K. You can increase
|
. The default maximum compiled pattern size is around 64K. You can increase
|
||||||
this by adding --with-link-size=3 to the "configure" command. You can
|
this by adding --with-link-size=3 to the "configure" command. In the 8-bit
|
||||||
increase it even more by setting --with-link-size=4, but this is unlikely
|
library, PCRE then uses three bytes instead of two for offsets to different
|
||||||
ever to be necessary. Increasing the internal link size will reduce
|
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||||
performance.
|
the same as --with-link-size=4, which (in both libraries) uses four-byte
|
||||||
|
offsets. Increasing the internal link size reduces performance. In the 32-bit
|
||||||
|
library, the only supported link size is 4.
|
||||||
|
|
||||||
. You can build PCRE so that its internal match() function that is called from
|
. You can build PCRE so that its internal match() function that is called from
|
||||||
pcre_exec() does not call itself recursively. Instead, it uses memory blocks
|
pcre_exec() does not call itself recursively. Instead, it uses memory blocks
|
||||||
@ -246,9 +298,10 @@ library. You can read more about them in the pcrebuild man page.
|
|||||||
|
|
||||||
on the "configure" command. PCRE runs more slowly in this mode, but it may be
|
on the "configure" command. PCRE runs more slowly in this mode, but it may be
|
||||||
necessary in environments with limited stack sizes. This applies only to the
|
necessary in environments with limited stack sizes. This applies only to the
|
||||||
pcre_exec() function; it does not apply to pcre_dfa_exec(), which does not
|
normal execution of the pcre_exec() function; if JIT support is being
|
||||||
use deeply nested recursion. There is a discussion about stack sizes in the
|
successfully used, it is not relevant. Equally, it does not apply to
|
||||||
pcrestack man page.
|
pcre_dfa_exec(), which does not use deeply nested recursion. There is a
|
||||||
|
discussion about stack sizes in the pcrestack man page.
|
||||||
|
|
||||||
. For speed, PCRE uses four tables for manipulating and identifying characters
|
. For speed, PCRE uses four tables for manipulating and identifying characters
|
||||||
whose code point values are less than 256. By default, it uses a set of
|
whose code point values are less than 256. By default, it uses a set of
|
||||||
@ -262,33 +315,64 @@ library. You can read more about them in the pcrebuild man page.
|
|||||||
pcre_chartables.c.dist. See "Character tables" below for further information.
|
pcre_chartables.c.dist. See "Character tables" below for further information.
|
||||||
|
|
||||||
. It is possible to compile PCRE for use on systems that use EBCDIC as their
|
. It is possible to compile PCRE for use on systems that use EBCDIC as their
|
||||||
character code (as opposed to ASCII) by specifying
|
character code (as opposed to ASCII/Unicode) by specifying
|
||||||
|
|
||||||
--enable-ebcdic
|
--enable-ebcdic
|
||||||
|
|
||||||
This automatically implies --enable-rebuild-chartables (see above). However,
|
This automatically implies --enable-rebuild-chartables (see above). However,
|
||||||
when PCRE is built this way, it always operates in EBCDIC. It cannot support
|
when PCRE is built this way, it always operates in EBCDIC. It cannot support
|
||||||
both EBCDIC and UTF-8.
|
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
|
||||||
|
which specifies that the code value for the EBCDIC NL character is 0x25
|
||||||
|
instead of the default 0x15.
|
||||||
|
|
||||||
. It is possible to compile pcregrep to use libz and/or libbz2, in order to
|
. In environments where valgrind is installed, if you specify
|
||||||
read .gz and .bz2 files (respectively), by specifying one or both of
|
|
||||||
|
--enable-valgrind
|
||||||
|
|
||||||
|
PCRE will use valgrind annotations to mark certain memory regions as
|
||||||
|
unaddressable. This allows it to detect invalid memory accesses, and is
|
||||||
|
mostly useful for debugging PCRE itself.
|
||||||
|
|
||||||
|
. In environments where the gcc compiler is used and lcov version 1.6 or above
|
||||||
|
is installed, if you specify
|
||||||
|
|
||||||
|
--enable-coverage
|
||||||
|
|
||||||
|
the build process implements a code coverage report for the test suite. The
|
||||||
|
report is generated by running "make coverage". If ccache is installed on
|
||||||
|
your system, it must be disabled when building PCRE for coverage reporting.
|
||||||
|
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||||
|
running "make" to build PCRE.
|
||||||
|
|
||||||
|
. The pcregrep program currently supports only 8-bit data files, and so
|
||||||
|
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
|
||||||
|
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
|
||||||
|
specifying one or both of
|
||||||
|
|
||||||
--enable-pcregrep-libz
|
--enable-pcregrep-libz
|
||||||
--enable-pcregrep-libbz2
|
--enable-pcregrep-libbz2
|
||||||
|
|
||||||
Of course, the relevant libraries must be installed on your system.
|
Of course, the relevant libraries must be installed on your system.
|
||||||
|
|
||||||
. It is possible to compile pcretest so that it links with the libreadline
|
. The default size of the internal buffer used by pcregrep can be set by, for
|
||||||
library, by specifying
|
example:
|
||||||
|
|
||||||
--enable-pcretest-libreadline
|
--with-pcregrep-bufsize=50K
|
||||||
|
|
||||||
|
The default value is 20K.
|
||||||
|
|
||||||
|
. It is possible to compile pcretest so that it links with the libreadline
|
||||||
|
or libedit libraries, by specifying, respectively,
|
||||||
|
|
||||||
|
--enable-pcretest-libreadline or --enable-pcretest-libedit
|
||||||
|
|
||||||
If this is done, when pcretest's input is from a terminal, it reads it using
|
If this is done, when pcretest's input is from a terminal, it reads it using
|
||||||
the readline() function. This provides line-editing and history facilities.
|
the readline() function. This provides line-editing and history facilities.
|
||||||
Note that libreadline is GPL-licenced, so if you distribute a binary of
|
Note that libreadline is GPL-licenced, so if you distribute a binary of
|
||||||
pcretest linked in this way, there may be licensing issues.
|
pcretest linked in this way, there may be licensing issues. These can be
|
||||||
|
avoided by linking with libedit (which has a BSD licence) instead.
|
||||||
|
|
||||||
Setting this option causes the -lreadline option to be added to the pcretest
|
Enabling libreadline causes the -lreadline option to be added to the pcretest
|
||||||
build. In many operating environments with a system-installed readline
|
build. In many operating environments with a system-installed readline
|
||||||
library this is sufficient. However, in some environments (e.g. if an
|
library this is sufficient. However, in some environments (e.g. if an
|
||||||
unmodified distribution version of readline is in use), it may be necessary
|
unmodified distribution version of readline is in use), it may be necessary
|
||||||
@ -301,37 +385,43 @@ library. You can read more about them in the pcrebuild man page.
|
|||||||
|
|
||||||
The "configure" script builds the following files for the basic C library:
|
The "configure" script builds the following files for the basic C library:
|
||||||
|
|
||||||
. Makefile is the makefile that builds the library
|
. Makefile the makefile that builds the library
|
||||||
. config.h contains build-time configuration options for the library
|
. config.h build-time configuration options for the library
|
||||||
. pcre.h is the public PCRE header file
|
. pcre.h the public PCRE header file
|
||||||
. pcre-config is a script that shows the settings of "configure" options
|
. pcre-config script that shows the building settings such as CFLAGS
|
||||||
. libpcre.pc is data for the pkg-config command
|
that were set for "configure"
|
||||||
. libtool is a script that builds shared and/or static libraries
|
. libpcre.pc ) data for the pkg-config command
|
||||||
. RunTest is a script for running tests on the basic C library
|
. libpcre16.pc )
|
||||||
. RunGrepTest is a script for running tests on the pcregrep command
|
. libpcre32.pc )
|
||||||
|
. libpcreposix.pc )
|
||||||
|
. libtool script that builds shared and/or static libraries
|
||||||
|
|
||||||
Versions of config.h and pcre.h are distributed in the PCRE tarballs under
|
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
|
||||||
the names config.h.generic and pcre.h.generic. These are provided for the
|
names config.h.generic and pcre.h.generic. These are provided for those who
|
||||||
benefit of those who have to built PCRE without the benefit of "configure". If
|
have to build PCRE without using "configure" or CMake. If you use "configure"
|
||||||
you use "configure", the .generic versions are not used.
|
or CMake, the .generic versions are not used.
|
||||||
|
|
||||||
If a C++ compiler is found, the following files are also built:
|
When building the 8-bit library, if a C++ compiler is found, the following
|
||||||
|
files are also built:
|
||||||
|
|
||||||
. libpcrecpp.pc is data for the pkg-config command
|
. libpcrecpp.pc data for the pkg-config command
|
||||||
. pcrecpparg.h is a header file for programs that call PCRE via the C++ wrapper
|
. pcrecpparg.h header file for calling PCRE via the C++ wrapper
|
||||||
. pcre_stringpiece.h is the header for the C++ "stringpiece" functions
|
. pcre_stringpiece.h header for the C++ "stringpiece" functions
|
||||||
|
|
||||||
The "configure" script also creates config.status, which is an executable
|
The "configure" script also creates config.status, which is an executable
|
||||||
script that can be run to recreate the configuration, and config.log, which
|
script that can be run to recreate the configuration, and config.log, which
|
||||||
contains compiler output from tests that "configure" runs.
|
contains compiler output from tests that "configure" runs.
|
||||||
|
|
||||||
Once "configure" has run, you can run "make". It builds two libraries, called
|
Once "configure" has run, you can run "make". This builds the libraries
|
||||||
libpcre and libpcreposix, a test program called pcretest, and the pcregrep
|
libpcre, libpcre16 and/or libpcre32, and a test program called pcretest. If you
|
||||||
command. If a C++ compiler was found on your system, "make" also builds the C++
|
enabled JIT support with --enable-jit, a test program called pcre_jit_test is
|
||||||
wrapper library, which is called libpcrecpp, and some test programs called
|
built as well.
|
||||||
pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest.
|
|
||||||
Building the C++ wrapper can be disabled by adding --disable-cpp to the
|
If the 8-bit library is built, libpcreposix and the pcregrep command are also
|
||||||
"configure" command.
|
built, and if a C++ compiler was found on your system, and you did not disable
|
||||||
|
it with --disable-cpp, "make" builds the C++ wrapper library, which is called
|
||||||
|
libpcrecpp, as well as some test programs called pcrecpp_unittest,
|
||||||
|
pcre_scanner_unittest, and pcre_stringpiece_unittest.
|
||||||
|
|
||||||
The command "make check" runs all the appropriate tests. Details of the PCRE
|
The command "make check" runs all the appropriate tests. Details of the PCRE
|
||||||
tests are given below in a separate section of this document.
|
tests are given below in a separate section of this document.
|
||||||
@ -342,16 +432,21 @@ system. The following are installed (file names are all relative to the
|
|||||||
|
|
||||||
Commands (bin):
|
Commands (bin):
|
||||||
pcretest
|
pcretest
|
||||||
pcregrep
|
pcregrep (if 8-bit support is enabled)
|
||||||
pcre-config
|
pcre-config
|
||||||
|
|
||||||
Libraries (lib):
|
Libraries (lib):
|
||||||
libpcre
|
libpcre16 (if 16-bit support is enabled)
|
||||||
libpcreposix
|
libpcre32 (if 32-bit support is enabled)
|
||||||
libpcrecpp (if C++ support is enabled)
|
libpcre (if 8-bit support is enabled)
|
||||||
|
libpcreposix (if 8-bit support is enabled)
|
||||||
|
libpcrecpp (if 8-bit and C++ support is enabled)
|
||||||
|
|
||||||
Configuration information (lib/pkgconfig):
|
Configuration information (lib/pkgconfig):
|
||||||
|
libpcre16.pc
|
||||||
|
libpcre32.pc
|
||||||
libpcre.pc
|
libpcre.pc
|
||||||
|
libpcreposix.pc
|
||||||
libpcrecpp.pc (if C++ support is enabled)
|
libpcrecpp.pc (if C++ support is enabled)
|
||||||
|
|
||||||
Header files (include):
|
Header files (include):
|
||||||
@ -365,6 +460,7 @@ system. The following are installed (file names are all relative to the
|
|||||||
Man pages (share/man/man{1,3}):
|
Man pages (share/man/man{1,3}):
|
||||||
pcregrep.1
|
pcregrep.1
|
||||||
pcretest.1
|
pcretest.1
|
||||||
|
pcre-config.1
|
||||||
pcre.3
|
pcre.3
|
||||||
pcre*.3 (lots more pages, all starting "pcre")
|
pcre*.3 (lots more pages, all starting "pcre")
|
||||||
|
|
||||||
@ -379,17 +475,18 @@ system. The following are installed (file names are all relative to the
|
|||||||
LICENCE
|
LICENCE
|
||||||
NEWS
|
NEWS
|
||||||
README
|
README
|
||||||
pcre.txt (a concatenation of the man(3) pages)
|
pcre.txt (a concatenation of the man(3) pages)
|
||||||
pcretest.txt the pcretest man page
|
pcretest.txt the pcretest man page
|
||||||
pcregrep.txt the pcregrep man page
|
pcregrep.txt the pcregrep man page
|
||||||
|
pcre-config.txt the pcre-config man page
|
||||||
|
|
||||||
If you want to remove PCRE from your system, you can run "make uninstall".
|
If you want to remove PCRE from your system, you can run "make uninstall".
|
||||||
This removes all the files that "make install" installed. However, it does not
|
This removes all the files that "make install" installed. However, it does not
|
||||||
remove any directories, because these are often shared with other programs.
|
remove any directories, because these are often shared with other programs.
|
||||||
|
|
||||||
|
|
||||||
Retrieving configuration information on Unix-like systems
|
Retrieving configuration information
|
||||||
---------------------------------------------------------
|
------------------------------------
|
||||||
|
|
||||||
Running "make install" installs the command pcre-config, which can be used to
|
Running "make install" installs the command pcre-config, which can be used to
|
||||||
recall information about the PCRE configuration and installation. For example:
|
recall information about the PCRE configuration and installation. For example:
|
||||||
@ -414,8 +511,8 @@ The data is held in *.pc files that are installed in a directory called
|
|||||||
<prefix>/lib/pkgconfig.
|
<prefix>/lib/pkgconfig.
|
||||||
|
|
||||||
|
|
||||||
Shared libraries on Unix-like systems
|
Shared libraries
|
||||||
-------------------------------------
|
----------------
|
||||||
|
|
||||||
The default distribution builds PCRE as shared libraries and static libraries,
|
The default distribution builds PCRE as shared libraries and static libraries,
|
||||||
as long as the operating system supports shared libraries. Shared library
|
as long as the operating system supports shared libraries. Shared library
|
||||||
@ -440,8 +537,8 @@ Then run "make" in the usual way. Similarly, you can use --disable-static to
|
|||||||
build only shared libraries.
|
build only shared libraries.
|
||||||
|
|
||||||
|
|
||||||
Cross-compiling on Unix-like systems
|
Cross-compiling using autotools
|
||||||
------------------------------------
|
-------------------------------
|
||||||
|
|
||||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||||
order to cross-compile PCRE for some other host. However, you should NOT
|
order to cross-compile PCRE for some other host. However, you should NOT
|
||||||
@ -478,6 +575,26 @@ running the "configure" script:
|
|||||||
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
|
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
|
||||||
|
|
||||||
|
|
||||||
|
Using Sun's compilers for Solaris
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
A user reports that the following configurations work on Solaris 9 sparcv9 and
|
||||||
|
Solaris 9 x86 (32-bit):
|
||||||
|
|
||||||
|
Solaris 9 sparcv9: ./configure --disable-cpp CC=/bin/cc CFLAGS="-m64 -g"
|
||||||
|
Solaris 9 x86: ./configure --disable-cpp CC=/bin/cc CFLAGS="-g"
|
||||||
|
|
||||||
|
|
||||||
|
Using PCRE from MySQL
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
On systems where both PCRE and MySQL are installed, it is possible to make use
|
||||||
|
of PCRE from within MySQL, as an alternative to the built-in pattern matching.
|
||||||
|
There is a web page that tells you how to do this:
|
||||||
|
|
||||||
|
http://www.mysqludf.org/lib_mysqludf_preg/index.php
|
||||||
|
|
||||||
|
|
||||||
Making new tarballs
|
Making new tarballs
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
@ -493,30 +610,49 @@ script creates the .txt and HTML forms of the documentation from the man pages.
|
|||||||
Testing PCRE
|
Testing PCRE
|
||||||
------------
|
------------
|
||||||
|
|
||||||
To test the basic PCRE library on a Unix system, run the RunTest script that is
|
To test the basic PCRE library on a Unix-like system, run the RunTest script.
|
||||||
created by the configuring process. There is also a script called RunGrepTest
|
There is another script called RunGrepTest that tests the options of the
|
||||||
that tests the options of the pcregrep command. If the C++ wrapper library is
|
pcregrep command. If the C++ wrapper library is built, three test programs
|
||||||
built, three test programs called pcrecpp_unittest, pcre_scanner_unittest, and
|
called pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest
|
||||||
pcre_stringpiece_unittest are also built.
|
are also built. When JIT support is enabled, another test program called
|
||||||
|
pcre_jit_test is built.
|
||||||
|
|
||||||
Both the scripts and all the program tests are run if you obey "make check" or
|
Both the scripts and all the program tests are run if you obey "make check" or
|
||||||
"make test". For other systems, see the instructions in NON-UNIX-USE.
|
"make test". For other environments, see the instructions in
|
||||||
|
NON-AUTOTOOLS-BUILD.
|
||||||
|
|
||||||
The RunTest script runs the pcretest test program (which is documented in its
|
The RunTest script runs the pcretest test program (which is documented in its
|
||||||
own man page) on each of the testinput files in the testdata directory in
|
own man page) on each of the relevant testinput files in the testdata
|
||||||
turn, and compares the output with the contents of the corresponding testoutput
|
directory, and compares the output with the contents of the corresponding
|
||||||
files. A file called testtry is used to hold the main output from pcretest
|
testoutput files. Some tests are relevant only when certain build-time options
|
||||||
(testsavedregex is also used as a working file). To run pcretest on just one of
|
were selected. For example, the tests for UTF-8/16/32 support are run only if
|
||||||
the test files, give its number as an argument to RunTest, for example:
|
--enable-utf was used. RunTest outputs a comment when it skips a test.
|
||||||
|
|
||||||
RunTest 2
|
Many of the tests that are not skipped are run up to three times. The second
|
||||||
|
run forces pcre_study() to be called for all patterns except for a few in some
|
||||||
|
tests that are marked "never study" (see the pcretest program for how this is
|
||||||
|
done). If JIT support is available, the non-DFA tests are run a third time,
|
||||||
|
this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.
|
||||||
|
|
||||||
The first test file can also be fed directly into the perltest.pl script to
|
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
|
||||||
check that Perl gives the same results. The only difference you should see is
|
libraries that are enabled. If you want to run just one set of tests, call
|
||||||
in the first few lines, where the Perl version is given instead of the PCRE
|
RunTest with either the -8, -16 or -32 option.
|
||||||
version.
|
|
||||||
|
|
||||||
The second set of tests check pcre_fullinfo(), pcre_info(), pcre_study(),
|
RunTest uses a file called testtry to hold the main output from pcretest.
|
||||||
|
Other files whose names begin with "test" are used as working files in some
|
||||||
|
tests. To run pcretest on just one or more specific test files, give their
|
||||||
|
numbers as arguments to RunTest, for example:
|
||||||
|
|
||||||
|
RunTest 2 7 11
|
||||||
|
|
||||||
|
You can also call RunTest with the single argument "list" to cause it to output
|
||||||
|
a list of tests.
|
||||||
|
|
||||||
|
The first test file can be fed directly into the perltest.pl script to check
|
||||||
|
that Perl gives the same results. The only difference you should see is in the
|
||||||
|
first few lines, where the Perl version is given instead of the PCRE version.
|
||||||
|
|
||||||
|
The second set of tests check pcre_fullinfo(), pcre_study(),
|
||||||
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
|
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
|
||||||
detection, and run-time flags that are specific to PCRE, as well as the POSIX
|
detection, and run-time flags that are specific to PCRE, as well as the POSIX
|
||||||
wrapper API. It also uses the debugging flags to check some of the internals of
|
wrapper API. It also uses the debugging flags to check some of the internals of
|
||||||
@ -551,24 +687,38 @@ RunTest.bat. The version of RunTest.bat included with PCRE 7.4 and above uses
|
|||||||
Windows versions of test 2. More info on using RunTest.bat is included in the
|
Windows versions of test 2. More info on using RunTest.bat is included in the
|
||||||
document entitled NON-UNIX-USE.]
|
document entitled NON-UNIX-USE.]
|
||||||
|
|
||||||
The fourth test checks the UTF-8 support. It is not run automatically unless
|
The fourth and fifth tests check the UTF-8/16/32 support and error handling and
|
||||||
PCRE is built with UTF-8 support. To do this you must set --enable-utf8 when
|
internal UTF features of PCRE that are not relevant to Perl, respectively. The
|
||||||
running "configure". This file can be also fed directly to the perltest script,
|
sixth and seventh tests do the same for Unicode character properties support.
|
||||||
provided you are running Perl 5.8 or higher. (For Perl 5.6, a small patch,
|
|
||||||
commented in the script, can be be used.)
|
|
||||||
|
|
||||||
The fifth test checks error handling with UTF-8 encoding, and internal UTF-8
|
The eighth, ninth, and tenth tests check the pcre_dfa_exec() alternative
|
||||||
features of PCRE that are not relevant to Perl.
|
matching function, in non-UTF-8/16/32 mode, UTF-8/16/32 mode, and UTF-8/16/32
|
||||||
|
mode with Unicode property support, respectively.
|
||||||
|
|
||||||
The sixth test checks the support for Unicode character properties. It it not
|
The eleventh test checks some internal offsets and code size features; it is
|
||||||
run automatically unless PCRE is built with Unicode property support. To to
|
run only when the default "link size" of 2 is set (in other cases the sizes
|
||||||
this you must set --enable-unicode-properties when running "configure".
|
change) and when Unicode property support is enabled.
|
||||||
|
|
||||||
The seventh, eighth, and ninth tests check the pcre_dfa_exec() alternative
|
The twelfth test is run only when JIT support is available, and the thirteenth
|
||||||
matching function, in non-UTF-8 mode, UTF-8 mode, and UTF-8 mode with Unicode
|
test is run only when JIT support is not available. They test some JIT-specific
|
||||||
property support, respectively. The eighth and ninth tests are not run
|
features such as information output from pcretest about JIT compilation.
|
||||||
automatically unless PCRE is build with the relevant support.
|
|
||||||
|
|
||||||
|
The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
|
||||||
|
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit mode.
|
||||||
|
These are tests that generate different output in the two modes. They are for
|
||||||
|
general cases, UTF-8/16/32 support, and Unicode property support, respectively.
|
||||||
|
|
||||||
|
The twentieth test is run only in 16/32-bit mode. It tests some specific
|
||||||
|
16/32-bit features of the DFA matching engine.
|
||||||
|
|
||||||
|
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when the
|
||||||
|
link size is set to 2 for the 16-bit library. They test reloading pre-compiled patterns.
|
||||||
|
|
||||||
|
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are for
|
||||||
|
general cases, and UTF-16 support, respectively.
|
||||||
|
|
||||||
|
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are for
|
||||||
|
general cases, and UTF-32 support, respectively.
|
||||||
|
|
||||||
Character tables
|
Character tables
|
||||||
----------------
|
----------------
|
||||||
@ -627,7 +777,9 @@ will cause PCRE to malfunction.
|
|||||||
File manifest
|
File manifest
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
The distribution should contain the following files:
|
The distribution should contain the files listed below. Where a file name is
|
||||||
|
given as pcre[16|32]_xxx it means that there are three files, one with the name
|
||||||
|
pcre_xxx, one with the name pcre16_xxx, and a third with the name pcre32_xxx.
|
||||||
|
|
||||||
(A) Source files of the PCRE library functions and their headers:
|
(A) Source files of the PCRE library functions and their headers:
|
||||||
|
|
||||||
@ -636,33 +788,42 @@ The distribution should contain the following files:
|
|||||||
|
|
||||||
pcre_chartables.c.dist a default set of character tables that assume ASCII
|
pcre_chartables.c.dist a default set of character tables that assume ASCII
|
||||||
coding; used, unless --enable-rebuild-chartables is
|
coding; used, unless --enable-rebuild-chartables is
|
||||||
specified, by copying to pcre_chartables.c
|
specified, by copying to pcre[16]_chartables.c
|
||||||
|
|
||||||
|
pcreposix.c )
|
||||||
|
pcre[16|32]_byte_order.c )
|
||||||
|
pcre[16|32]_compile.c )
|
||||||
|
pcre[16|32]_config.c )
|
||||||
|
pcre[16|32]_dfa_exec.c )
|
||||||
|
pcre[16|32]_exec.c )
|
||||||
|
pcre[16|32]_fullinfo.c )
|
||||||
|
pcre[16|32]_get.c ) sources for the functions in the library,
|
||||||
|
pcre[16|32]_globals.c ) and some internal functions that they use
|
||||||
|
pcre[16|32]_jit_compile.c )
|
||||||
|
pcre[16|32]_maketables.c )
|
||||||
|
pcre[16|32]_newline.c )
|
||||||
|
pcre[16|32]_refcount.c )
|
||||||
|
pcre[16|32]_string_utils.c )
|
||||||
|
pcre[16|32]_study.c )
|
||||||
|
pcre[16|32]_tables.c )
|
||||||
|
pcre[16|32]_ucd.c )
|
||||||
|
pcre[16|32]_version.c )
|
||||||
|
pcre[16|32]_xclass.c )
|
||||||
|
pcre_ord2utf8.c )
|
||||||
|
pcre_valid_utf8.c )
|
||||||
|
pcre16_ord2utf16.c )
|
||||||
|
pcre16_utf16_utils.c )
|
||||||
|
pcre16_valid_utf16.c )
|
||||||
|
pcre32_utf32_utils.c )
|
||||||
|
pcre32_valid_utf32.c )
|
||||||
|
|
||||||
|
pcre[16|32]_printint.c ) debugging function that is used by pcretest,
|
||||||
|
) and can also be #included in pcre_compile()
|
||||||
|
|
||||||
pcreposix.c )
|
|
||||||
pcre_compile.c )
|
|
||||||
pcre_config.c )
|
|
||||||
pcre_dfa_exec.c )
|
|
||||||
pcre_exec.c )
|
|
||||||
pcre_fullinfo.c )
|
|
||||||
pcre_get.c ) sources for the functions in the library,
|
|
||||||
pcre_globals.c ) and some internal functions that they use
|
|
||||||
pcre_info.c )
|
|
||||||
pcre_maketables.c )
|
|
||||||
pcre_newline.c )
|
|
||||||
pcre_ord2utf8.c )
|
|
||||||
pcre_refcount.c )
|
|
||||||
pcre_study.c )
|
|
||||||
pcre_tables.c )
|
|
||||||
pcre_try_flipped.c )
|
|
||||||
pcre_ucd.c )
|
|
||||||
pcre_valid_utf8.c )
|
|
||||||
pcre_version.c )
|
|
||||||
pcre_xclass.c )
|
|
||||||
pcre_printint.src ) debugging function that is #included in pcretest,
|
|
||||||
) and can also be #included in pcre_compile()
|
|
||||||
pcre.h.in template for pcre.h when built by "configure"
|
pcre.h.in template for pcre.h when built by "configure"
|
||||||
pcreposix.h header for the external POSIX wrapper API
|
pcreposix.h header for the external POSIX wrapper API
|
||||||
pcre_internal.h header for internal use
|
pcre_internal.h header for internal use
|
||||||
|
sljit/* 16 files that make up the JIT compiler
|
||||||
ucp.h header for Unicode property handling
|
ucp.h header for Unicode property handling
|
||||||
|
|
||||||
config.h.in template for config.h, which is built by "configure"
|
config.h.in template for config.h, which is built by "configure"
|
||||||
@ -699,7 +860,8 @@ The distribution should contain the following files:
|
|||||||
Makefile.am ) the automake input that was used to create
|
Makefile.am ) the automake input that was used to create
|
||||||
) Makefile.in
|
) Makefile.in
|
||||||
NEWS important changes in this release
|
NEWS important changes in this release
|
||||||
NON-UNIX-USE notes on building PCRE on non-Unix systems
|
NON-UNIX-USE the previous name for NON-AUTOTOOLS-BUILD
|
||||||
|
NON-AUTOTOOLS-BUILD notes on building PCRE without using autotools
|
||||||
PrepareRelease script to make preparations for "make dist"
|
PrepareRelease script to make preparations for "make dist"
|
||||||
README this file
|
README this file
|
||||||
RunTest a Unix shell script for running tests
|
RunTest a Unix shell script for running tests
|
||||||
@ -712,7 +874,7 @@ The distribution should contain the following files:
|
|||||||
) "configure" and config.h
|
) "configure" and config.h
|
||||||
depcomp ) script to find program dependencies, generated by
|
depcomp ) script to find program dependencies, generated by
|
||||||
) automake
|
) automake
|
||||||
doc/*.3 man page sources for the PCRE functions
|
doc/*.3 man page sources for PCRE
|
||||||
doc/*.1 man page sources for pcregrep and pcretest
|
doc/*.1 man page sources for pcregrep and pcretest
|
||||||
doc/index.html.src the base HTML page
|
doc/index.html.src the base HTML page
|
||||||
doc/html/* HTML documentation
|
doc/html/* HTML documentation
|
||||||
@ -720,7 +882,10 @@ The distribution should contain the following files:
|
|||||||
doc/pcretest.txt plain text documentation of test program
|
doc/pcretest.txt plain text documentation of test program
|
||||||
doc/perltest.txt plain text documentation of Perl test program
|
doc/perltest.txt plain text documentation of Perl test program
|
||||||
install-sh a shell script for installing files
|
install-sh a shell script for installing files
|
||||||
|
libpcre16.pc.in template for libpcre16.pc for pkg-config
|
||||||
|
libpcre32.pc.in template for libpcre32.pc for pkg-config
|
||||||
libpcre.pc.in template for libpcre.pc for pkg-config
|
libpcre.pc.in template for libpcre.pc for pkg-config
|
||||||
|
libpcreposix.pc.in template for libpcreposix.pc for pkg-config
|
||||||
libpcrecpp.pc.in template for libpcrecpp.pc for pkg-config
|
libpcrecpp.pc.in template for libpcrecpp.pc for pkg-config
|
||||||
ltmain.sh file used to build a libtool script
|
ltmain.sh file used to build a libtool script
|
||||||
missing ) common stub for a few missing GNU programs while
|
missing ) common stub for a few missing GNU programs while
|
||||||
@ -728,17 +893,20 @@ The distribution should contain the following files:
|
|||||||
mkinstalldirs script for making install directories
|
mkinstalldirs script for making install directories
|
||||||
perltest.pl Perl test program
|
perltest.pl Perl test program
|
||||||
pcre-config.in source of script which retains PCRE information
|
pcre-config.in source of script which retains PCRE information
|
||||||
|
pcre_jit_test.c test program for the JIT compiler
|
||||||
pcrecpp_unittest.cc )
|
pcrecpp_unittest.cc )
|
||||||
pcre_scanner_unittest.cc ) test programs for the C++ wrapper
|
pcre_scanner_unittest.cc ) test programs for the C++ wrapper
|
||||||
pcre_stringpiece_unittest.cc )
|
pcre_stringpiece_unittest.cc )
|
||||||
testdata/testinput* test data for main library tests
|
testdata/testinput* test data for main library tests
|
||||||
testdata/testoutput* expected test results
|
testdata/testoutput* expected test results
|
||||||
testdata/grep* input and output for pcregrep tests
|
testdata/grep* input and output for pcregrep tests
|
||||||
|
testdata/* other supporting test files
|
||||||
|
|
||||||
(D) Auxiliary files for cmake support
|
(D) Auxiliary files for cmake support
|
||||||
|
|
||||||
cmake/COPYING-CMAKE-SCRIPTS
|
cmake/COPYING-CMAKE-SCRIPTS
|
||||||
cmake/FindPackageHandleStandardArgs.cmake
|
cmake/FindPackageHandleStandardArgs.cmake
|
||||||
|
cmake/FindEditline.cmake
|
||||||
cmake/FindReadline.cmake
|
cmake/FindReadline.cmake
|
||||||
CMakeLists.txt
|
CMakeLists.txt
|
||||||
config-cmake.h.in
|
config-cmake.h.in
|
||||||
@ -764,4 +932,4 @@ The distribution should contain the following files:
|
|||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 21 March 2009
|
Last updated: 27 October 2012
|
||||||
|
@ -2,67 +2,104 @@
|
|||||||
|
|
||||||
# Run pcregrep tests. The assumption is that the PCRE tests check the library
|
# Run pcregrep tests. The assumption is that the PCRE tests check the library
|
||||||
# itself. What we are checking here is the file handling and options that are
|
# itself. What we are checking here is the file handling and options that are
|
||||||
# supported by pcregrep.
|
# supported by pcregrep. This script must be run in the build directory.
|
||||||
|
|
||||||
# Set the C locale, so that sort(1) behaves predictably.
|
# Set the C locale, so that sort(1) behaves predictably.
|
||||||
|
|
||||||
LC_ALL=C
|
LC_ALL=C
|
||||||
export LC_ALL
|
export LC_ALL
|
||||||
|
|
||||||
pcregrep=`pwd`/pcregrep
|
# Remove any non-default colouring and aliases that the caller may have set.
|
||||||
|
|
||||||
echo " "
|
unset PCREGREP_COLOUR PCREGREP_COLOR
|
||||||
echo "Testing pcregrep"
|
unset cp ls mv rm
|
||||||
$pcregrep -V
|
|
||||||
|
# Remember the current (build) directory, set the program to be tested, and
|
||||||
|
# valgrind settings when requested.
|
||||||
|
|
||||||
|
builddir=`pwd`
|
||||||
|
pcregrep=$builddir/pcregrep
|
||||||
|
|
||||||
cf="diff -ub"
|
|
||||||
valgrind=
|
valgrind=
|
||||||
|
|
||||||
while [ $# -gt 0 ] ; do
|
while [ $# -gt 0 ] ; do
|
||||||
case $1 in
|
case $1 in
|
||||||
valgrind) valgrind="valgrind -q --leak-check=no";;
|
valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all";;
|
||||||
*) echo "Unknown argument $1"; exit 1;;
|
*) echo "RunGrepTest: Unknown argument $1"; exit 1;;
|
||||||
esac
|
esac
|
||||||
shift
|
shift
|
||||||
done
|
done
|
||||||
|
|
||||||
# If PCRE has been built in a directory other than the source directory, and
|
echo " "
|
||||||
# this test is being run from "make check" as usual, then $(srcdir) will be
|
pcregrep_version=`$pcregrep -V`
|
||||||
# set. If not, set it to the current directory. We then arrange to run the
|
if [ "$valgrind" = "" ] ; then
|
||||||
# pcregrep command in the source directory so that the file names that appear
|
echo "Testing $pcregrep_version"
|
||||||
# in the output are always the same.
|
else
|
||||||
|
echo "Testing $pcregrep_version using valgrind"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Set up a suitable "diff" command for comparison. Some systems have a diff
|
||||||
|
# that lacks a -u option. Try to deal with this; better do the test for the -b
|
||||||
|
# option as well.
|
||||||
|
|
||||||
|
cf="diff"
|
||||||
|
diff -b /dev/null /dev/null 2>/dev/null && cf="diff -b"
|
||||||
|
diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u"
|
||||||
|
diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"
|
||||||
|
|
||||||
|
# If this test is being run from "make check", $srcdir will be set. If not, set
|
||||||
|
# it to the current or parent directory, whichever one contains the test data.
|
||||||
|
# Subsequently, we run most of the pcregrep tests in the source directory so
|
||||||
|
# that the file names in the output are always the same.
|
||||||
|
|
||||||
if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then
|
if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then
|
||||||
srcdir=.
|
if [ -d "./testdata" ] ; then
|
||||||
|
srcdir=.
|
||||||
|
elif [ -d "../testdata" ] ; then
|
||||||
|
srcdir=..
|
||||||
|
else
|
||||||
|
echo "Cannot find the testdata directory"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check for the availability of UTF-8 support
|
# Check for the availability of UTF-8 support
|
||||||
|
|
||||||
./pcretest -C | ./pcregrep "No UTF-8 support" >/dev/null
|
./pcretest -C utf >/dev/null
|
||||||
utf8=$?
|
utf8=$?
|
||||||
|
|
||||||
|
echo "Testing pcregrep main features"
|
||||||
|
|
||||||
echo "---------------------------- Test 1 ------------------------------" >testtry
|
echo "---------------------------- Test 1 ------------------------------" >testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep PATTERN ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep PATTERN ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 2 ------------------------------" >>testtry
|
echo "---------------------------- Test 2 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep '^PATTERN' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep '^PATTERN' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 3 ------------------------------" >>testtry
|
echo "---------------------------- Test 3 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 4 ------------------------------" >>testtry
|
echo "---------------------------- Test 4 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -ic PATTERN ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -ic PATTERN ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 5 ------------------------------" >>testtry
|
echo "---------------------------- Test 5 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 6 ------------------------------" >>testtry
|
echo "---------------------------- Test 6 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 7 ------------------------------" >>testtry
|
echo "---------------------------- Test 7 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 8 ------------------------------" >>testtry
|
echo "---------------------------- Test 8 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 9 ------------------------------" >>testtry
|
echo "---------------------------- Test 9 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||||
@ -74,69 +111,92 @@ echo "RC=$?" >>testtry
|
|||||||
|
|
||||||
echo "---------------------------- Test 11 -----------------------------" >>testtry
|
echo "---------------------------- Test 11 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -vn pattern ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -vn pattern ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 12 -----------------------------" >>testtry
|
echo "---------------------------- Test 12 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -ix pattern ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -ix pattern ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 13 -----------------------------" >>testtry
|
echo "---------------------------- Test 13 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist ./testdata/grepinputx) >>testtry
|
echo seventeen >testtemp1
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist -f $builddir/testtemp1 ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 14 -----------------------------" >>testtry
|
echo "---------------------------- Test 14 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 15 -----------------------------" >>testtry
|
echo "---------------------------- Test 15 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep 'abc^*' ./testdata/grepinput) 2>>testtry >>testtry
|
(cd $srcdir; $valgrind $pcregrep 'abc^*' ./testdata/grepinput) 2>>testtry >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 16 -----------------------------" >>testtry
|
echo "---------------------------- Test 16 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtry >>testtry
|
(cd $srcdir; $valgrind $pcregrep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtry >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 17 -----------------------------" >>testtry
|
echo "---------------------------- Test 17 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -M 'the\noutput' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -M 'the\noutput' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 18 -----------------------------" >>testtry
|
echo "---------------------------- Test 18 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 19 -----------------------------" >>testtry
|
echo "---------------------------- Test 19 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -Mix 'Pattern' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -Mix 'Pattern' ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 20 -----------------------------" >>testtry
|
echo "---------------------------- Test 20 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 21 -----------------------------" >>testtry
|
echo "---------------------------- Test 21 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -nA3 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -nA3 'four' ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 22 -----------------------------" >>testtry
|
echo "---------------------------- Test 22 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -nB3 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -nB3 'four' ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 23 -----------------------------" >>testtry
|
echo "---------------------------- Test 23 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -C3 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -C3 'four' ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 24 -----------------------------" >>testtry
|
echo "---------------------------- Test 24 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -A9 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -A9 'four' ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 25 -----------------------------" >>testtry
|
echo "---------------------------- Test 25 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -nB9 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -nB9 'four' ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 26 -----------------------------" >>testtry
|
echo "---------------------------- Test 26 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -A9 -B9 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -A9 -B9 'four' ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 27 -----------------------------" >>testtry
|
echo "---------------------------- Test 27 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -A10 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -A10 'four' ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 28 -----------------------------" >>testtry
|
echo "---------------------------- Test 28 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -nB10 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -nB10 'four' ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 29 -----------------------------" >>testtry
|
echo "---------------------------- Test 29 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -C12 -B10 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -C12 -B10 'four' ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 30 -----------------------------" >>testtry
|
echo "---------------------------- Test 30 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 31 -----------------------------" >>testtry
|
echo "---------------------------- Test 31 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 32 -----------------------------" >>testtry
|
echo "---------------------------- Test 32 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 33 -----------------------------" >>testtry
|
echo "---------------------------- Test 33 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep 'fox' ./testdata/grepnonexist) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep 'fox' ./testdata/grepnonexist) >>testtry 2>&1
|
||||||
@ -147,11 +207,11 @@ echo "---------------------------- Test 34 -----------------------------" >>test
|
|||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 35 -----------------------------" >>testtry
|
echo "---------------------------- Test 35 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --exclude_dir='^\.' 'fox' ./testdata) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 36 -----------------------------" >>testtry
|
echo "---------------------------- Test 36 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude 'grepinput$' --exclude_dir='^\.' 'fox' ./testdata | sort) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 37 -----------------------------" >>testtry
|
echo "---------------------------- Test 37 -----------------------------" >>testtry
|
||||||
@ -162,60 +222,270 @@ cat teststderr >>testtry
|
|||||||
|
|
||||||
echo "---------------------------- Test 38 ------------------------------" >>testtry
|
echo "---------------------------- Test 38 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep '>\x00<' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep '>\x00<' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 39 ------------------------------" >>testtry
|
echo "---------------------------- Test 39 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -A1 'before the binary zero' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -A1 'before the binary zero' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 40 ------------------------------" >>testtry
|
echo "---------------------------- Test 40 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -B1 'after the binary zero' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -B1 'after the binary zero' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 41 ------------------------------" >>testtry
|
echo "---------------------------- Test 41 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
echo "---------------------------- Test 41 ------------------------------" >>testtry
|
|
||||||
(cd $srcdir; $valgrind $pcregrep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtry
|
|
||||||
|
|
||||||
echo "---------------------------- Test 42 ------------------------------" >>testtry
|
echo "---------------------------- Test 42 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -on 'before|zero|after' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 43 ------------------------------" >>testtry
|
echo "---------------------------- Test 43 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -on -e before -e zero -e after ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -on 'before|zero|after' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 44 ------------------------------" >>testtry
|
echo "---------------------------- Test 44 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -on -e before -ezero -e after ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 45 ------------------------------" >>testtry
|
echo "---------------------------- Test 45 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -e abc -e '(unclosed' ./testdata/grepinput) 2>>testtry >>testtry
|
(cd $srcdir; $valgrind $pcregrep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 46 ------------------------------" >>testtry
|
echo "---------------------------- Test 46 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -Fx "AB.VE
|
(cd $srcdir; $valgrind $pcregrep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtry >>testtry
|
||||||
elephant" ./testdata/grepinput) >>testtry
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 47 ------------------------------" >>testtry
|
echo "---------------------------- Test 47 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -F "AB.VE
|
(cd $srcdir; $valgrind $pcregrep -Fx "AB.VE
|
||||||
elephant" ./testdata/grepinput) >>testtry
|
elephant" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 48 ------------------------------" >>testtry
|
echo "---------------------------- Test 48 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -F -e DATA -e "AB.VE
|
(cd $srcdir; $valgrind $pcregrep -F "AB.VE
|
||||||
elephant" ./testdata/grepinput) >>testtry
|
elephant" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 49 ------------------------------" >>testtry
|
echo "---------------------------- Test 49 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -F -e DATA -e "AB.VE
|
||||||
|
elephant" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 50 ------------------------------" >>testtry
|
echo "---------------------------- Test 50 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -Mv "brown\sfox" ./testdata/grepinputv) >>testtry
|
(cd $srcdir; $valgrind $pcregrep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 51 ------------------------------" >>testtry
|
echo "---------------------------- Test 51 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep --colour=always jumps ./testdata/grepinputv) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -Mv "brown\sfox" ./testdata/grepinputv) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 52 ------------------------------" >>testtry
|
echo "---------------------------- Test 52 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --colour=always jumps ./testdata/grepinputv) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 53 ------------------------------" >>testtry
|
echo "---------------------------- Test 53 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test 54 -----------------------------" >>testtry
|
echo "---------------------------- Test 54 ------------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 55 -----------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 56 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -c lazy ./testdata/grepinput*) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 57 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -c -l lazy ./testdata/grepinput*) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 58 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --regex=PATTERN ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 59 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --regexp=PATTERN ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 60 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --regex PATTERN ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 61 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --regexp PATTERN ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 62 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 63 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 64 ------------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 65 ------------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 66 ------------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 67 ------------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 68 ------------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 69 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -vn --colour=always pattern ./testdata/grepinputx) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 70 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 71 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|^03" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 72 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 73 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 74 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o "^01|02|^03" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 75 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --color=always "^01|02|^03" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 76 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 77 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|03" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 78 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|03" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 79 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 80 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o "\b01|\b02" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 81 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 82 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 83 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 84 -----------------------------" >>testtry
|
||||||
|
echo testdata/grepinput3 >testtemp1
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1 "fox|complete|t7") >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 85 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 86 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep "dog" ./testdata/grepbinary) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 87 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep "cat" ./testdata/grepbinary) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 88 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -v "cat" ./testdata/grepbinary) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 89 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -I "dog" ./testdata/grepbinary) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 90 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 91 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -a "dog" ./testdata/grepbinary) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 92 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --binary-files=text "dog" ./testdata/grepbinary) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 93 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --text "dog" ./testdata/grepbinary) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 94 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 95 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 96 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 97 -----------------------------" >>testtry
|
||||||
|
echo "grepinput$" >testtemp1
|
||||||
|
echo "grepinput8" >>testtemp1
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude-from $builddir/testtemp1 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 98 -----------------------------" >>testtry
|
||||||
|
echo "grepinput$" >testtemp1
|
||||||
|
echo "grepinput8" >>testtemp1
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 99 -----------------------------" >>testtry
|
||||||
|
echo "grepinput$" >testtemp1
|
||||||
|
echo "grepinput8" >testtemp2
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -L -r --include grepinput --exclude-from $builddir/testtemp1 --exclude-from=$builddir/testtemp2 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 100 ------------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 101 ------------------------------" >>testtry
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
|
||||||
# Now compare the results.
|
# Now compare the results.
|
||||||
|
|
||||||
@ -230,9 +500,11 @@ if [ $utf8 -ne 0 ] ; then
|
|||||||
|
|
||||||
echo "---------------------------- Test U1 ------------------------------" >testtry
|
echo "---------------------------- Test U1 ------------------------------" >testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
echo "---------------------------- Test U2 ------------------------------" >>testtry
|
echo "---------------------------- Test U2 ------------------------------" >>testtry
|
||||||
(cd $srcdir; $valgrind $pcregrep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtry
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
$cf $srcdir/testdata/grepoutput8 testtry
|
$cf $srcdir/testdata/grepoutput8 testtry
|
||||||
if [ $? != 0 ] ; then exit 1; fi
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
@ -247,7 +519,7 @@ fi
|
|||||||
# is not \n. Do not use exported files, whose line endings might be changed.
|
# is not \n. Do not use exported files, whose line endings might be changed.
|
||||||
# Instead, create an input file using printf so that its contents are exactly
|
# Instead, create an input file using printf so that its contents are exactly
|
||||||
# what we want. Note the messy fudge to get printf to write a string that
|
# what we want. Note the messy fudge to get printf to write a string that
|
||||||
# starts with a hyphen.
|
# starts with a hyphen. These tests are run in the build directory.
|
||||||
|
|
||||||
echo "Testing pcregrep newline settings"
|
echo "Testing pcregrep newline settings"
|
||||||
printf "abc\rdef\r\nghi\njkl" >testNinput
|
printf "abc\rdef\r\nghi\njkl" >testNinput
|
||||||
@ -263,8 +535,7 @@ pattern=`printf 'def\rjkl'`
|
|||||||
$valgrind $pcregrep -n --newline=cr -F "$pattern" testNinput >>testtry
|
$valgrind $pcregrep -n --newline=cr -F "$pattern" testNinput >>testtry
|
||||||
|
|
||||||
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtry
|
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtry
|
||||||
pattern=`printf 'xxx\r\njkl'`
|
$valgrind $pcregrep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinput >>testtry
|
||||||
$valgrind $pcregrep -n --newline=crlf -F "$pattern" testNinput >>testtry
|
|
||||||
|
|
||||||
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtry
|
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtry
|
||||||
$valgrind $pcregrep -n --newline=any "^(abc|def|ghi|jkl)" testNinput >>testtry
|
$valgrind $pcregrep -n --newline=any "^(abc|def|ghi|jkl)" testNinput >>testtry
|
||||||
|
1003
tools/pcre/RunTest
1003
tools/pcre/RunTest
File diff suppressed because it is too large
Load Diff
@ -1,39 +1,616 @@
|
|||||||
@rem This file was contributed by Ralf Junker, and touched up by
|
@echo off
|
||||||
@rem Daniel Richard G. Test 10 added by Philip H.
|
@rem This file must use CRLF linebreaks to function properly
|
||||||
@rem Philip H also changed test 3 to use "wintest" files.
|
@rem and requires both pcretest and pcregrep
|
||||||
@rem
|
@rem This file was originally contributed by Ralf Junker, and touched up by
|
||||||
@rem MS Windows batch file to run pcretest on testfiles with the correct
|
@rem Daniel Richard G. Tests 10-12 added by Philip H.
|
||||||
@rem options.
|
@rem Philip H also changed test 3 to use "wintest" files.
|
||||||
@rem
|
@rem
|
||||||
@rem Output is written to a newly created subfolder named "testdata".
|
@rem Updated by Tom Fortmann to support explicit test numbers on the command line.
|
||||||
|
@rem Added argument validation and added error reporting.
|
||||||
setlocal
|
@rem
|
||||||
|
@rem MS Windows batch file to run pcretest on testfiles with the correct
|
||||||
if [%srcdir%]==[] set srcdir=.
|
@rem options.
|
||||||
if [%pcretest%]==[] set pcretest=pcretest
|
@rem
|
||||||
|
@rem Sheri Pierce added logic to skip feature dependent tests
|
||||||
if not exist testout md testout
|
@rem tests 4 5 9 15 18 22 24 and 26 require utf support
|
||||||
|
@rem tests 6 7 10 16 and 19 require ucp support
|
||||||
%pcretest% -q %srcdir%\testdata\testinput1 > testout\testoutput1
|
@rem 11 requires ucp and link size 2
|
||||||
%pcretest% -q %srcdir%\testdata\testinput2 > testout\testoutput2
|
@rem 12 requires presence of jit support
|
||||||
@rem %pcretest% -q %srcdir%\testdata\testinput3 > testout\testoutput3
|
@rem 13 requires absence of jit support
|
||||||
%pcretest% -q %srcdir%\testdata\wintestinput3 > testout\wintestoutput3
|
@rem Sheri P also added override tests for study and jit testing
|
||||||
%pcretest% -q %srcdir%\testdata\testinput4 > testout\testoutput4
|
@rem Zoltan Herczeg added libpcre16 support
|
||||||
%pcretest% -q %srcdir%\testdata\testinput5 > testout\testoutput5
|
@rem Zoltan Herczeg added libpcre32 support
|
||||||
%pcretest% -q %srcdir%\testdata\testinput6 > testout\testoutput6
|
|
||||||
%pcretest% -q -dfa %srcdir%\testdata\testinput7 > testout\testoutput7
|
setlocal enabledelayedexpansion
|
||||||
%pcretest% -q -dfa %srcdir%\testdata\testinput8 > testout\testoutput8
|
if [%srcdir%]==[] (
|
||||||
%pcretest% -q -dfa %srcdir%\testdata\testinput9 > testout\testoutput9
|
if exist testdata\ set srcdir=.)
|
||||||
%pcretest% -q %srcdir%\testdata\testinput10 > testout\testoutput10
|
if [%srcdir%]==[] (
|
||||||
|
if exist ..\testdata\ set srcdir=..)
|
||||||
fc /n %srcdir%\testdata\testoutput1 testout\testoutput1
|
if [%srcdir%]==[] (
|
||||||
fc /n %srcdir%\testdata\testoutput2 testout\testoutput2
|
if exist ..\..\testdata\ set srcdir=..\..)
|
||||||
rem fc /n %srcdir%\testdata\testoutput3 testout\testoutput3
|
if NOT exist %srcdir%\testdata\ (
|
||||||
fc /n %srcdir%\testdata\wintestoutput3 testout\wintestoutput3
|
@rem Report the missing testdata folder; "echo" must be the command word.
echo Error: distribution testdata folder not found!
|
||||||
fc /n %srcdir%\testdata\testoutput4 testout\testoutput4
|
call :conferror
|
||||||
fc /n %srcdir%\testdata\testoutput5 testout\testoutput5
|
exit /b 1
|
||||||
fc /n %srcdir%\testdata\testoutput6 testout\testoutput6
|
goto :eof
|
||||||
fc /n %srcdir%\testdata\testoutput7 testout\testoutput7
|
)
|
||||||
fc /n %srcdir%\testdata\testoutput8 testout\testoutput8
|
|
||||||
fc /n %srcdir%\testdata\testoutput9 testout\testoutput9
|
if [%pcretest%]==[] set pcretest=.\pcretest.exe
|
||||||
fc /n %srcdir%\testdata\testoutput10 testout\testoutput10
|
|
||||||
|
echo source dir is %srcdir%
|
||||||
|
echo pcretest=%pcretest%
|
||||||
|
|
||||||
|
if NOT exist %pcretest% (
|
||||||
|
echo Error: %pcretest% not found!
|
||||||
|
echo.
|
||||||
|
call :conferror
|
||||||
|
exit /b 1
|
||||||
|
)
|
||||||
|
|
||||||
|
%pcretest% -C linksize >NUL
|
||||||
|
set link_size=%ERRORLEVEL%
|
||||||
|
%pcretest% -C pcre8 >NUL
|
||||||
|
set support8=%ERRORLEVEL%
|
||||||
|
%pcretest% -C pcre16 >NUL
|
||||||
|
set support16=%ERRORLEVEL%
|
||||||
|
%pcretest% -C pcre32 >NUL
|
||||||
|
set support32=%ERRORLEVEL%
|
||||||
|
%pcretest% -C utf >NUL
|
||||||
|
set utf=%ERRORLEVEL%
|
||||||
|
%pcretest% -C ucp >NUL
|
||||||
|
set ucp=%ERRORLEVEL%
|
||||||
|
%pcretest% -C jit >NUL
|
||||||
|
set jit=%ERRORLEVEL%
|
||||||
|
|
||||||
|
if %support8% EQU 1 (
|
||||||
|
if not exist testout8 md testout8
|
||||||
|
if not exist testoutstudy8 md testoutstudy8
|
||||||
|
if not exist testoutjit8 md testoutjit8
|
||||||
|
)
|
||||||
|
|
||||||
|
if %support16% EQU 1 (
|
||||||
|
if not exist testout16 md testout16
|
||||||
|
if not exist testoutstudy16 md testoutstudy16
|
||||||
|
if not exist testoutjit16 md testoutjit16
|
||||||
|
)
|
||||||
|
|
||||||
|
@rem Create the 32-bit output folders whenever the 32-bit library is available
@rem (support32 holds the result of "pcretest -C pcre32"). The guard must not
@rem depend on 16-bit support, otherwise a 32-bit-only build has no folder to
@rem redirect its test output into.
if %support32% EQU 1 (
  if not exist testout32 md testout32
  if not exist testoutstudy32 md testoutstudy32
  if not exist testoutjit32 md testoutjit32
)
|
||||||
|
|
||||||
|
set do1=no
|
||||||
|
set do2=no
|
||||||
|
set do3=no
|
||||||
|
set do4=no
|
||||||
|
set do5=no
|
||||||
|
set do6=no
|
||||||
|
set do7=no
|
||||||
|
set do8=no
|
||||||
|
set do9=no
|
||||||
|
set do10=no
|
||||||
|
set do11=no
|
||||||
|
set do12=no
|
||||||
|
set do13=no
|
||||||
|
set do14=no
|
||||||
|
set do15=no
|
||||||
|
set do16=no
|
||||||
|
set do17=no
|
||||||
|
set do18=no
|
||||||
|
set do19=no
|
||||||
|
set do20=no
|
||||||
|
set do21=no
|
||||||
|
set do22=no
|
||||||
|
set do23=no
|
||||||
|
set do24=no
|
||||||
|
set do25=no
|
||||||
|
set do26=no
|
||||||
|
set all=yes
|
||||||
|
|
||||||
|
for %%a in (%*) do (
|
||||||
|
set valid=no
|
||||||
|
for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26) do if %%v == %%a set valid=yes
|
||||||
|
if "!valid!" == "yes" (
|
||||||
|
set do%%a=yes
|
||||||
|
set all=no
|
||||||
|
) else (
|
||||||
|
echo Invalid test number - %%a!
|
||||||
|
echo Usage %0 [ test_number ] ...
|
||||||
|
echo Where test_number is one or more optional test numbers 1 through 26, default is all tests.
|
||||||
|
exit /b 1
|
||||||
|
)
|
||||||
|
)
|
||||||
|
set failed="no"
|
||||||
|
|
||||||
|
if "%all%" == "yes" (
|
||||||
|
set do1=yes
|
||||||
|
set do2=yes
|
||||||
|
set do3=yes
|
||||||
|
set do4=yes
|
||||||
|
set do5=yes
|
||||||
|
set do6=yes
|
||||||
|
set do7=yes
|
||||||
|
set do8=yes
|
||||||
|
set do9=yes
|
||||||
|
set do10=yes
|
||||||
|
set do11=yes
|
||||||
|
set do12=yes
|
||||||
|
set do13=yes
|
||||||
|
set do14=yes
|
||||||
|
set do15=yes
|
||||||
|
set do16=yes
|
||||||
|
set do17=yes
|
||||||
|
set do18=yes
|
||||||
|
set do19=yes
|
||||||
|
set do20=yes
|
||||||
|
set do21=yes
|
||||||
|
set do22=yes
|
||||||
|
set do23=yes
|
||||||
|
set do24=yes
|
||||||
|
set do25=yes
|
||||||
|
set do26=yes
|
||||||
|
)
|
||||||
|
|
||||||
|
@echo RunTest.bat's pcretest output is written to newly created subfolders named
|
||||||
|
@echo testout, testoutstudy and testoutjit.
|
||||||
|
@echo.
|
||||||
|
|
||||||
|
set mode=
|
||||||
|
set bits=8
|
||||||
|
|
||||||
|
:nextMode
|
||||||
|
if "%mode%" == "" (
|
||||||
|
if %support8% EQU 0 goto modeSkip
|
||||||
|
echo.
|
||||||
|
echo ---- Testing 8-bit library ----
|
||||||
|
echo.
|
||||||
|
)
|
||||||
|
if "%mode%" == "-16" (
|
||||||
|
if %support16% EQU 0 goto modeSkip
|
||||||
|
echo.
|
||||||
|
echo ---- Testing 16-bit library ----
|
||||||
|
echo.
|
||||||
|
)
|
||||||
|
if "%mode%" == "-32" (
|
||||||
|
if %support32% EQU 0 goto modeSkip
|
||||||
|
echo.
|
||||||
|
echo ---- Testing 32-bit library ----
|
||||||
|
echo.
|
||||||
|
)
|
||||||
|
if "%do1%" == "yes" call :do1
|
||||||
|
if "%do2%" == "yes" call :do2
|
||||||
|
if "%do3%" == "yes" call :do3
|
||||||
|
if "%do4%" == "yes" call :do4
|
||||||
|
if "%do5%" == "yes" call :do5
|
||||||
|
if "%do6%" == "yes" call :do6
|
||||||
|
if "%do7%" == "yes" call :do7
|
||||||
|
if "%do8%" == "yes" call :do8
|
||||||
|
if "%do9%" == "yes" call :do9
|
||||||
|
if "%do10%" == "yes" call :do10
|
||||||
|
if "%do11%" == "yes" call :do11
|
||||||
|
if "%do12%" == "yes" call :do12
|
||||||
|
if "%do13%" == "yes" call :do13
|
||||||
|
if "%do14%" == "yes" call :do14
|
||||||
|
if "%do15%" == "yes" call :do15
|
||||||
|
if "%do16%" == "yes" call :do16
|
||||||
|
if "%do17%" == "yes" call :do17
|
||||||
|
if "%do18%" == "yes" call :do18
|
||||||
|
if "%do19%" == "yes" call :do19
|
||||||
|
if "%do20%" == "yes" call :do20
|
||||||
|
if "%do21%" == "yes" call :do21
|
||||||
|
if "%do22%" == "yes" call :do22
|
||||||
|
if "%do23%" == "yes" call :do23
|
||||||
|
if "%do24%" == "yes" call :do24
|
||||||
|
if "%do25%" == "yes" call :do25
|
||||||
|
if "%do26%" == "yes" call :do26
|
||||||
|
:modeSkip
|
||||||
|
if "%mode%" == "" (
|
||||||
|
set mode=-16
|
||||||
|
set bits=16
|
||||||
|
goto nextMode
|
||||||
|
)
|
||||||
|
if "%mode%" == "-16" (
|
||||||
|
set mode=-32
|
||||||
|
set bits=32
|
||||||
|
goto nextMode
|
||||||
|
)
|
||||||
|
|
||||||
|
@rem If mode is -32, testing is finished
|
||||||
|
if %failed% == "yes" (
|
||||||
|
echo In above output, one or more of the various tests failed!
|
||||||
|
exit /b 1
|
||||||
|
)
|
||||||
|
echo All OK
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:runsub
@rem Function to execute pcretest and compare the output
@rem Arguments are as follows:
@rem
@rem 1 = test number
@rem 2 = outputdir
@rem 3 = test name use double quotes
@rem 4 - 9 = pcretest options
@rem
@rem Reads srcdir, pcretest, mode and bits from the calling script.
@rem Sets failed="yes" when pcretest exits with an error or when its output
@rem differs from the reference file. A Test 3 mismatch is only reported,
@rem because it usually indicates a missing locale (see the message below).

if [%1] == [] (
  echo Missing test number argument!
  exit /b 1
)

if [%2] == [] (
  echo Missing outputdir!
  exit /b 1
)

if [%3] == [] (
  echo Missing test name argument!
  exit /b 1
)

@rem Use the "wintestinputN" / "wintestoutputN" pair when the distribution
@rem ships one for this test number.
set testinput=testinput%1
set testoutput=testoutput%1
if exist %srcdir%\testdata\win%testinput% (
  set testinput=wintestinput%1
  set testoutput=wintestoutput%1
)

echo Test %1: %3
%pcretest% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% >%2%bits%\%testoutput%
if errorlevel 1 (
  echo. failed executing command-line:
  echo. %pcretest% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% ^>%2%bits%\%testoutput%
  set failed="yes"
  goto :eof
)

@rem Tests 11, 18, 21 and 22 are compared against a reference file carrying a
@rem "-<bits>" suffix; every other test uses the unsuffixed file.
set type=
for %%t in (11 18 21 22) do if [%1]==[%%t] set type=-%bits%

fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% >NUL

if errorlevel 1 (
  @rem Show exactly the command that was run, including the suffix.
  echo. failed comparison: fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput%
  if [%1]==[2] (
    echo.
    echo ** Test 2 requires a lot of stack. PCRE can be configured to
    echo ** use heap for recursion. Otherwise, to pass Test 2
    echo ** you generally need to allocate 8 mb stack to PCRE.
    echo ** See the 'pcrestack' page for a discussion of PCRE's
    echo ** stack usage.
    echo.
  )
  if [%1]==[3] (
    echo.
    echo ** Test 3 failure usually means french locale is not
    echo ** available on the system, rather than a bug or problem with PCRE.
    echo.
    @rem Deliberately return without marking the whole run as failed.
    goto :eof
  )

  set failed="yes"
  goto :eof
)

echo. Passed.
goto :eof
|
||||||
|
|
||||||
|
:do1
|
||||||
|
call :runsub 1 testout "Main functionality (Compatible with Perl >= 5.10)" -q
|
||||||
|
call :runsub 1 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 1 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do2
|
||||||
|
call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q
|
||||||
|
call :runsub 2 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 2 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do3
|
||||||
|
call :runsub 3 testout "Locale-specific features" -q
|
||||||
|
call :runsub 3 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 3 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do4
|
||||||
|
if %utf% EQU 0 (
|
||||||
|
echo Test 4 Skipped due to absence of UTF-%bits% support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 4 testout "UTF-%bits% support - (Compatible with Perl >= 5.10)" -q
|
||||||
|
call :runsub 4 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 4 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do5
|
||||||
|
if %utf% EQU 0 (
|
||||||
|
echo Test 5 Skipped due to absence of UTF-%bits% support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 5 testout "API, internals, and non-Perl stuff for UTF-%bits%" -q
|
||||||
|
call :runsub 5 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 5 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do6
|
||||||
|
if %ucp% EQU 0 (
|
||||||
|
echo Test 6 Skipped due to absence of Unicode property support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 6 testout "Unicode property support (Compatible with Perl >= 5.10)" -q
|
||||||
|
call :runsub 6 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 6 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do7
|
||||||
|
if %ucp% EQU 0 (
|
||||||
|
echo Test 7 Skipped due to absence of Unicode property support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 7 testout "API, internals, and non-Perl stuff for Unicode property support" -q
|
||||||
|
call :runsub 7 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 7 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do8
|
||||||
|
call :runsub 8 testout "DFA matching main functionality" -q -dfa
|
||||||
|
call :runsub 8 testoutstudy "Test with Study Override" -q -dfa -s
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do9
|
||||||
|
if %utf% EQU 0 (
|
||||||
|
echo Test 9 Skipped due to absence of UTF-%bits% support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 9 testout "DFA matching with UTF-%bits%" -q -dfa
|
||||||
|
call :runsub 9 testoutstudy "Test with Study Override" -q -dfa -s
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do10
|
||||||
|
if %ucp% EQU 0 (
|
||||||
|
echo Test 10 Skipped due to absence of Unicode property support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 10 testout "DFA matching with Unicode properties" -q -dfa
|
||||||
|
call :runsub 10 testoutstudy "Test with Study Override" -q -dfa -s
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do11
|
||||||
|
if NOT %link_size% EQU 2 (
|
||||||
|
echo Test 11 Skipped because link size is not 2.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
if %ucp% EQU 0 (
|
||||||
|
echo Test 11 Skipped due to absence of Unicode property support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 11 testout "Internal offsets and code size tests" -q
|
||||||
|
call :runsub 11 testoutstudy "Test with Study Override" -q -s
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do12
|
||||||
|
if %jit% EQU 0 (
|
||||||
|
echo Test 12 Skipped due to absence of JIT support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 12 testout "JIT-specific features (JIT available)" -q
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do13
|
||||||
|
if %jit% EQU 1 (
|
||||||
|
echo Test 13 Skipped due to presence of JIT support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 13 testout "JIT-specific features (JIT not available)" -q
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do14
|
||||||
|
if NOT %bits% EQU 8 (
|
||||||
|
echo Test 14 Skipped when running 16/32-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
copy /Y %srcdir%\testdata\saved16 testsaved16
|
||||||
|
copy /Y %srcdir%\testdata\saved32 testsaved32
|
||||||
|
call :runsub 14 testout "Specials for the basic 8-bit library" -q
|
||||||
|
call :runsub 14 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 14 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do15
|
||||||
|
if NOT %bits% EQU 8 (
|
||||||
|
echo Test 15 Skipped when running 16/32-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
if %utf% EQU 0 (
|
||||||
|
echo Test 15 Skipped due to absence of UTF-%bits% support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 15 testout "Specials for the 8-bit library with UTF-%bits% support" -q
|
||||||
|
call :runsub 15 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 15 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do16
|
||||||
|
if NOT %bits% EQU 8 (
|
||||||
|
echo Test 16 Skipped when running 16/32-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
if %ucp% EQU 0 (
|
||||||
|
echo Test 16 Skipped due to absence of Unicode property support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 16 testout "Specials for the 8-bit library with Unicode propery support" -q
|
||||||
|
call :runsub 16 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 16 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do17
|
||||||
|
if %bits% EQU 8 (
|
||||||
|
echo Test 17 Skipped when running 8-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 17 testout "Specials for the basic 16/32-bit library" -q
|
||||||
|
call :runsub 17 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 17 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do18
|
||||||
|
if %bits% EQU 8 (
|
||||||
|
echo Test 18 Skipped when running 8-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
if %utf% EQU 0 (
|
||||||
|
echo Test 18 Skipped due to absence of UTF-%bits% support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 18 testout "Specials for the 16/32-bit library with UTF-%bits% support" -q
|
||||||
|
call :runsub 18 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 18 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do19
|
||||||
|
if %bits% EQU 8 (
|
||||||
|
echo Test 19 Skipped when running 8-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
if %ucp% EQU 0 (
|
||||||
|
echo Test 19 Skipped due to absence of Unicode property support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 19 testout "Specials for the 16/32-bit library with Unicode property support" -q
|
||||||
|
call :runsub 19 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 19 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do20
|
||||||
|
if %bits% EQU 8 (
|
||||||
|
echo Test 20 Skipped when running 8-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 20 testout "DFA specials for the basic 16/32-bit library" -q -dfa
|
||||||
|
call :runsub 20 testoutstudy "Test with Study Override" -q -dfa -s
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do21
|
||||||
|
if %bits% EQU 8 (
|
||||||
|
echo Test 21 Skipped when running 8-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
if NOT %link_size% EQU 2 (
|
||||||
|
echo Test 21 Skipped because link size is not 2.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
copy /Y %srcdir%\testdata\saved8 testsaved8
|
||||||
|
copy /Y %srcdir%\testdata\saved16LE-1 testsaved16LE-1
|
||||||
|
copy /Y %srcdir%\testdata\saved16BE-1 testsaved16BE-1
|
||||||
|
copy /Y %srcdir%\testdata\saved32LE-1 testsaved32LE-1
|
||||||
|
copy /Y %srcdir%\testdata\saved32BE-1 testsaved32BE-1
|
||||||
|
call :runsub 21 testout "Reloads for the basic 16/32-bit library" -q
|
||||||
|
call :runsub 21 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 21 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do22
|
||||||
|
if %bits% EQU 8 (
|
||||||
|
echo Test 22 Skipped when running 8-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
if %utf% EQU 0 (
|
||||||
|
echo Test 22 Skipped due to absence of UTF-%bits% support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
if NOT %link_size% EQU 2 (
|
||||||
|
echo Test 22 Skipped because link size is not 2.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
copy /Y %srcdir%\testdata\saved16LE-2 testsaved16LE-2
|
||||||
|
copy /Y %srcdir%\testdata\saved16BE-2 testsaved16BE-2
|
||||||
|
copy /Y %srcdir%\testdata\saved32LE-2 testsaved32LE-2
|
||||||
|
copy /Y %srcdir%\testdata\saved32BE-2 testsaved32BE-2
|
||||||
|
call :runsub 22 testout "Reloads for the 16/32-bit library with UTF-16/32 support" -q
|
||||||
|
call :runsub 22 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 22 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do23
|
||||||
|
if NOT %bits% EQU 16 (
|
||||||
|
echo Test 23 Skipped when running 8/32-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 23 testout "Specials for the 16-bit library" -q
|
||||||
|
call :runsub 23 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 23 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do24
|
||||||
|
if NOT %bits% EQU 16 (
|
||||||
|
echo Test 24 Skipped when running 8/32-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
if %utf% EQU 0 (
|
||||||
|
echo Test 24 Skipped due to absence of UTF-%bits% support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 24 testout "Specials for the 16-bit library with UTF-16 support" -q
|
||||||
|
call :runsub 24 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 24 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do25
|
||||||
|
if NOT %bits% EQU 32 (
|
||||||
|
echo Test 25 Skipped when running 8/16-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 25 testout "Specials for the 32-bit library" -q
|
||||||
|
call :runsub 25 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 25 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do26
|
||||||
|
if NOT %bits% EQU 32 (
|
||||||
|
echo Test 26 Skipped when running 8/16-bit tests.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
if %utf% EQU 0 (
|
||||||
|
echo Test 26 Skipped due to absence of UTF-%bits% support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 26 testout "Specials for the 32-bit library with UTF-32 support" -q
|
||||||
|
call :runsub 26 testoutstudy "Test with Study Override" -q -s
|
||||||
|
if %jit% EQU 1 call :runsub 26 testoutjit "Test with JIT Override" -q -s+
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:conferror
|
||||||
|
@echo.
|
||||||
|
@echo Either your build is incomplete or you have a configuration error.
|
||||||
|
@echo.
|
||||||
|
@echo If configured with cmake and executed via "make test" or the MSVC "RUN_TESTS"
|
||||||
|
@echo project, pcre_test.bat defines variables and automatically calls RunTest.bat.
|
||||||
|
@echo For manual testing of all available features, after configuring with cmake
|
||||||
|
@echo and building, you can run the built pcre_test.bat. For best results with
|
||||||
|
@echo cmake builds and tests avoid directories with full path names that include
|
||||||
|
@echo spaces for source or build.
|
||||||
|
@echo.
|
||||||
|
@echo Otherwise, if the build dir is in a subdir of the source dir, testdata needed
|
||||||
|
@echo for input and verification should be found automatically when (from the
|
||||||
|
@echo location of the the built exes) you call RunTest.bat. By default RunTest.bat
|
||||||
|
@echo runs all tests compatible with the linked pcre library but it can be given
|
||||||
|
@echo a test number as an argument.
|
||||||
|
@echo.
|
||||||
|
@echo If the build dir is not under the source dir you can either copy your exes
|
||||||
|
@echo to the source folder or copy RunTest.bat and the testdata folder to the
|
||||||
|
@echo location of your built exes and then run RunTest.bat.
|
||||||
|
@echo.
|
||||||
|
goto :eof
|
||||||
|
7245
tools/pcre/aclocal.m4
vendored
7245
tools/pcre/aclocal.m4
vendored
File diff suppressed because it is too large
Load Diff
17
tools/pcre/cmake/FindEditline.cmake
Normal file
17
tools/pcre/cmake/FindEditline.cmake
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Modified from FindReadline.cmake (PH Feb 2012)
#
# Find module for the editline library.
#
# Variables set by this module:
#   EDITLINE_INCLUDE_DIR - directory in which readline.h was found
#   EDITLINE_LIBRARY     - the "edit" library
#   EDITLINE_FOUND       - set to TRUE on the fast path below; otherwise the
#                          outcome is reported by
#                          find_package_handle_standard_args()

# Fast path: both results are already known, so skip the searches. Only the
# two variables this module actually locates are checked here; the
# NCURSES_LIBRARY test inherited from FindReadline.cmake referred to a
# variable that is never searched for in this file.
if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
  set(EDITLINE_FOUND TRUE)
else()
  find_path(EDITLINE_INCLUDE_DIR
    NAMES readline.h
    PATHS
      /usr/include/editline
      /usr/include/edit/readline
      /usr/include/readline
  )

  find_library(EDITLINE_LIBRARY NAMES edit)
  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)

  mark_as_advanced(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
endif()
|
343
tools/pcre/compile
Normal file
343
tools/pcre/compile
Normal file
@ -0,0 +1,343 @@
|
|||||||
|
#! /bin/sh
|
||||||
|
# Wrapper for compilers which do not understand '-c -o'.
|
||||||
|
|
||||||
|
scriptversion=2012-03-05.13; # UTC
|
||||||
|
|
||||||
|
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2009, 2010, 2012 Free
|
||||||
|
# Software Foundation, Inc.
|
||||||
|
# Written by Tom Tromey <tromey@cygnus.com>.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
# As a special exception to the GNU General Public License, if you
|
||||||
|
# distribute this file as part of a program that contains a
|
||||||
|
# configuration script generated by Autoconf, you may include it under
|
||||||
|
# the same distribution terms that you use for the rest of that program.
|
||||||
|
|
||||||
|
# This file is maintained in Automake, please report
|
||||||
|
# bugs to <bug-automake@gnu.org> or send patches to
|
||||||
|
# <automake-patches@gnu.org>.
|
||||||
|
|
||||||
|
# A single newline character, kept in a variable so it can be spliced into
# other strings.
nl='
'

# We need space, tab and new line, in precisely that order.  The tab is
# produced with printf rather than written literally so that tools which
# rewrite whitespace cannot silently turn it into a space.
IFS=" `printf '\t'`$nl"

# Conversion type used by func_file_conv; empty until that function
# determines it on first use.
file_conv=
|
||||||
|
|
||||||
|
# func_file_conv build_file lazy
# Translate the $build-style name BUILD_FILE into $host form and leave the
# result in $file.  Only Windows hosts are handled.  LAZY is a
# comma-separated list of conversion types; when the conversion type
# selected for this machine appears in that list, the name is left alone.
func_file_conv ()
{
  file=$1

  # Only absolute names that are not UNC names ('/' or '/x...') are
  # candidates for conversion; everything else is returned unchanged.
  case $file in
    / | /[!/]*) ;;
    *) return 0 ;;
  esac

  # Pick the conversion type from 'uname -s' the first time it is needed
  # and remember it in $file_conv for later calls.
  if test -z "$file_conv"; then
    case `uname -s` in
      MINGW*)  file_conv=mingw ;;
      CYGWIN*) file_conv=cygwin ;;
      *)       file_conv=wine ;;
    esac
  fi

  case $file_conv/,$2, in
    *,$file_conv,*)
      # The caller listed this conversion type as lazy: keep $file as is.
      ;;
    mingw/*)
      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
      ;;
    cygwin/*)
      file=`cygpath -m "$file" || echo "$file"`
      ;;
    wine/*)
      file=`winepath -w "$file" || echo "$file"`
      ;;
  esac
}
|
||||||
|
|
||||||
|
# func_cl_dashL linkdir
# Register LINKDIR as a library directory for cl: the converted name is
# appended to the ';'-separated $lib_path list and also handed to the
# linker as a -LIBPATH: option in $linker_opts.
func_cl_dashL ()
{
  func_file_conv "$1"
  # Prefix with "<old list>;" only when the list already has an entry.
  lib_path="${lib_path:+$lib_path;}$file"
  linker_opts="$linker_opts -LIBPATH:$file"
}
|
||||||
|
|
||||||
|
# func_cl_dashl library
# Resolve LIBRARY for cl by walking the ';'-separated directories in
# $lib_path and $LIB.  The result is left in $lib: the full name of the
# first match, or LIBRARY.lib when no directory contains it.
func_cl_dashl ()
{
  lib=$1
  found=no
  save_IFS=$IFS
  IFS=';'
  for dir in $lib_path $LIB
  do
    # The special IFS is only needed to split the list above.
    IFS=$save_IFS
    if $shared && test -f "$dir/$lib.dll.lib"; then
      # Import library; considered only while $shared is true.
      lib=$dir/$lib.dll.lib
    elif test -f "$dir/$lib.lib"; then
      lib=$dir/$lib.lib
    else
      continue
    fi
    found=yes
    break
  done
  IFS=$save_IFS

  # Nothing matched: fall back to the bare LIBRARY.lib name.
  test "$found" = yes || lib=$lib.lib
}
|
||||||
|
|
||||||
|
# func_cl_wrapper cl arg...
# Rewrite a cc-style command line (cl followed by its arguments) into the
# option syntax cl uses, then exec the result.  Does not return.
func_cl_wrapper ()
{
  # Assume a capable shell
  lib_path=
  shared=:
  linker_opts=

  # Walk the original words once.  Each pass looks at $1, may append one
  # translated word to the end of "$@", and finally shifts the original
  # word away, so after the loop "$@" holds only translated words.
  for orig_word
  do
    if test -n "$eat"; then
      # This word was the operand of the previous option; already handled.
      eat=
    else
      requeue=false
      case $1 in
        -o)
          # configure might choose to run compile as 'compile cc -o foo foo.c',
          # so the output is an object (-Fo) or an executable (-Fe).
          eat=1
          func_file_conv "$2"
          case $2 in
            *.o | *.[oO][bB][jJ]) word="-Fo$file" ;;
            *)                    word="-Fe$file" ;;
          esac
          requeue=:
          ;;
        -I)
          eat=1
          func_file_conv "$2" mingw
          word="-I$file"
          requeue=:
          ;;
        -I*)
          func_file_conv "${1#-I}" mingw
          word="-I$file"
          requeue=:
          ;;
        -l)
          eat=1
          func_cl_dashl "$2"
          word=$lib
          requeue=:
          ;;
        -l*)
          func_cl_dashl "${1#-l}"
          word=$lib
          requeue=:
          ;;
        -L)
          eat=1
          func_cl_dashL "$2"
          ;;
        -L*)
          func_cl_dashL "${1#-L}"
          ;;
        -static)
          shared=false
          ;;
        -Wl,*)
          # Split the comma-separated list and collect each piece as a
          # linker option.
          wl_list=${1#-Wl,}
          save_ifs="$IFS"
          IFS=','
          for flag in $wl_list; do
            IFS="$save_ifs"
            linker_opts="$linker_opts $flag"
          done
          IFS="$save_ifs"
          ;;
        -Xlinker)
          eat=1
          linker_opts="$linker_opts $2"
          ;;
        -*)
          word=$1
          requeue=:
          ;;
        *.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
          func_file_conv "$1"
          word="-Tp$file"
          requeue=:
          ;;
        *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
          func_file_conv "$1" mingw
          word=$file
          requeue=:
          ;;
        *)
          word=$1
          requeue=:
          ;;
      esac
      if $requeue; then
        # Append the translated word; the 'x' placeholder guards against a
        # leading word that 'set' could mistake for an option.
        set x "$@" "$word"
        shift
      fi
    fi
    shift
  done

  # Every collected linker option carries a leading space, so this yields
  # "-link opt1 opt2 ...".
  test -z "$linker_opts" || linker_opts="-link$linker_opts"

  exec "$@" $linker_opts
  exit 1
}
|
||||||
|
|
||||||
|
# Non-empty while the next word is the operand of an option that has
# already been processed.
eat=

# Handle the trivial invocations, and hand cl-style compilers over to the
# dedicated wrapper.
case $1 in
  '')
    echo "$0: No command. Try '$0 --help' for more information." 1>&2
    exit 1
    ;;
  -h | --h*)
    cat <<\EOF
Usage: compile [--help] [--version] PROGRAM [ARGS]

Wrapper for compilers which do not understand '-c -o'.
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
arguments, and rename the output as expected.

If you are trying to build a whole package this is not the
right script to run: please start by reading the file 'INSTALL'.

Report bugs to <bug-automake@gnu.org>.
EOF
    exit $?
    ;;
  -v | --v*)
    echo "compile $scriptversion"
    exit $?
    ;;
  cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
    func_cl_wrapper "$@"      # Doesn't return...
    ;;
esac

ofile=
cfile=

# Rebuild "$@" without '-o OBJECT', remembering the object name in $ofile
# and the last '.c' source in $cfile.
for orig_word
do
  if test -n "$eat"; then
    eat=
  else
    case $1 in
      -o)
        # configure might choose to run compile as 'compile cc -o foo foo.c'.
        # So we strip '-o arg' only if arg is an object.
        eat=1
        case $2 in
          *.o | *.obj)
            ofile=$2
            ;;
          *)
            set x "$@" -o "$2"
            shift
            ;;
        esac
        ;;
      *)
        case $1 in
          *.c) cfile=$1 ;;
        esac
        set x "$@" "$1"
        shift
        ;;
    esac
  fi
  shift
done

# If no '-o' option was seen then we might have been invoked from a
# pattern rule where we don't need one.  That is ok -- this is a
# normal compilation that the losing compiler can handle.  If no
# '.c' file was seen then we are probably linking.  That is also
# ok.
test -n "$ofile" && test -n "$cfile" || exec "$@"

# Name of file we expect compiler to create.
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`

# Create the lock directory.
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file.  Also, base the name on the expected
# object file name, since that is what matters with a parallel build.
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
until mkdir "$lockdir" >/dev/null 2>&1; do
  sleep 1
done
# FIXME: race condition here if user kills between mkdir and trap.
trap "rmdir '$lockdir'; exit 1" 1 2 15

# Run the compile.
"$@"
ret=$?

# Move whichever object the compiler produced (NAME.o, else NAME.obj) to
# the requested output name, unless it is already there.
produced=
if test -f "$cofile"; then
  produced=$cofile
elif test -f "${cofile}bj"; then
  produced=${cofile}bj
fi
if test -n "$produced" && test "$produced" != "$ofile"; then
  mv "$produced" "$ofile"
fi

rmdir "$lockdir"
exit $ret
|
||||||
|
|
||||||
|
# Local Variables:
|
||||||
|
# mode: shell-script
|
||||||
|
# sh-indentation: 2
|
||||||
|
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||||
|
# time-stamp-start: "scriptversion="
|
||||||
|
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||||
|
# time-stamp-time-zone: "UTC"
|
||||||
|
# time-stamp-end: "; # UTC"
|
||||||
|
# End:
|
@ -18,9 +18,15 @@
|
|||||||
|
|
||||||
#cmakedefine PCRE_STATIC 1
|
#cmakedefine PCRE_STATIC 1
|
||||||
|
|
||||||
#cmakedefine SUPPORT_UTF8 1
|
#cmakedefine SUPPORT_PCRE8 1
|
||||||
|
#cmakedefine SUPPORT_PCRE16 1
|
||||||
|
#cmakedefine SUPPORT_PCRE32 1
|
||||||
|
#cmakedefine SUPPORT_JIT 1
|
||||||
|
#cmakedefine SUPPORT_PCREGREP_JIT 1
|
||||||
|
#cmakedefine SUPPORT_UTF 1
|
||||||
#cmakedefine SUPPORT_UCP 1
|
#cmakedefine SUPPORT_UCP 1
|
||||||
#cmakedefine EBCDIC 1
|
#cmakedefine EBCDIC 1
|
||||||
|
#cmakedefine EBCDIC_NL25 1
|
||||||
#cmakedefine BSR_ANYCRLF 1
|
#cmakedefine BSR_ANYCRLF 1
|
||||||
#cmakedefine NO_RECURSE 1
|
#cmakedefine NO_RECURSE 1
|
||||||
|
|
||||||
@ -29,14 +35,18 @@
|
|||||||
|
|
||||||
#cmakedefine SUPPORT_LIBBZ2 1
|
#cmakedefine SUPPORT_LIBBZ2 1
|
||||||
#cmakedefine SUPPORT_LIBZ 1
|
#cmakedefine SUPPORT_LIBZ 1
|
||||||
|
#cmakedefine SUPPORT_LIBEDIT 1
|
||||||
#cmakedefine SUPPORT_LIBREADLINE 1
|
#cmakedefine SUPPORT_LIBREADLINE 1
|
||||||
|
|
||||||
|
#cmakedefine SUPPORT_VALGRIND 1
|
||||||
|
#cmakedefine SUPPORT_GCOV 1
|
||||||
|
|
||||||
#define NEWLINE @NEWLINE@
|
#define NEWLINE @NEWLINE@
|
||||||
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
|
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
|
||||||
#define LINK_SIZE @PCRE_LINK_SIZE@
|
#define LINK_SIZE @PCRE_LINK_SIZE@
|
||||||
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@
|
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@
|
||||||
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
|
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
|
||||||
|
#define PCREGREP_BUFSIZE @PCREGREP_BUFSIZE@
|
||||||
|
|
||||||
#define MAX_NAME_SIZE 32
|
#define MAX_NAME_SIZE 32
|
||||||
#define MAX_NAME_COUNT 10000
|
#define MAX_NAME_COUNT 10000
|
||||||
|
568
tools/pcre/config.guess
vendored
568
tools/pcre/config.guess
vendored
@ -1,10 +1,10 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Attempt to guess a canonical system name.
|
# Attempt to guess a canonical system name.
|
||||||
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
|
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
|
||||||
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
|
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
||||||
# Free Software Foundation, Inc.
|
# 2011, 2012 Free Software Foundation, Inc.
|
||||||
|
|
||||||
timestamp='2008-09-28'
|
timestamp='2012-08-14'
|
||||||
|
|
||||||
# This file is free software; you can redistribute it and/or modify it
|
# This file is free software; you can redistribute it and/or modify it
|
||||||
# under the terms of the GNU General Public License as published by
|
# under the terms of the GNU General Public License as published by
|
||||||
@ -17,9 +17,7 @@ timestamp='2008-09-28'
|
|||||||
# General Public License for more details.
|
# General Public License for more details.
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program; if not, write to the Free Software
|
# along with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||||
# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
|
||||||
# 02110-1301, USA.
|
|
||||||
#
|
#
|
||||||
# As a special exception to the GNU General Public License, if you
|
# As a special exception to the GNU General Public License, if you
|
||||||
# distribute this file as part of a program that contains a
|
# distribute this file as part of a program that contains a
|
||||||
@ -27,16 +25,16 @@ timestamp='2008-09-28'
|
|||||||
# the same distribution terms that you use for the rest of that program.
|
# the same distribution terms that you use for the rest of that program.
|
||||||
|
|
||||||
|
|
||||||
# Originally written by Per Bothner <per@bothner.com>.
|
# Originally written by Per Bothner. Please send patches (context
|
||||||
# Please send patches to <config-patches@gnu.org>. Submit a context
|
# diff format) to <config-patches@gnu.org> and include a ChangeLog
|
||||||
# diff and a properly formatted ChangeLog entry.
|
# entry.
|
||||||
#
|
#
|
||||||
# This script attempts to guess a canonical system name similar to
|
# This script attempts to guess a canonical system name similar to
|
||||||
# config.sub. If it succeeds, it prints the system name on stdout, and
|
# config.sub. If it succeeds, it prints the system name on stdout, and
|
||||||
# exits with 0. Otherwise, it exits with 1.
|
# exits with 0. Otherwise, it exits with 1.
|
||||||
#
|
#
|
||||||
# The plan is that this can be called by configure scripts if you
|
# You can get the latest version of this script from:
|
||||||
# don't specify an explicit build system type.
|
# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
|
||||||
|
|
||||||
me=`echo "$0" | sed -e 's,.*/,,'`
|
me=`echo "$0" | sed -e 's,.*/,,'`
|
||||||
|
|
||||||
@ -56,8 +54,9 @@ version="\
|
|||||||
GNU config.guess ($timestamp)
|
GNU config.guess ($timestamp)
|
||||||
|
|
||||||
Originally written by Per Bothner.
|
Originally written by Per Bothner.
|
||||||
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
|
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
|
||||||
2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
|
||||||
|
Free Software Foundation, Inc.
|
||||||
|
|
||||||
This is free software; see the source for copying conditions. There is NO
|
This is free software; see the source for copying conditions. There is NO
|
||||||
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
||||||
@ -139,9 +138,10 @@ UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
|
|||||||
UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
|
UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
|
||||||
UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
|
UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
|
||||||
|
|
||||||
if [ "${UNAME_SYSTEM}" = "Linux" ] ; then
|
case "${UNAME_SYSTEM}" in
|
||||||
|
Linux|GNU/*)
|
||||||
eval $set_cc_for_build
|
eval $set_cc_for_build
|
||||||
cat << EOF > $dummy.c
|
cat <<-EOF > $dummy.c
|
||||||
#include <features.h>
|
#include <features.h>
|
||||||
#ifdef __UCLIBC__
|
#ifdef __UCLIBC__
|
||||||
# ifdef __UCLIBC_CONFIG_VERSION__
|
# ifdef __UCLIBC_CONFIG_VERSION__
|
||||||
@ -150,18 +150,23 @@ if [ "${UNAME_SYSTEM}" = "Linux" ] ; then
|
|||||||
LIBC=uclibc
|
LIBC=uclibc
|
||||||
# endif
|
# endif
|
||||||
#else
|
#else
|
||||||
|
# ifdef __dietlibc__
|
||||||
|
LIBC=dietlibc
|
||||||
|
# else
|
||||||
LIBC=gnu
|
LIBC=gnu
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
EOF
|
EOF
|
||||||
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep LIBC= | sed -e 's: ::g'`
|
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
|
||||||
fi
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
# Note: order is significant - the case branches are not exclusive.
|
# Note: order is significant - the case branches are not exclusive.
|
||||||
|
|
||||||
case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
|
case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
|
||||||
*:NetBSD:*:*)
|
*:NetBSD:*:*)
|
||||||
# NetBSD (nbsd) targets should (where applicable) match one or
|
# NetBSD (nbsd) targets should (where applicable) match one or
|
||||||
# more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
|
# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
|
||||||
# *-*-netbsdecoff* and *-*-netbsd*. For targets that recently
|
# *-*-netbsdecoff* and *-*-netbsd*. For targets that recently
|
||||||
# switched to ELF, *-*-netbsd* would select the old
|
# switched to ELF, *-*-netbsd* would select the old
|
||||||
# object file format. This provides both forward
|
# object file format. This provides both forward
|
||||||
@ -187,7 +192,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
|
|||||||
arm*|i386|m68k|ns32k|sh3*|sparc|vax)
|
arm*|i386|m68k|ns32k|sh3*|sparc|vax)
|
||||||
eval $set_cc_for_build
|
eval $set_cc_for_build
|
||||||
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
|
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
|
||||||
| grep __ELF__ >/dev/null
|
| grep -q __ELF__
|
||||||
then
|
then
|
||||||
# Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
|
# Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
|
||||||
# Return netbsd for either. FIX?
|
# Return netbsd for either. FIX?
|
||||||
@ -197,7 +202,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
|
|||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
os=netbsd
|
os=netbsd
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
# The OS release
|
# The OS release
|
||||||
@ -218,6 +223,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
|
|||||||
# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
|
# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
|
||||||
echo "${machine}-${os}${release}"
|
echo "${machine}-${os}${release}"
|
||||||
exit ;;
|
exit ;;
|
||||||
|
*:Bitrig:*:*)
|
||||||
|
UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
|
||||||
|
echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE}
|
||||||
|
exit ;;
|
||||||
*:OpenBSD:*:*)
|
*:OpenBSD:*:*)
|
||||||
UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
|
UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
|
||||||
echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
|
echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
|
||||||
@ -240,7 +249,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
|
|||||||
UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
|
UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
|
||||||
;;
|
;;
|
||||||
*5.*)
|
*5.*)
|
||||||
UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
|
UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
# According to Compaq, /usr/sbin/psrinfo has been available on
|
# According to Compaq, /usr/sbin/psrinfo has been available on
|
||||||
@ -286,7 +295,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
|
|||||||
# A Xn.n version is an unreleased experimental baselevel.
|
# A Xn.n version is an unreleased experimental baselevel.
|
||||||
# 1.2 uses "1.2" for uname -r.
|
# 1.2 uses "1.2" for uname -r.
|
||||||
echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
|
echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
|
||||||
exit ;;
|
# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
|
||||||
|
exitcode=$?
|
||||||
|
trap '' 0
|
||||||
|
exit $exitcode ;;
|
||||||
Alpha\ *:Windows_NT*:*)
|
Alpha\ *:Windows_NT*:*)
|
||||||
# How do we know it's Interix rather than the generic POSIX subsystem?
|
# How do we know it's Interix rather than the generic POSIX subsystem?
|
||||||
# Should we change UNAME_MACHINE based on the output of uname instead
|
# Should we change UNAME_MACHINE based on the output of uname instead
|
||||||
@ -312,7 +324,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
|
|||||||
echo s390-ibm-zvmoe
|
echo s390-ibm-zvmoe
|
||||||
exit ;;
|
exit ;;
|
||||||
*:OS400:*:*)
|
*:OS400:*:*)
|
||||||
echo powerpc-ibm-os400
|
echo powerpc-ibm-os400
|
||||||
exit ;;
|
exit ;;
|
||||||
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
|
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
|
||||||
echo arm-acorn-riscix${UNAME_RELEASE}
|
echo arm-acorn-riscix${UNAME_RELEASE}
|
||||||
@ -341,14 +353,33 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
|
|||||||
case `/usr/bin/uname -p` in
|
case `/usr/bin/uname -p` in
|
||||||
sparc) echo sparc-icl-nx7; exit ;;
|
sparc) echo sparc-icl-nx7; exit ;;
|
||||||
esac ;;
|
esac ;;
|
||||||
|
s390x:SunOS:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
|
||||||
|
exit ;;
|
||||||
sun4H:SunOS:5.*:*)
|
sun4H:SunOS:5.*:*)
|
||||||
echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
|
echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
|
||||||
exit ;;
|
exit ;;
|
||||||
sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
|
sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
|
||||||
echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
|
echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
|
||||||
exit ;;
|
exit ;;
|
||||||
|
i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
|
||||||
|
echo i386-pc-auroraux${UNAME_RELEASE}
|
||||||
|
exit ;;
|
||||||
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
|
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
|
||||||
echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
|
eval $set_cc_for_build
|
||||||
|
SUN_ARCH="i386"
|
||||||
|
# If there is a compiler, see if it is configured for 64-bit objects.
|
||||||
|
# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
|
||||||
|
# This test works for both compilers.
|
||||||
|
if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
|
||||||
|
if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
|
||||||
|
(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
|
||||||
|
grep IS_64BIT_ARCH >/dev/null
|
||||||
|
then
|
||||||
|
SUN_ARCH="x86_64"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
|
||||||
exit ;;
|
exit ;;
|
||||||
sun4*:SunOS:6*:*)
|
sun4*:SunOS:6*:*)
|
||||||
# According to config.sub, this is the proper way to canonicalize
|
# According to config.sub, this is the proper way to canonicalize
|
||||||
@ -392,23 +423,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
|
|||||||
# MiNT. But MiNT is downward compatible to TOS, so this should
|
# MiNT. But MiNT is downward compatible to TOS, so this should
|
||||||
# be no problem.
|
# be no problem.
|
||||||
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
|
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
|
||||||
echo m68k-atari-mint${UNAME_RELEASE}
|
echo m68k-atari-mint${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
|
atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
|
||||||
echo m68k-atari-mint${UNAME_RELEASE}
|
echo m68k-atari-mint${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
*falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
|
*falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
|
||||||
echo m68k-atari-mint${UNAME_RELEASE}
|
echo m68k-atari-mint${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
|
milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
|
||||||
echo m68k-milan-mint${UNAME_RELEASE}
|
echo m68k-milan-mint${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
|
hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
|
||||||
echo m68k-hades-mint${UNAME_RELEASE}
|
echo m68k-hades-mint${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
*:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
|
*:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
|
||||||
echo m68k-unknown-mint${UNAME_RELEASE}
|
echo m68k-unknown-mint${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
m68k:machten:*:*)
|
m68k:machten:*:*)
|
||||||
echo m68k-apple-machten${UNAME_RELEASE}
|
echo m68k-apple-machten${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
@ -478,8 +509,8 @@ EOF
|
|||||||
echo m88k-motorola-sysv3
|
echo m88k-motorola-sysv3
|
||||||
exit ;;
|
exit ;;
|
||||||
AViiON:dgux:*:*)
|
AViiON:dgux:*:*)
|
||||||
# DG/UX returns AViiON for all architectures
|
# DG/UX returns AViiON for all architectures
|
||||||
UNAME_PROCESSOR=`/usr/bin/uname -p`
|
UNAME_PROCESSOR=`/usr/bin/uname -p`
|
||||||
if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
|
if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
|
||||||
then
|
then
|
||||||
if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
|
if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
|
||||||
@ -492,7 +523,7 @@ EOF
|
|||||||
else
|
else
|
||||||
echo i586-dg-dgux${UNAME_RELEASE}
|
echo i586-dg-dgux${UNAME_RELEASE}
|
||||||
fi
|
fi
|
||||||
exit ;;
|
exit ;;
|
||||||
M88*:DolphinOS:*:*) # DolphinOS (SVR3)
|
M88*:DolphinOS:*:*) # DolphinOS (SVR3)
|
||||||
echo m88k-dolphin-sysv3
|
echo m88k-dolphin-sysv3
|
||||||
exit ;;
|
exit ;;
|
||||||
@ -549,7 +580,7 @@ EOF
|
|||||||
echo rs6000-ibm-aix3.2
|
echo rs6000-ibm-aix3.2
|
||||||
fi
|
fi
|
||||||
exit ;;
|
exit ;;
|
||||||
*:AIX:*:[456])
|
*:AIX:*:[4567])
|
||||||
IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
|
IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
|
||||||
if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
|
if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
|
||||||
IBM_ARCH=rs6000
|
IBM_ARCH=rs6000
|
||||||
@ -592,52 +623,52 @@ EOF
|
|||||||
9000/[678][0-9][0-9])
|
9000/[678][0-9][0-9])
|
||||||
if [ -x /usr/bin/getconf ]; then
|
if [ -x /usr/bin/getconf ]; then
|
||||||
sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
|
sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
|
||||||
sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
|
sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
|
||||||
case "${sc_cpu_version}" in
|
case "${sc_cpu_version}" in
|
||||||
523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
|
523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
|
||||||
528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
|
528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
|
||||||
532) # CPU_PA_RISC2_0
|
532) # CPU_PA_RISC2_0
|
||||||
case "${sc_kernel_bits}" in
|
case "${sc_kernel_bits}" in
|
||||||
32) HP_ARCH="hppa2.0n" ;;
|
32) HP_ARCH="hppa2.0n" ;;
|
||||||
64) HP_ARCH="hppa2.0w" ;;
|
64) HP_ARCH="hppa2.0w" ;;
|
||||||
'') HP_ARCH="hppa2.0" ;; # HP-UX 10.20
|
'') HP_ARCH="hppa2.0" ;; # HP-UX 10.20
|
||||||
esac ;;
|
esac ;;
|
||||||
esac
|
esac
|
||||||
fi
|
fi
|
||||||
if [ "${HP_ARCH}" = "" ]; then
|
if [ "${HP_ARCH}" = "" ]; then
|
||||||
eval $set_cc_for_build
|
eval $set_cc_for_build
|
||||||
sed 's/^ //' << EOF >$dummy.c
|
sed 's/^ //' << EOF >$dummy.c
|
||||||
|
|
||||||
#define _HPUX_SOURCE
|
#define _HPUX_SOURCE
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
int main ()
|
int main ()
|
||||||
{
|
{
|
||||||
#if defined(_SC_KERNEL_BITS)
|
#if defined(_SC_KERNEL_BITS)
|
||||||
long bits = sysconf(_SC_KERNEL_BITS);
|
long bits = sysconf(_SC_KERNEL_BITS);
|
||||||
#endif
|
#endif
|
||||||
long cpu = sysconf (_SC_CPU_VERSION);
|
long cpu = sysconf (_SC_CPU_VERSION);
|
||||||
|
|
||||||
switch (cpu)
|
switch (cpu)
|
||||||
{
|
{
|
||||||
case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
|
case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
|
||||||
case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
|
case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
|
||||||
case CPU_PA_RISC2_0:
|
case CPU_PA_RISC2_0:
|
||||||
#if defined(_SC_KERNEL_BITS)
|
#if defined(_SC_KERNEL_BITS)
|
||||||
switch (bits)
|
switch (bits)
|
||||||
{
|
{
|
||||||
case 64: puts ("hppa2.0w"); break;
|
case 64: puts ("hppa2.0w"); break;
|
||||||
case 32: puts ("hppa2.0n"); break;
|
case 32: puts ("hppa2.0n"); break;
|
||||||
default: puts ("hppa2.0"); break;
|
default: puts ("hppa2.0"); break;
|
||||||
} break;
|
} break;
|
||||||
#else /* !defined(_SC_KERNEL_BITS) */
|
#else /* !defined(_SC_KERNEL_BITS) */
|
||||||
puts ("hppa2.0"); break;
|
puts ("hppa2.0"); break;
|
||||||
#endif
|
#endif
|
||||||
default: puts ("hppa1.0"); break;
|
default: puts ("hppa1.0"); break;
|
||||||
}
|
}
|
||||||
exit (0);
|
exit (0);
|
||||||
}
|
}
|
||||||
EOF
|
EOF
|
||||||
(CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
|
(CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
|
||||||
test -z "$HP_ARCH" && HP_ARCH=hppa
|
test -z "$HP_ARCH" && HP_ARCH=hppa
|
||||||
@ -657,7 +688,7 @@ EOF
|
|||||||
# => hppa64-hp-hpux11.23
|
# => hppa64-hp-hpux11.23
|
||||||
|
|
||||||
if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
|
if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
|
||||||
grep __LP64__ >/dev/null
|
grep -q __LP64__
|
||||||
then
|
then
|
||||||
HP_ARCH="hppa2.0w"
|
HP_ARCH="hppa2.0w"
|
||||||
else
|
else
|
||||||
@ -728,22 +759,22 @@ EOF
|
|||||||
exit ;;
|
exit ;;
|
||||||
C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
|
C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
|
||||||
echo c1-convex-bsd
|
echo c1-convex-bsd
|
||||||
exit ;;
|
exit ;;
|
||||||
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
|
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
|
||||||
if getsysinfo -f scalar_acc
|
if getsysinfo -f scalar_acc
|
||||||
then echo c32-convex-bsd
|
then echo c32-convex-bsd
|
||||||
else echo c2-convex-bsd
|
else echo c2-convex-bsd
|
||||||
fi
|
fi
|
||||||
exit ;;
|
exit ;;
|
||||||
C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
|
C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
|
||||||
echo c34-convex-bsd
|
echo c34-convex-bsd
|
||||||
exit ;;
|
exit ;;
|
||||||
C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
|
C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
|
||||||
echo c38-convex-bsd
|
echo c38-convex-bsd
|
||||||
exit ;;
|
exit ;;
|
||||||
C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
|
C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
|
||||||
echo c4-convex-bsd
|
echo c4-convex-bsd
|
||||||
exit ;;
|
exit ;;
|
||||||
CRAY*Y-MP:*:*:*)
|
CRAY*Y-MP:*:*:*)
|
||||||
echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
|
echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
|
||||||
exit ;;
|
exit ;;
|
||||||
@ -767,14 +798,14 @@ EOF
|
|||||||
exit ;;
|
exit ;;
|
||||||
F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
|
F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
|
||||||
FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
|
FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
|
||||||
FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
|
FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
|
||||||
FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
|
FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
|
||||||
echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
|
echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
|
||||||
exit ;;
|
exit ;;
|
||||||
5000:UNIX_System_V:4.*:*)
|
5000:UNIX_System_V:4.*:*)
|
||||||
FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
|
FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
|
||||||
FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
|
FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
|
||||||
echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
|
echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
|
||||||
exit ;;
|
exit ;;
|
||||||
i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
|
i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
|
||||||
echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
|
echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
|
||||||
@ -786,34 +817,39 @@ EOF
|
|||||||
echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
|
echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
*:FreeBSD:*:*)
|
*:FreeBSD:*:*)
|
||||||
case ${UNAME_MACHINE} in
|
UNAME_PROCESSOR=`/usr/bin/uname -p`
|
||||||
pc98)
|
case ${UNAME_PROCESSOR} in
|
||||||
echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
|
|
||||||
amd64)
|
amd64)
|
||||||
echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
|
echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
|
||||||
*)
|
*)
|
||||||
echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
|
echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
|
||||||
esac
|
esac
|
||||||
exit ;;
|
exit ;;
|
||||||
i*:CYGWIN*:*)
|
i*:CYGWIN*:*)
|
||||||
echo ${UNAME_MACHINE}-pc-cygwin
|
echo ${UNAME_MACHINE}-pc-cygwin
|
||||||
exit ;;
|
exit ;;
|
||||||
|
*:MINGW64*:*)
|
||||||
|
echo ${UNAME_MACHINE}-pc-mingw64
|
||||||
|
exit ;;
|
||||||
*:MINGW*:*)
|
*:MINGW*:*)
|
||||||
echo ${UNAME_MACHINE}-pc-mingw32
|
echo ${UNAME_MACHINE}-pc-mingw32
|
||||||
exit ;;
|
exit ;;
|
||||||
|
i*:MSYS*:*)
|
||||||
|
echo ${UNAME_MACHINE}-pc-msys
|
||||||
|
exit ;;
|
||||||
i*:windows32*:*)
|
i*:windows32*:*)
|
||||||
# uname -m includes "-pc" on this system.
|
# uname -m includes "-pc" on this system.
|
||||||
echo ${UNAME_MACHINE}-mingw32
|
echo ${UNAME_MACHINE}-mingw32
|
||||||
exit ;;
|
exit ;;
|
||||||
i*:PW*:*)
|
i*:PW*:*)
|
||||||
echo ${UNAME_MACHINE}-pc-pw32
|
echo ${UNAME_MACHINE}-pc-pw32
|
||||||
exit ;;
|
exit ;;
|
||||||
*:Interix*:[3456]*)
|
*:Interix*:*)
|
||||||
case ${UNAME_MACHINE} in
|
case ${UNAME_MACHINE} in
|
||||||
x86)
|
x86)
|
||||||
echo i586-pc-interix${UNAME_RELEASE}
|
echo i586-pc-interix${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
EM64T | authenticamd | genuineintel)
|
authenticamd | genuineintel | EM64T)
|
||||||
echo x86_64-unknown-interix${UNAME_RELEASE}
|
echo x86_64-unknown-interix${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
IA64)
|
IA64)
|
||||||
@ -823,6 +859,9 @@ EOF
|
|||||||
[345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
|
[345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
|
||||||
echo i${UNAME_MACHINE}-pc-mks
|
echo i${UNAME_MACHINE}-pc-mks
|
||||||
exit ;;
|
exit ;;
|
||||||
|
8664:Windows_NT:*)
|
||||||
|
echo x86_64-pc-mks
|
||||||
|
exit ;;
|
||||||
i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
|
i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
|
||||||
# How do we know it's Interix rather than the generic POSIX subsystem?
|
# How do we know it's Interix rather than the generic POSIX subsystem?
|
||||||
# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
|
# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
|
||||||
@ -843,101 +882,22 @@ EOF
|
|||||||
exit ;;
|
exit ;;
|
||||||
*:GNU:*:*)
|
*:GNU:*:*)
|
||||||
# the GNU system
|
# the GNU system
|
||||||
echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
|
echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
|
||||||
exit ;;
|
exit ;;
|
||||||
*:GNU/*:*:*)
|
*:GNU/*:*:*)
|
||||||
# other systems with GNU libc and userland
|
# other systems with GNU libc and userland
|
||||||
echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
|
echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
i*86:Minix:*:*)
|
i*86:Minix:*:*)
|
||||||
echo ${UNAME_MACHINE}-pc-minix
|
echo ${UNAME_MACHINE}-pc-minix
|
||||||
exit ;;
|
exit ;;
|
||||||
arm*:Linux:*:*)
|
aarch64:Linux:*:*)
|
||||||
eval $set_cc_for_build
|
|
||||||
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
|
|
||||||
| grep -q __ARM_EABI__
|
|
||||||
then
|
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
|
||||||
else
|
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
|
|
||||||
fi
|
|
||||||
exit ;;
|
|
||||||
avr32*:Linux:*:*)
|
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
cris:Linux:*:*)
|
aarch64_be:Linux:*:*)
|
||||||
echo cris-axis-linux-${LIBC}
|
UNAME_MACHINE=aarch64_be
|
||||||
exit ;;
|
|
||||||
crisv32:Linux:*:*)
|
|
||||||
echo crisv32-axis-linux-${LIBC}
|
|
||||||
exit ;;
|
|
||||||
frv:Linux:*:*)
|
|
||||||
echo frv-unknown-linux-${LIBC}
|
|
||||||
exit ;;
|
|
||||||
ia64:Linux:*:*)
|
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
m32r*:Linux:*:*)
|
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
|
||||||
exit ;;
|
|
||||||
m68*:Linux:*:*)
|
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
|
||||||
exit ;;
|
|
||||||
mips:Linux:*:*)
|
|
||||||
eval $set_cc_for_build
|
|
||||||
sed 's/^ //' << EOF >$dummy.c
|
|
||||||
#undef CPU
|
|
||||||
#undef mips
|
|
||||||
#undef mipsel
|
|
||||||
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
|
|
||||||
CPU=mipsel
|
|
||||||
#else
|
|
||||||
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
|
|
||||||
CPU=mips
|
|
||||||
#else
|
|
||||||
CPU=
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
EOF
|
|
||||||
eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
|
|
||||||
/^CPU/{
|
|
||||||
s: ::g
|
|
||||||
p
|
|
||||||
}'`"
|
|
||||||
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
|
|
||||||
;;
|
|
||||||
mips64:Linux:*:*)
|
|
||||||
eval $set_cc_for_build
|
|
||||||
sed 's/^ //' << EOF >$dummy.c
|
|
||||||
#undef CPU
|
|
||||||
#undef mips64
|
|
||||||
#undef mips64el
|
|
||||||
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
|
|
||||||
CPU=mips64el
|
|
||||||
#else
|
|
||||||
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
|
|
||||||
CPU=mips64
|
|
||||||
#else
|
|
||||||
CPU=
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
EOF
|
|
||||||
eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
|
|
||||||
/^CPU/{
|
|
||||||
s: ::g
|
|
||||||
p
|
|
||||||
}'`"
|
|
||||||
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
|
|
||||||
;;
|
|
||||||
or32:Linux:*:*)
|
|
||||||
echo or32-unknown-linux-${LIBC}
|
|
||||||
exit ;;
|
|
||||||
ppc:Linux:*:*)
|
|
||||||
echo powerpc-unknown-linux-${LIBC}
|
|
||||||
exit ;;
|
|
||||||
ppc64:Linux:*:*)
|
|
||||||
echo powerpc64-unknown-linux-${LIBC}
|
|
||||||
exit ;;
|
|
||||||
alpha:Linux:*:*)
|
alpha:Linux:*:*)
|
||||||
case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
|
case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
|
||||||
EV5) UNAME_MACHINE=alphaev5 ;;
|
EV5) UNAME_MACHINE=alphaev5 ;;
|
||||||
@ -947,13 +907,81 @@ EOF
|
|||||||
EV6) UNAME_MACHINE=alphaev6 ;;
|
EV6) UNAME_MACHINE=alphaev6 ;;
|
||||||
EV67) UNAME_MACHINE=alphaev67 ;;
|
EV67) UNAME_MACHINE=alphaev67 ;;
|
||||||
EV68*) UNAME_MACHINE=alphaev68 ;;
|
EV68*) UNAME_MACHINE=alphaev68 ;;
|
||||||
esac
|
esac
|
||||||
objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null
|
objdump --private-headers /bin/sh | grep -q ld.so.1
|
||||||
if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
|
if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
|
arm*:Linux:*:*)
|
||||||
|
eval $set_cc_for_build
|
||||||
|
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
|
||||||
|
| grep -q __ARM_EABI__
|
||||||
|
then
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
|
else
|
||||||
|
if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
|
||||||
|
| grep -q __ARM_PCS_VFP
|
||||||
|
then
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
|
||||||
|
else
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
exit ;;
|
||||||
|
avr32*:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
|
cris:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-axis-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
|
crisv32:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-axis-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
|
frv:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
|
hexagon:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
|
i*86:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-pc-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
|
ia64:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
|
m32r*:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
|
m68*:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
|
mips:Linux:*:* | mips64:Linux:*:*)
|
||||||
|
eval $set_cc_for_build
|
||||||
|
sed 's/^ //' << EOF >$dummy.c
|
||||||
|
#undef CPU
|
||||||
|
#undef ${UNAME_MACHINE}
|
||||||
|
#undef ${UNAME_MACHINE}el
|
||||||
|
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
|
||||||
|
CPU=${UNAME_MACHINE}el
|
||||||
|
#else
|
||||||
|
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
|
||||||
|
CPU=${UNAME_MACHINE}
|
||||||
|
#else
|
||||||
|
CPU=
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
EOF
|
||||||
|
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
|
||||||
|
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
|
||||||
|
;;
|
||||||
|
or32:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
padre:Linux:*:*)
|
padre:Linux:*:*)
|
||||||
echo sparc-unknown-linux-gnu
|
echo sparc-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
|
parisc64:Linux:*:* | hppa64:Linux:*:*)
|
||||||
|
echo hppa64-unknown-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
parisc:Linux:*:* | hppa:Linux:*:*)
|
parisc:Linux:*:* | hppa:Linux:*:*)
|
||||||
# Look for CPU level
|
# Look for CPU level
|
||||||
@ -963,14 +991,17 @@ EOF
|
|||||||
*) echo hppa-unknown-linux-${LIBC} ;;
|
*) echo hppa-unknown-linux-${LIBC} ;;
|
||||||
esac
|
esac
|
||||||
exit ;;
|
exit ;;
|
||||||
parisc64:Linux:*:* | hppa64:Linux:*:*)
|
ppc64:Linux:*:*)
|
||||||
echo hppa64-unknown-linux-${LIBC}
|
echo powerpc64-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
|
ppc:Linux:*:*)
|
||||||
|
echo powerpc-unknown-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
s390:Linux:*:* | s390x:Linux:*:*)
|
s390:Linux:*:* | s390x:Linux:*:*)
|
||||||
echo ${UNAME_MACHINE}-ibm-linux
|
echo ${UNAME_MACHINE}-ibm-linux
|
||||||
exit ;;
|
exit ;;
|
||||||
sh64*:Linux:*:*)
|
sh64*:Linux:*:*)
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
sh*:Linux:*:*)
|
sh*:Linux:*:*)
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
@ -978,77 +1009,18 @@ EOF
|
|||||||
sparc:Linux:*:* | sparc64:Linux:*:*)
|
sparc:Linux:*:* | sparc64:Linux:*:*)
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
|
tile*:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
vax:Linux:*:*)
|
vax:Linux:*:*)
|
||||||
echo ${UNAME_MACHINE}-dec-linux-${LIBC}
|
echo ${UNAME_MACHINE}-dec-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
x86_64:Linux:*:*)
|
x86_64:Linux:*:*)
|
||||||
echo x86_64-unknown-linux-${LIBC}
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
xtensa*:Linux:*:*)
|
xtensa*:Linux:*:*)
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
i*86:Linux:*:*)
|
|
||||||
# The BFD linker knows what the default object file format is, so
|
|
||||||
# first see if it will tell us. cd to the root directory to prevent
|
|
||||||
# problems with other programs or directories called `ld' in the path.
|
|
||||||
# Set LC_ALL=C to ensure ld outputs messages in English.
|
|
||||||
ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
|
|
||||||
| sed -ne '/supported targets:/!d
|
|
||||||
s/[ ][ ]*/ /g
|
|
||||||
s/.*supported targets: *//
|
|
||||||
s/ .*//
|
|
||||||
p'`
|
|
||||||
case "$ld_supported_targets" in
|
|
||||||
elf32-i386)
|
|
||||||
TENTATIVE="${UNAME_MACHINE}-pc-linux-${LIBC}"
|
|
||||||
;;
|
|
||||||
a.out-i386-linux)
|
|
||||||
echo "${UNAME_MACHINE}-pc-linux-${LIBC}aout"
|
|
||||||
exit ;;
|
|
||||||
"")
|
|
||||||
# Either a pre-BFD a.out linker (linux-gnuoldld) or
|
|
||||||
# one that does not give us useful --help.
|
|
||||||
echo "${UNAME_MACHINE}-pc-linux-${LIBC}oldld"
|
|
||||||
exit ;;
|
|
||||||
esac
|
|
||||||
# This should get integrated into the C code below, but now we hack
|
|
||||||
if [ "$LIBC" != "gnu" ] ; then echo "$TENTATIVE" && exit 0 ; fi
|
|
||||||
# Determine whether the default compiler is a.out or elf
|
|
||||||
eval $set_cc_for_build
|
|
||||||
sed 's/^ //' << EOF >$dummy.c
|
|
||||||
#include <features.h>
|
|
||||||
#ifdef __ELF__
|
|
||||||
# ifdef __GLIBC__
|
|
||||||
# if __GLIBC__ >= 2
|
|
||||||
LIBC=gnu
|
|
||||||
# else
|
|
||||||
LIBC=gnulibc1
|
|
||||||
# endif
|
|
||||||
# else
|
|
||||||
LIBC=gnulibc1
|
|
||||||
# endif
|
|
||||||
#else
|
|
||||||
#if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
|
|
||||||
LIBC=gnu
|
|
||||||
#else
|
|
||||||
LIBC=gnuaout
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#ifdef __dietlibc__
|
|
||||||
LIBC=dietlibc
|
|
||||||
#endif
|
|
||||||
EOF
|
|
||||||
eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
|
|
||||||
/^LIBC/{
|
|
||||||
s: ::g
|
|
||||||
p
|
|
||||||
}'`"
|
|
||||||
test x"${LIBC}" != x && {
|
|
||||||
echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
|
|
||||||
exit
|
|
||||||
}
|
|
||||||
test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; }
|
|
||||||
;;
|
|
||||||
i*86:DYNIX/ptx:4*:*)
|
i*86:DYNIX/ptx:4*:*)
|
||||||
# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
|
# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
|
||||||
# earlier versions are messed up and put the nodename in both
|
# earlier versions are messed up and put the nodename in both
|
||||||
@ -1056,11 +1028,11 @@ EOF
|
|||||||
echo i386-sequent-sysv4
|
echo i386-sequent-sysv4
|
||||||
exit ;;
|
exit ;;
|
||||||
i*86:UNIX_SV:4.2MP:2.*)
|
i*86:UNIX_SV:4.2MP:2.*)
|
||||||
# Unixware is an offshoot of SVR4, but it has its own version
|
# Unixware is an offshoot of SVR4, but it has its own version
|
||||||
# number series starting with 2...
|
# number series starting with 2...
|
||||||
# I am not positive that other SVR4 systems won't match this,
|
# I am not positive that other SVR4 systems won't match this,
|
||||||
# I just have to hope. -- rms.
|
# I just have to hope. -- rms.
|
||||||
# Use sysv4.2uw... so that sysv4* matches it.
|
# Use sysv4.2uw... so that sysv4* matches it.
|
||||||
echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
|
echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
|
||||||
exit ;;
|
exit ;;
|
||||||
i*86:OS/2:*:*)
|
i*86:OS/2:*:*)
|
||||||
@ -1077,7 +1049,7 @@ EOF
|
|||||||
i*86:syllable:*:*)
|
i*86:syllable:*:*)
|
||||||
echo ${UNAME_MACHINE}-pc-syllable
|
echo ${UNAME_MACHINE}-pc-syllable
|
||||||
exit ;;
|
exit ;;
|
||||||
i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
|
i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
|
||||||
echo i386-unknown-lynxos${UNAME_RELEASE}
|
echo i386-unknown-lynxos${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
i*86:*DOS:*:*)
|
i*86:*DOS:*:*)
|
||||||
@ -1092,7 +1064,7 @@ EOF
|
|||||||
fi
|
fi
|
||||||
exit ;;
|
exit ;;
|
||||||
i*86:*:5:[678]*)
|
i*86:*:5:[678]*)
|
||||||
# UnixWare 7.x, OpenUNIX and OpenServer 6.
|
# UnixWare 7.x, OpenUNIX and OpenServer 6.
|
||||||
case `/bin/uname -X | grep "^Machine"` in
|
case `/bin/uname -X | grep "^Machine"` in
|
||||||
*486*) UNAME_MACHINE=i486 ;;
|
*486*) UNAME_MACHINE=i486 ;;
|
||||||
*Pentium) UNAME_MACHINE=i586 ;;
|
*Pentium) UNAME_MACHINE=i586 ;;
|
||||||
@ -1120,10 +1092,13 @@ EOF
|
|||||||
exit ;;
|
exit ;;
|
||||||
pc:*:*:*)
|
pc:*:*:*)
|
||||||
# Left here for compatibility:
|
# Left here for compatibility:
|
||||||
# uname -m prints for DJGPP always 'pc', but it prints nothing about
|
# uname -m prints for DJGPP always 'pc', but it prints nothing about
|
||||||
# the processor, so we play safe by assuming i386.
|
# the processor, so we play safe by assuming i586.
|
||||||
echo i386-pc-msdosdjgpp
|
# Note: whatever this is, it MUST be the same as what config.sub
|
||||||
exit ;;
|
# prints for the "djgpp" host, or else GDB configury will decide that
|
||||||
|
# this is a cross-build.
|
||||||
|
echo i586-pc-msdosdjgpp
|
||||||
|
exit ;;
|
||||||
Intel:Mach:3*:*)
|
Intel:Mach:3*:*)
|
||||||
echo i386-pc-mach3
|
echo i386-pc-mach3
|
||||||
exit ;;
|
exit ;;
|
||||||
@ -1158,8 +1133,18 @@ EOF
|
|||||||
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
|
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
|
||||||
&& { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
|
&& { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
|
||||||
3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
|
3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
|
||||||
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
|
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
|
||||||
&& { echo i486-ncr-sysv4; exit; } ;;
|
&& { echo i486-ncr-sysv4; exit; } ;;
|
||||||
|
NCR*:*:4.2:* | MPRAS*:*:4.2:*)
|
||||||
|
OS_REL='.3'
|
||||||
|
test -r /etc/.relid \
|
||||||
|
&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
|
||||||
|
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
|
||||||
|
&& { echo i486-ncr-sysv4.3${OS_REL}; exit; }
|
||||||
|
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
|
||||||
|
&& { echo i586-ncr-sysv4.3${OS_REL}; exit; }
|
||||||
|
/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
|
||||||
|
&& { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
|
||||||
m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
|
m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
|
||||||
echo m68k-unknown-lynxos${UNAME_RELEASE}
|
echo m68k-unknown-lynxos${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
@ -1172,7 +1157,7 @@ EOF
|
|||||||
rs6000:LynxOS:2.*:*)
|
rs6000:LynxOS:2.*:*)
|
||||||
echo rs6000-unknown-lynxos${UNAME_RELEASE}
|
echo rs6000-unknown-lynxos${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*)
|
PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
|
||||||
echo powerpc-unknown-lynxos${UNAME_RELEASE}
|
echo powerpc-unknown-lynxos${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
SM[BE]S:UNIX_SV:*:*)
|
SM[BE]S:UNIX_SV:*:*)
|
||||||
@ -1192,10 +1177,10 @@ EOF
|
|||||||
echo ns32k-sni-sysv
|
echo ns32k-sni-sysv
|
||||||
fi
|
fi
|
||||||
exit ;;
|
exit ;;
|
||||||
PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
|
PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
|
||||||
# says <Richard.M.Bartel@ccMail.Census.GOV>
|
# says <Richard.M.Bartel@ccMail.Census.GOV>
|
||||||
echo i586-unisys-sysv4
|
echo i586-unisys-sysv4
|
||||||
exit ;;
|
exit ;;
|
||||||
*:UNIX_System_V:4*:FTX*)
|
*:UNIX_System_V:4*:FTX*)
|
||||||
# From Gerald Hewes <hewes@openmarket.com>.
|
# From Gerald Hewes <hewes@openmarket.com>.
|
||||||
# How about differentiating between stratus architectures? -djm
|
# How about differentiating between stratus architectures? -djm
|
||||||
@ -1221,11 +1206,11 @@ EOF
|
|||||||
exit ;;
|
exit ;;
|
||||||
R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
|
R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
|
||||||
if [ -d /usr/nec ]; then
|
if [ -d /usr/nec ]; then
|
||||||
echo mips-nec-sysv${UNAME_RELEASE}
|
echo mips-nec-sysv${UNAME_RELEASE}
|
||||||
else
|
else
|
||||||
echo mips-unknown-sysv${UNAME_RELEASE}
|
echo mips-unknown-sysv${UNAME_RELEASE}
|
||||||
fi
|
fi
|
||||||
exit ;;
|
exit ;;
|
||||||
BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
|
BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
|
||||||
echo powerpc-be-beos
|
echo powerpc-be-beos
|
||||||
exit ;;
|
exit ;;
|
||||||
@ -1238,6 +1223,9 @@ EOF
|
|||||||
BePC:Haiku:*:*) # Haiku running on Intel PC compatible.
|
BePC:Haiku:*:*) # Haiku running on Intel PC compatible.
|
||||||
echo i586-pc-haiku
|
echo i586-pc-haiku
|
||||||
exit ;;
|
exit ;;
|
||||||
|
x86_64:Haiku:*:*)
|
||||||
|
echo x86_64-unknown-haiku
|
||||||
|
exit ;;
|
||||||
SX-4:SUPER-UX:*:*)
|
SX-4:SUPER-UX:*:*)
|
||||||
echo sx4-nec-superux${UNAME_RELEASE}
|
echo sx4-nec-superux${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
@ -1265,6 +1253,16 @@ EOF
|
|||||||
*:Darwin:*:*)
|
*:Darwin:*:*)
|
||||||
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
|
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
|
||||||
case $UNAME_PROCESSOR in
|
case $UNAME_PROCESSOR in
|
||||||
|
i386)
|
||||||
|
eval $set_cc_for_build
|
||||||
|
if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
|
||||||
|
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
|
||||||
|
(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
|
||||||
|
grep IS_64BIT_ARCH >/dev/null
|
||||||
|
then
|
||||||
|
UNAME_PROCESSOR="x86_64"
|
||||||
|
fi
|
||||||
|
fi ;;
|
||||||
unknown) UNAME_PROCESSOR=powerpc ;;
|
unknown) UNAME_PROCESSOR=powerpc ;;
|
||||||
esac
|
esac
|
||||||
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
|
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
|
||||||
@ -1280,7 +1278,10 @@ EOF
|
|||||||
*:QNX:*:4*)
|
*:QNX:*:4*)
|
||||||
echo i386-pc-qnx
|
echo i386-pc-qnx
|
||||||
exit ;;
|
exit ;;
|
||||||
NSE-?:NONSTOP_KERNEL:*:*)
|
NEO-?:NONSTOP_KERNEL:*:*)
|
||||||
|
echo neo-tandem-nsk${UNAME_RELEASE}
|
||||||
|
exit ;;
|
||||||
|
NSE-*:NONSTOP_KERNEL:*:*)
|
||||||
echo nse-tandem-nsk${UNAME_RELEASE}
|
echo nse-tandem-nsk${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
NSR-?:NONSTOP_KERNEL:*:*)
|
NSR-?:NONSTOP_KERNEL:*:*)
|
||||||
@ -1325,13 +1326,13 @@ EOF
|
|||||||
echo pdp10-unknown-its
|
echo pdp10-unknown-its
|
||||||
exit ;;
|
exit ;;
|
||||||
SEI:*:*:SEIUX)
|
SEI:*:*:SEIUX)
|
||||||
echo mips-sei-seiux${UNAME_RELEASE}
|
echo mips-sei-seiux${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
*:DragonFly:*:*)
|
*:DragonFly:*:*)
|
||||||
echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
|
echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
|
||||||
exit ;;
|
exit ;;
|
||||||
*:*VMS:*:*)
|
*:*VMS:*:*)
|
||||||
UNAME_MACHINE=`(uname -p) 2>/dev/null`
|
UNAME_MACHINE=`(uname -p) 2>/dev/null`
|
||||||
case "${UNAME_MACHINE}" in
|
case "${UNAME_MACHINE}" in
|
||||||
A*) echo alpha-dec-vms ; exit ;;
|
A*) echo alpha-dec-vms ; exit ;;
|
||||||
I*) echo ia64-dec-vms ; exit ;;
|
I*) echo ia64-dec-vms ; exit ;;
|
||||||
@ -1346,11 +1347,14 @@ EOF
|
|||||||
i*86:rdos:*:*)
|
i*86:rdos:*:*)
|
||||||
echo ${UNAME_MACHINE}-pc-rdos
|
echo ${UNAME_MACHINE}-pc-rdos
|
||||||
exit ;;
|
exit ;;
|
||||||
|
i*86:AROS:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-pc-aros
|
||||||
|
exit ;;
|
||||||
|
x86_64:VMkernel:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-unknown-esx
|
||||||
|
exit ;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
#echo '(No uname command or uname output not recognized.)' 1>&2
|
|
||||||
#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
|
|
||||||
|
|
||||||
eval $set_cc_for_build
|
eval $set_cc_for_build
|
||||||
cat >$dummy.c <<EOF
|
cat >$dummy.c <<EOF
|
||||||
#ifdef _SEQUENT_
|
#ifdef _SEQUENT_
|
||||||
@ -1368,11 +1372,11 @@ main ()
|
|||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
printf ("m68k-sony-newsos%s\n",
|
printf ("m68k-sony-newsos%s\n",
|
||||||
#ifdef NEWSOS4
|
#ifdef NEWSOS4
|
||||||
"4"
|
"4"
|
||||||
#else
|
#else
|
||||||
""
|
""
|
||||||
#endif
|
#endif
|
||||||
); exit (0);
|
); exit (0);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1,17 +1,17 @@
|
|||||||
/* config.h. Generated from config.h.in by configure. */
|
/* config.h. Generated from config.h.in by configure. */
|
||||||
/* config.h.in. Generated from configure.ac by autoheader. */
|
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||||
|
|
||||||
|
/* PCRE is written in Standard C, but there are a few non-standard things it
|
||||||
|
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||||
|
systems.
|
||||||
|
|
||||||
/* On Unix-like systems config.h.in is converted by "configure" into config.h.
|
In environments that support the facilities, config.h.in is converted by
|
||||||
Some other environments also support the use of "configure". PCRE is written in
|
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
|
||||||
Standard C, but there are a few non-standard things it can cope with, allowing
|
are going to build PCRE "by hand" without using "configure" or CMake, you
|
||||||
it to run on SunOS4 and other "close to standard" systems.
|
should copy the distributed config.h.generic to config.h, and then edit the
|
||||||
|
macro definitions to be the way you need them. You must then add
|
||||||
If you are going to build PCRE "by hand" on a system without "configure" you
|
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
|
||||||
should copy the distributed config.h.generic to config.h, and then set up the
|
at the start of every source.
|
||||||
macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to
|
|
||||||
all of your compile commands, so that config.h is included at the start of
|
|
||||||
every source.
|
|
||||||
|
|
||||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
||||||
@ -21,20 +21,28 @@ HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
|||||||
them both to 0; an emulation function will be used. */
|
them both to 0; an emulation function will be used. */
|
||||||
|
|
||||||
/* By default, the \R escape sequence matches any Unicode line ending
|
/* By default, the \R escape sequence matches any Unicode line ending
|
||||||
character or sequence of characters. If BSR_ANYCRLF is defined, this is
|
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||||
changed so that backslash-R matches only CR, LF, or CRLF. The build- time
|
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||||
default can be overridden by the user of PCRE at runtime. On systems that
|
The build-time default can be overridden by the user of PCRE at runtime. */
|
||||||
support it, "configure" can be used to override the default. */
|
|
||||||
/* #undef BSR_ANYCRLF */
|
/* #undef BSR_ANYCRLF */
|
||||||
|
|
||||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||||
character codes, define this macro as 1. On systems that can use
|
character codes, define this macro to any value. You must also edit the
|
||||||
"configure", this can be done via --enable-ebcdic. PCRE will then assume
|
NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
|
||||||
that all input strings are in EBCDIC. If you do not define this macro, PCRE
|
On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
|
||||||
will assume input strings are ASCII or UTF-8 Unicode. It is not possible to
|
automatically adjusted. When EBCDIC is set, PCRE assumes that all input
|
||||||
build a version of PCRE that supports both EBCDIC and UTF-8. */
|
strings are in EBCDIC. If you do not define this macro, PCRE will assume
|
||||||
|
input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
|
||||||
|
a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
|
||||||
/* #undef EBCDIC */
|
/* #undef EBCDIC */
|
||||||
|
|
||||||
|
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||||
|
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||||
|
LF does in an ASCII/Unicode environment. The value must also be set in the
|
||||||
|
NEWLINE macro below. On systems that can use "configure" or CMake to set
|
||||||
|
EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
|
||||||
|
/* #undef EBCDIC_NL25 */
|
||||||
|
|
||||||
/* Define to 1 if you have the `bcopy' function. */
|
/* Define to 1 if you have the `bcopy' function. */
|
||||||
#ifndef HAVE_BCOPY
|
#ifndef HAVE_BCOPY
|
||||||
#define HAVE_BCOPY 1
|
#define HAVE_BCOPY 1
|
||||||
@ -58,6 +66,12 @@ them both to 0; an emulation function will be used. */
|
|||||||
#define HAVE_DLFCN_H 1
|
#define HAVE_DLFCN_H 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||||
|
/* #undef HAVE_EDITLINE_READLINE_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||||
|
/* #undef HAVE_EDIT_READLINE_READLINE_H */
|
||||||
|
|
||||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
#ifndef HAVE_INTTYPES_H
|
#ifndef HAVE_INTTYPES_H
|
||||||
#define HAVE_INTTYPES_H 1
|
#define HAVE_INTTYPES_H 1
|
||||||
@ -83,15 +97,17 @@ them both to 0; an emulation function will be used. */
|
|||||||
#define HAVE_MEMORY_H 1
|
#define HAVE_MEMORY_H 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Define if you have POSIX threads libraries and header files. */
|
||||||
|
/* #undef HAVE_PTHREAD */
|
||||||
|
|
||||||
|
/* Have PTHREAD_PRIO_INHERIT. */
|
||||||
|
/* #undef HAVE_PTHREAD_PRIO_INHERIT */
|
||||||
|
|
||||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||||
#ifndef HAVE_READLINE_HISTORY_H
|
/* #undef HAVE_READLINE_HISTORY_H */
|
||||||
#define HAVE_READLINE_HISTORY_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||||
#ifndef HAVE_READLINE_READLINE_H
|
/* #undef HAVE_READLINE_READLINE_H */
|
||||||
#define HAVE_READLINE_READLINE_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdint.h> header file. */
|
/* Define to 1 if you have the <stdint.h> header file. */
|
||||||
#ifndef HAVE_STDINT_H
|
#ifndef HAVE_STDINT_H
|
||||||
@ -123,10 +139,13 @@ them both to 0; an emulation function will be used. */
|
|||||||
#define HAVE_STRING_H 1
|
#define HAVE_STRING_H 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Define to 1 if you have the `strtoll' function. */
|
/* Define to 1 if you have `strtoimax'. */
|
||||||
|
/* #undef HAVE_STRTOIMAX */
|
||||||
|
|
||||||
|
/* Define to 1 if you have `strtoll'. */
|
||||||
/* #undef HAVE_STRTOLL */
|
/* #undef HAVE_STRTOLL */
|
||||||
|
|
||||||
/* Define to 1 if you have the `strtoq' function. */
|
/* Define to 1 if you have `strtoq'. */
|
||||||
#ifndef HAVE_STRTOQ
|
#ifndef HAVE_STRTOQ
|
||||||
#define HAVE_STRTOQ 1
|
#define HAVE_STRTOQ 1
|
||||||
#endif
|
#endif
|
||||||
@ -154,6 +173,12 @@ them both to 0; an emulation function will be used. */
|
|||||||
#define HAVE_UNSIGNED_LONG_LONG 1
|
#define HAVE_UNSIGNED_LONG_LONG 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Define to 1 or 0, depending whether the compiler supports simple visibility
|
||||||
|
declarations. */
|
||||||
|
#ifndef HAVE_VISIBILITY
|
||||||
|
#define HAVE_VISIBILITY 1
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Define to 1 if you have the <windows.h> header file. */
|
/* Define to 1 if you have the <windows.h> header file. */
|
||||||
/* #undef HAVE_WINDOWS_H */
|
/* #undef HAVE_WINDOWS_H */
|
||||||
|
|
||||||
@ -162,26 +187,30 @@ them both to 0; an emulation function will be used. */
|
|||||||
#define HAVE_ZLIB_H 1
|
#define HAVE_ZLIB_H 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Define to 1 if you have the `_strtoi64' function. */
|
/* Define to 1 if you have `_strtoi64'. */
|
||||||
/* #undef HAVE__STRTOI64 */
|
/* #undef HAVE__STRTOI64 */
|
||||||
|
|
||||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||||
as offsets within the compiled regex. The default is 2, which allows for
|
as offsets within the compiled regex. The default is 2, which allows for
|
||||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
||||||
for longer patterns in extreme cases. On systems that support it,
|
for longer patterns in extreme cases. */
|
||||||
"configure" can be used to override this default. */
|
|
||||||
#ifndef LINK_SIZE
|
#ifndef LINK_SIZE
|
||||||
#define LINK_SIZE 2
|
#define LINK_SIZE 2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||||
|
*/
|
||||||
|
#ifndef LT_OBJDIR
|
||||||
|
#define LT_OBJDIR ".libs/"
|
||||||
|
#endif
|
||||||
|
|
||||||
/* The value of MATCH_LIMIT determines the default number of times the
|
/* The value of MATCH_LIMIT determines the default number of times the
|
||||||
internal match() function can be called during a single execution of
|
internal match() function can be called during a single execution of
|
||||||
pcre_exec(). There is a runtime interface for setting a different limit.
|
pcre_exec(). There is a runtime interface for setting a different limit.
|
||||||
The limit exists in order to catch runaway regular expressions that take
|
The limit exists in order to catch runaway regular expressions that take
|
||||||
for ever to determine that they do not match. The default is set very large
|
for ever to determine that they do not match. The default is set very large
|
||||||
so that it does not accidentally catch legitimate cases. On systems that
|
so that it does not accidentally catch legitimate cases. */
|
||||||
support it, "configure" can be used to override this default. */
|
|
||||||
#ifndef MATCH_LIMIT
|
#ifndef MATCH_LIMIT
|
||||||
#define MATCH_LIMIT 10000000
|
#define MATCH_LIMIT 10000000
|
||||||
#endif
|
#endif
|
||||||
@ -193,8 +222,7 @@ them both to 0; an emulation function will be used. */
|
|||||||
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
|
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
|
||||||
match(). To have any useful effect, it must be less than the value of
|
match(). To have any useful effect, it must be less than the value of
|
||||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
|
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
|
||||||
a runtime method for setting a different limit. On systems that support it,
|
a runtime method for setting a different limit. */
|
||||||
"configure" can be used to override the default. */
|
|
||||||
#ifndef MATCH_LIMIT_RECURSION
|
#ifndef MATCH_LIMIT_RECURSION
|
||||||
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
||||||
#endif
|
#endif
|
||||||
@ -213,22 +241,28 @@ them both to 0; an emulation function will be used. */
|
|||||||
#define MAX_NAME_SIZE 32
|
#define MAX_NAME_SIZE 32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* The value of NEWLINE determines the newline character sequence. On systems
|
/* The value of NEWLINE determines the default newline character sequence.
|
||||||
that support it, "configure" can be used to override the default, which is
|
PCRE client programs can override this by selecting other values at run
|
||||||
10. The possible values are 10 (LF), 13 (CR), 3338 (CRLF), -1 (ANY), or -2
|
time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338
|
||||||
(ANYCRLF). */
|
(CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or
|
||||||
|
3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and
|
||||||
|
0x25) that are used as the NL line terminator that is equivalent to ASCII
|
||||||
|
LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
|
||||||
|
or -2 (ANYCRLF). */
|
||||||
#ifndef NEWLINE
|
#ifndef NEWLINE
|
||||||
#define NEWLINE 10
|
#define NEWLINE 10
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
|
||||||
|
/* #undef NO_MINUS_C_MINUS_O */
|
||||||
|
|
||||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||||
This can sometimes be a problem on systems that have stacks of limited
|
This can sometimes be a problem on systems that have stacks of limited
|
||||||
size. Define NO_RECURSE to get a version that doesn't use recursion in the
|
size. Define NO_RECURSE to any value to get a version that doesn't use
|
||||||
match() function; instead it creates its own stack by steam using
|
recursion in the match() function; instead it creates its own stack by
|
||||||
pcre_recurse_malloc() to obtain memory from the heap. For more detail, see
|
steam using pcre_recurse_malloc() to obtain memory from the heap. For more
|
||||||
the comments and other stuff just above the match() function. On systems
|
detail, see the comments and other stuff just above the match() function.
|
||||||
that support it, "configure" can be used to set this in the Makefile (use
|
*/
|
||||||
--disable-stack-for-recursion). */
|
|
||||||
/* #undef NO_RECURSE */
|
/* #undef NO_RECURSE */
|
||||||
|
|
||||||
/* Name of package */
|
/* Name of package */
|
||||||
@ -241,27 +275,38 @@ them both to 0; an emulation function will be used. */
|
|||||||
#define PACKAGE_NAME "PCRE"
|
#define PACKAGE_NAME "PCRE"
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
/* Define to the full name and version of this package. */
|
||||||
#define PACKAGE_STRING "PCRE 7.9"
|
#define PACKAGE_STRING "PCRE 8.32"
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
/* Define to the one symbol short name of this package. */
|
||||||
#define PACKAGE_TARNAME "pcre"
|
#define PACKAGE_TARNAME "pcre"
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
/* Define to the home page for this package. */
|
||||||
#define PACKAGE_VERSION "7.9"
|
#define PACKAGE_URL ""
|
||||||
|
|
||||||
|
/* Define to the version of this package. */
|
||||||
|
#define PACKAGE_VERSION "8.32"
|
||||||
|
|
||||||
|
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
|
||||||
|
pcregrep to hold parts of the file it is searching. This is also the
|
||||||
|
minimum value. The actual amount of memory used by pcregrep is three times
|
||||||
|
this number, because it allows for the buffering of "before" and "after"
|
||||||
|
lines. */
|
||||||
|
#ifndef PCREGREP_BUFSIZE
|
||||||
|
#define PCREGREP_BUFSIZE 20480
|
||||||
|
#endif
|
||||||
|
|
||||||
/* If you are compiling for a system other than a Unix-like system or
|
/* If you are compiling for a system other than a Unix-like system or
|
||||||
Win32, and it needs some magic to be inserted before the definition
|
Win32, and it needs some magic to be inserted before the definition
|
||||||
of a function that is exported by the library, define this macro to
|
of a function that is exported by the library, define this macro to
|
||||||
contain the relevant magic. If you do not define this macro, it
|
contain the relevant magic. If you do not define this macro, a suitable
|
||||||
defaults to "extern" for a C compiler and "extern C" for a C++
|
__declspec value is used for Windows systems; in other environments
|
||||||
compiler on non-Win32 systems. This macro appears at the start of
|
"extern" is used for a C compiler and "extern C" for a C++ compiler.
|
||||||
every exported function that is part of the external API. It does
|
This macro appears at the start of every exported function that is part
|
||||||
not appear on functions that are "external" in the C sense, but
|
of the external API. It does not appear on functions that are "external"
|
||||||
which are internal to the library. */
|
in the C sense, but which are internal to the library. */
|
||||||
/* #undef PCRE_EXP_DEFN */
|
/* #undef PCRE_EXP_DEFN */
|
||||||
|
|
||||||
/* Define if linking statically (TODO: make nice with Libtool) */
|
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||||
/* #undef PCRE_STATIC */
|
/* #undef PCRE_STATIC */
|
||||||
|
|
||||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||||
@ -270,44 +315,78 @@ them both to 0; an emulation function will be used. */
|
|||||||
only two. If the number of expected substrings is small, the wrapper
|
only two. If the number of expected substrings is small, the wrapper
|
||||||
function uses space on the stack, because this is faster than using
|
function uses space on the stack, because this is faster than using
|
||||||
malloc() for each call. The threshold above which the stack is no longer
|
malloc() for each call. The threshold above which the stack is no longer
|
||||||
used is defined by POSIX_MALLOC_THRESHOLD. On systems that support it,
|
used is defined by POSIX_MALLOC_THRESHOLD. */
|
||||||
"configure" can be used to override this default. */
|
|
||||||
#ifndef POSIX_MALLOC_THRESHOLD
|
#ifndef POSIX_MALLOC_THRESHOLD
|
||||||
#define POSIX_MALLOC_THRESHOLD 10
|
#define POSIX_MALLOC_THRESHOLD 10
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||||
|
your system. */
|
||||||
|
/* #undef PTHREAD_CREATE_JOINABLE */
|
||||||
|
|
||||||
/* Define to 1 if you have the ANSI C header files. */
|
/* Define to 1 if you have the ANSI C header files. */
|
||||||
#ifndef STDC_HEADERS
|
#ifndef STDC_HEADERS
|
||||||
#define STDC_HEADERS 1
|
#define STDC_HEADERS 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Define to allow pcregrep to be linked with libbz2, so that it is able to
|
/* Define to allow pcretest and pcregrep to be linked with gcov, so that they
|
||||||
handle .bz2 files. */
|
are able to generate code coverage reports. */
|
||||||
|
/* #undef SUPPORT_GCOV */
|
||||||
|
|
||||||
|
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||||
|
/* #undef SUPPORT_JIT */
|
||||||
|
|
||||||
|
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
|
||||||
|
is able to handle .bz2 files. */
|
||||||
/* #undef SUPPORT_LIBBZ2 */
|
/* #undef SUPPORT_LIBBZ2 */
|
||||||
|
|
||||||
/* Define to allow pcretest to be linked with libreadline. */
|
/* Define to any value to allow pcretest to be linked with libedit. */
|
||||||
|
/* #undef SUPPORT_LIBEDIT */
|
||||||
|
|
||||||
|
/* Define to any value to allow pcretest to be linked with libreadline. */
|
||||||
/* #undef SUPPORT_LIBREADLINE */
|
/* #undef SUPPORT_LIBREADLINE */
|
||||||
|
|
||||||
/* Define to allow pcregrep to be linked with libz, so that it is able to
|
/* Define to any value to allow pcregrep to be linked with libz, so that it is
|
||||||
handle .gz files. */
|
able to handle .gz files. */
|
||||||
/* #undef SUPPORT_LIBZ */
|
/* #undef SUPPORT_LIBZ */
|
||||||
|
|
||||||
/* Define to enable support for Unicode properties */
|
/* Define to any value to enable the 16 bit PCRE library. */
|
||||||
|
/* #undef SUPPORT_PCRE16 */
|
||||||
|
|
||||||
|
/* Define to any value to enable the 32 bit PCRE library. */
|
||||||
|
/* #undef SUPPORT_PCRE32 */
|
||||||
|
|
||||||
|
/* Define to any value to enable the 8 bit PCRE library. */
|
||||||
|
#ifndef SUPPORT_PCRE8
|
||||||
|
#define SUPPORT_PCRE8 /**/
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Define to any value to enable JIT support in pcregrep. */
|
||||||
|
/* #undef SUPPORT_PCREGREP_JIT */
|
||||||
|
|
||||||
|
/* Define to any value to enable support for Unicode properties. */
|
||||||
/* #undef SUPPORT_UCP */
|
/* #undef SUPPORT_UCP */
|
||||||
|
|
||||||
/* Define to enable support for the UTF-8 Unicode encoding. This will work
|
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
||||||
even in an EBCDIC environment, but it is incompatible with the EBCDIC
|
This will work even in an EBCDIC environment, but it is incompatible with
|
||||||
macro. That is, PCRE can support *either* EBCDIC code *or* ASCII/UTF-8, but
|
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
|
||||||
not both at once. */
|
ASCII/UTF-8/16/32, but not both at once. */
|
||||||
/* #undef SUPPORT_UTF8 */
|
/* #undef SUPPORT_UTF */
|
||||||
|
|
||||||
|
/* Valgrind support to find invalid memory reads. */
|
||||||
|
/* #undef SUPPORT_VALGRIND */
|
||||||
|
|
||||||
/* Version number of package */
|
/* Version number of package */
|
||||||
#ifndef VERSION
|
#ifndef VERSION
|
||||||
#define VERSION "7.9"
|
#define VERSION "8.32"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Define to empty if `const' does not conform to ANSI C. */
|
/* Define to empty if `const' does not conform to ANSI C. */
|
||||||
/* #undef const */
|
/* #undef const */
|
||||||
|
|
||||||
|
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||||
|
such a type exists and the standard includes do not define it. */
|
||||||
|
/* #undef int64_t */
|
||||||
|
|
||||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||||
/* #undef size_t */
|
/* #undef size_t */
|
||||||
|
@ -1,16 +1,17 @@
|
|||||||
/* config.h.in. Generated from configure.ac by autoheader. */
|
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||||
|
|
||||||
|
|
||||||
/* On Unix-like systems config.h.in is converted by "configure" into config.h.
|
/* PCRE is written in Standard C, but there are a few non-standard things it
|
||||||
Some other environments also support the use of "configure". PCRE is written in
|
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||||
Standard C, but there are a few non-standard things it can cope with, allowing
|
systems.
|
||||||
it to run on SunOS4 and other "close to standard" systems.
|
|
||||||
|
|
||||||
If you are going to build PCRE "by hand" on a system without "configure" you
|
In environments that support the facilities, config.h.in is converted by
|
||||||
should copy the distributed config.h.generic to config.h, and then set up the
|
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
|
||||||
macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to
|
are going to build PCRE "by hand" without using "configure" or CMake, you
|
||||||
all of your compile commands, so that config.h is included at the start of
|
should copy the distributed config.h.generic to config.h, and then edit the
|
||||||
every source.
|
macro definitions to be the way you need them. You must then add
|
||||||
|
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
|
||||||
|
at the start of every source.
|
||||||
|
|
||||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
||||||
@ -20,20 +21,28 @@ HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
|||||||
them both to 0; an emulation function will be used. */
|
them both to 0; an emulation function will be used. */
|
||||||
|
|
||||||
/* By default, the \R escape sequence matches any Unicode line ending
|
/* By default, the \R escape sequence matches any Unicode line ending
|
||||||
character or sequence of characters. If BSR_ANYCRLF is defined, this is
|
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||||
changed so that backslash-R matches only CR, LF, or CRLF. The build-time
|
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||||
default can be overridden by the user of PCRE at runtime. On systems that
|
The build-time default can be overridden by the user of PCRE at runtime. */
|
||||||
support it, "configure" can be used to override the default. */
|
|
||||||
#undef BSR_ANYCRLF
|
#undef BSR_ANYCRLF
|
||||||
|
|
||||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||||
character codes, define this macro as 1. On systems that can use
|
character codes, define this macro to any value. You must also edit the
|
||||||
"configure", this can be done via --enable-ebcdic. PCRE will then assume
|
NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
|
||||||
that all input strings are in EBCDIC. If you do not define this macro, PCRE
|
On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
|
||||||
will assume input strings are ASCII or UTF-8 Unicode. It is not possible to
|
automatically adjusted. When EBCDIC is set, PCRE assumes that all input
|
||||||
build a version of PCRE that supports both EBCDIC and UTF-8. */
|
strings are in EBCDIC. If you do not define this macro, PCRE will assume
|
||||||
|
input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
|
||||||
|
a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
|
||||||
#undef EBCDIC
|
#undef EBCDIC
|
||||||
|
|
||||||
|
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||||
|
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||||
|
LF does in an ASCII/Unicode environment. The value must also be set in the
|
||||||
|
NEWLINE macro below. On systems that can use "configure" or CMake to set
|
||||||
|
EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
|
||||||
|
#undef EBCDIC_NL25
|
||||||
|
|
||||||
/* Define to 1 if you have the `bcopy' function. */
|
/* Define to 1 if you have the `bcopy' function. */
|
||||||
#undef HAVE_BCOPY
|
#undef HAVE_BCOPY
|
||||||
|
|
||||||
@ -49,6 +58,12 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||||
#undef HAVE_DLFCN_H
|
#undef HAVE_DLFCN_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||||
|
#undef HAVE_EDITLINE_READLINE_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||||
|
#undef HAVE_EDIT_READLINE_READLINE_H
|
||||||
|
|
||||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
#undef HAVE_INTTYPES_H
|
#undef HAVE_INTTYPES_H
|
||||||
|
|
||||||
@ -64,6 +79,12 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* Define to 1 if you have the <memory.h> header file. */
|
/* Define to 1 if you have the <memory.h> header file. */
|
||||||
#undef HAVE_MEMORY_H
|
#undef HAVE_MEMORY_H
|
||||||
|
|
||||||
|
/* Define if you have POSIX threads libraries and header files. */
|
||||||
|
#undef HAVE_PTHREAD
|
||||||
|
|
||||||
|
/* Have PTHREAD_PRIO_INHERIT. */
|
||||||
|
#undef HAVE_PTHREAD_PRIO_INHERIT
|
||||||
|
|
||||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||||
#undef HAVE_READLINE_HISTORY_H
|
#undef HAVE_READLINE_HISTORY_H
|
||||||
|
|
||||||
@ -88,10 +109,13 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* Define to 1 if you have the <string.h> header file. */
|
/* Define to 1 if you have the <string.h> header file. */
|
||||||
#undef HAVE_STRING_H
|
#undef HAVE_STRING_H
|
||||||
|
|
||||||
/* Define to 1 if you have the `strtoll' function. */
|
/* Define to 1 if you have `strtoimax'. */
|
||||||
|
#undef HAVE_STRTOIMAX
|
||||||
|
|
||||||
|
/* Define to 1 if you have `strtoll'. */
|
||||||
#undef HAVE_STRTOLL
|
#undef HAVE_STRTOLL
|
||||||
|
|
||||||
/* Define to 1 if you have the `strtoq' function. */
|
/* Define to 1 if you have `strtoq'. */
|
||||||
#undef HAVE_STRTOQ
|
#undef HAVE_STRTOQ
|
||||||
|
|
||||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||||
@ -109,30 +133,36 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* Define to 1 if the system has the type `unsigned long long'. */
|
/* Define to 1 if the system has the type `unsigned long long'. */
|
||||||
#undef HAVE_UNSIGNED_LONG_LONG
|
#undef HAVE_UNSIGNED_LONG_LONG
|
||||||
|
|
||||||
|
/* Define to 1 or 0, depending whether the compiler supports simple visibility
|
||||||
|
declarations. */
|
||||||
|
#undef HAVE_VISIBILITY
|
||||||
|
|
||||||
/* Define to 1 if you have the <windows.h> header file. */
|
/* Define to 1 if you have the <windows.h> header file. */
|
||||||
#undef HAVE_WINDOWS_H
|
#undef HAVE_WINDOWS_H
|
||||||
|
|
||||||
/* Define to 1 if you have the <zlib.h> header file. */
|
/* Define to 1 if you have the <zlib.h> header file. */
|
||||||
#undef HAVE_ZLIB_H
|
#undef HAVE_ZLIB_H
|
||||||
|
|
||||||
/* Define to 1 if you have the `_strtoi64' function. */
|
/* Define to 1 if you have `_strtoi64'. */
|
||||||
#undef HAVE__STRTOI64
|
#undef HAVE__STRTOI64
|
||||||
|
|
||||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||||
as offsets within the compiled regex. The default is 2, which allows for
|
as offsets within the compiled regex. The default is 2, which allows for
|
||||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
||||||
for longer patterns in extreme cases. On systems that support it,
|
for longer patterns in extreme cases. */
|
||||||
"configure" can be used to override this default. */
|
|
||||||
#undef LINK_SIZE
|
#undef LINK_SIZE
|
||||||
|
|
||||||
|
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||||
|
*/
|
||||||
|
#undef LT_OBJDIR
|
||||||
|
|
||||||
/* The value of MATCH_LIMIT determines the default number of times the
|
/* The value of MATCH_LIMIT determines the default number of times the
|
||||||
internal match() function can be called during a single execution of
|
internal match() function can be called during a single execution of
|
||||||
pcre_exec(). There is a runtime interface for setting a different limit.
|
pcre_exec(). There is a runtime interface for setting a different limit.
|
||||||
The limit exists in order to catch runaway regular expressions that take
|
The limit exists in order to catch runaway regular expressions that take
|
||||||
for ever to determine that they do not match. The default is set very large
|
for ever to determine that they do not match. The default is set very large
|
||||||
so that it does not accidentally catch legitimate cases. On systems that
|
so that it does not accidentally catch legitimate cases. */
|
||||||
support it, "configure" can be used to override this default. */
|
|
||||||
#undef MATCH_LIMIT
|
#undef MATCH_LIMIT
|
||||||
|
|
||||||
/* The above limit applies to all calls of match(), whether or not they
|
/* The above limit applies to all calls of match(), whether or not they
|
||||||
@ -142,8 +172,7 @@ them both to 0; an emulation function will be used. */
|
|||||||
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
|
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
|
||||||
match(). To have any useful effect, it must be less than the value of
|
match(). To have any useful effect, it must be less than the value of
|
||||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
|
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
|
||||||
a runtime method for setting a different limit. On systems that support it,
|
a runtime method for setting a different limit. */
|
||||||
"configure" can be used to override the default. */
|
|
||||||
#undef MATCH_LIMIT_RECURSION
|
#undef MATCH_LIMIT_RECURSION
|
||||||
|
|
||||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||||
@ -156,20 +185,26 @@ them both to 0; an emulation function will be used. */
|
|||||||
overflow caused by enormously large patterns. */
|
overflow caused by enormously large patterns. */
|
||||||
#undef MAX_NAME_SIZE
|
#undef MAX_NAME_SIZE
|
||||||
|
|
||||||
/* The value of NEWLINE determines the newline character sequence. On systems
|
/* The value of NEWLINE determines the default newline character sequence.
|
||||||
that support it, "configure" can be used to override the default, which is
|
PCRE client programs can override this by selecting other values at run
|
||||||
10. The possible values are 10 (LF), 13 (CR), 3338 (CRLF), -1 (ANY), or -2
|
time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338
|
||||||
(ANYCRLF). */
|
(CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or
|
||||||
|
3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and
|
||||||
|
0x25) that are used as the NL line terminator that is equivalent to ASCII
|
||||||
|
LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
|
||||||
|
or -2 (ANYCRLF). */
|
||||||
#undef NEWLINE
|
#undef NEWLINE
|
||||||
|
|
||||||
|
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
|
||||||
|
#undef NO_MINUS_C_MINUS_O
|
||||||
|
|
||||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||||
This can sometimes be a problem on systems that have stacks of limited
|
This can sometimes be a problem on systems that have stacks of limited
|
||||||
size. Define NO_RECURSE to get a version that doesn't use recursion in the
|
size. Define NO_RECURSE to any value to get a version that doesn't use
|
||||||
match() function; instead it creates its own stack by steam using
|
recursion in the match() function; instead it creates its own stack by
|
||||||
pcre_recurse_malloc() to obtain memory from the heap. For more detail, see
|
steam using pcre_recurse_malloc() to obtain memory from the heap. For more
|
||||||
the comments and other stuff just above the match() function. On systems
|
detail, see the comments and other stuff just above the match() function.
|
||||||
that support it, "configure" can be used to set this in the Makefile (use
|
*/
|
||||||
--disable-stack-for-recursion). */
|
|
||||||
#undef NO_RECURSE
|
#undef NO_RECURSE
|
||||||
|
|
||||||
/* Name of package */
|
/* Name of package */
|
||||||
@ -187,22 +222,50 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* Define to the one symbol short name of this package. */
|
/* Define to the one symbol short name of this package. */
|
||||||
#undef PACKAGE_TARNAME
|
#undef PACKAGE_TARNAME
|
||||||
|
|
||||||
|
/* Define to the home page for this package. */
|
||||||
|
#undef PACKAGE_URL
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#undef PACKAGE_VERSION
|
#undef PACKAGE_VERSION
|
||||||
|
|
||||||
|
/* to make a symbol visible */
|
||||||
|
#undef PCRECPP_EXP_DECL
|
||||||
|
|
||||||
|
/* to make a symbol visible */
|
||||||
|
#undef PCRECPP_EXP_DEFN
|
||||||
|
|
||||||
|
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
|
||||||
|
pcregrep to hold parts of the file it is searching. This is also the
|
||||||
|
minimum value. The actual amount of memory used by pcregrep is three times
|
||||||
|
this number, because it allows for the buffering of "before" and "after"
|
||||||
|
lines. */
|
||||||
|
#undef PCREGREP_BUFSIZE
|
||||||
|
|
||||||
|
/* to make a symbol visible */
|
||||||
|
#undef PCREPOSIX_EXP_DECL
|
||||||
|
|
||||||
|
/* to make a symbol visible */
|
||||||
|
#undef PCREPOSIX_EXP_DEFN
|
||||||
|
|
||||||
|
/* to make a symbol visible */
|
||||||
|
#undef PCRE_EXP_DATA_DEFN
|
||||||
|
|
||||||
|
/* to make a symbol visible */
|
||||||
|
#undef PCRE_EXP_DECL
|
||||||
|
|
||||||
|
|
||||||
/* If you are compiling for a system other than a Unix-like system or
|
/* If you are compiling for a system other than a Unix-like system or
|
||||||
Win32, and it needs some magic to be inserted before the definition
|
Win32, and it needs some magic to be inserted before the definition
|
||||||
of a function that is exported by the library, define this macro to
|
of a function that is exported by the library, define this macro to
|
||||||
contain the relevant magic. If you do not define this macro, it
|
contain the relevant magic. If you do not define this macro, a suitable
|
||||||
defaults to "extern" for a C compiler and "extern C" for a C++
|
__declspec value is used for Windows systems; in other environments
|
||||||
compiler on non-Win32 systems. This macro appears at the start of
|
"extern" is used for a C compiler and "extern C" for a C++ compiler.
|
||||||
every exported function that is part of the external API. It does
|
This macro appears at the start of every exported function that is part
|
||||||
not appear on functions that are "external" in the C sense, but
|
of the external API. It does not appear on functions that are "external"
|
||||||
which are internal to the library. */
|
in the C sense, but which are internal to the library. */
|
||||||
#undef PCRE_EXP_DEFN
|
#undef PCRE_EXP_DEFN
|
||||||
|
|
||||||
/* Define if linking statically (TODO: make nice with Libtool) */
|
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||||
#undef PCRE_STATIC
|
#undef PCRE_STATIC
|
||||||
|
|
||||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||||
@ -211,32 +274,60 @@ them both to 0; an emulation function will be used. */
|
|||||||
only two. If the number of expected substrings is small, the wrapper
|
only two. If the number of expected substrings is small, the wrapper
|
||||||
function uses space on the stack, because this is faster than using
|
function uses space on the stack, because this is faster than using
|
||||||
malloc() for each call. The threshold above which the stack is no longer
|
malloc() for each call. The threshold above which the stack is no longer
|
||||||
used is defined by POSIX_MALLOC_THRESHOLD. On systems that support it,
|
used is defined by POSIX_MALLOC_THRESHOLD. */
|
||||||
"configure" can be used to override this default. */
|
|
||||||
#undef POSIX_MALLOC_THRESHOLD
|
#undef POSIX_MALLOC_THRESHOLD
|
||||||
|
|
||||||
|
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||||
|
your system. */
|
||||||
|
#undef PTHREAD_CREATE_JOINABLE
|
||||||
|
|
||||||
/* Define to 1 if you have the ANSI C header files. */
|
/* Define to 1 if you have the ANSI C header files. */
|
||||||
#undef STDC_HEADERS
|
#undef STDC_HEADERS
|
||||||
|
|
||||||
/* Define to allow pcregrep to be linked with libbz2, so that it is able to
|
/* Define to allow pcretest and pcregrep to be linked with gcov, so that they
|
||||||
handle .bz2 files. */
|
are able to generate code coverage reports. */
|
||||||
|
#undef SUPPORT_GCOV
|
||||||
|
|
||||||
|
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||||
|
#undef SUPPORT_JIT
|
||||||
|
|
||||||
|
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
|
||||||
|
is able to handle .bz2 files. */
|
||||||
#undef SUPPORT_LIBBZ2
|
#undef SUPPORT_LIBBZ2
|
||||||
|
|
||||||
/* Define to allow pcretest to be linked with libreadline. */
|
/* Define to any value to allow pcretest to be linked with libedit. */
|
||||||
|
#undef SUPPORT_LIBEDIT
|
||||||
|
|
||||||
|
/* Define to any value to allow pcretest to be linked with libreadline. */
|
||||||
#undef SUPPORT_LIBREADLINE
|
#undef SUPPORT_LIBREADLINE
|
||||||
|
|
||||||
/* Define to allow pcregrep to be linked with libz, so that it is able to
|
/* Define to any value to allow pcregrep to be linked with libz, so that it is
|
||||||
handle .gz files. */
|
able to handle .gz files. */
|
||||||
#undef SUPPORT_LIBZ
|
#undef SUPPORT_LIBZ
|
||||||
|
|
||||||
/* Define to enable support for Unicode properties */
|
/* Define to any value to enable the 16 bit PCRE library. */
|
||||||
|
#undef SUPPORT_PCRE16
|
||||||
|
|
||||||
|
/* Define to any value to enable the 32 bit PCRE library. */
|
||||||
|
#undef SUPPORT_PCRE32
|
||||||
|
|
||||||
|
/* Define to any value to enable the 8 bit PCRE library. */
|
||||||
|
#undef SUPPORT_PCRE8
|
||||||
|
|
||||||
|
/* Define to any value to enable JIT support in pcregrep. */
|
||||||
|
#undef SUPPORT_PCREGREP_JIT
|
||||||
|
|
||||||
|
/* Define to any value to enable support for Unicode properties. */
|
||||||
#undef SUPPORT_UCP
|
#undef SUPPORT_UCP
|
||||||
|
|
||||||
/* Define to enable support for the UTF-8 Unicode encoding. This will work
|
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
||||||
even in an EBCDIC environment, but it is incompatible with the EBCDIC
|
This will work even in an EBCDIC environment, but it is incompatible with
|
||||||
macro. That is, PCRE can support *either* EBCDIC code *or* ASCII/UTF-8, but
|
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
|
||||||
not both at once. */
|
ASCII/UTF-8/16/32, but not both at once. */
|
||||||
#undef SUPPORT_UTF8
|
#undef SUPPORT_UTF
|
||||||
|
|
||||||
|
/* Valgrind support to find invalid memory reads. */
|
||||||
|
#undef SUPPORT_VALGRIND
|
||||||
|
|
||||||
/* Version number of package */
|
/* Version number of package */
|
||||||
#undef VERSION
|
#undef VERSION
|
||||||
@ -244,5 +335,9 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* Define to empty if `const' does not conform to ANSI C. */
|
/* Define to empty if `const' does not conform to ANSI C. */
|
||||||
#undef const
|
#undef const
|
||||||
|
|
||||||
|
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||||
|
such a type exists and the standard includes do not define it. */
|
||||||
|
#undef int64_t
|
||||||
|
|
||||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||||
#undef size_t
|
#undef size_t
|
||||||
|
259
tools/pcre/config.sub
vendored
259
tools/pcre/config.sub
vendored
@ -1,10 +1,10 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Configuration validation subroutine script.
|
# Configuration validation subroutine script.
|
||||||
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
|
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
|
||||||
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
|
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
||||||
# Free Software Foundation, Inc.
|
# 2011, 2012 Free Software Foundation, Inc.
|
||||||
|
|
||||||
timestamp='2008-09-08'
|
timestamp='2012-08-18'
|
||||||
|
|
||||||
# This file is (in principle) common to ALL GNU software.
|
# This file is (in principle) common to ALL GNU software.
|
||||||
# The presence of a machine in this file suggests that SOME GNU software
|
# The presence of a machine in this file suggests that SOME GNU software
|
||||||
@ -21,9 +21,7 @@ timestamp='2008-09-08'
|
|||||||
# GNU General Public License for more details.
|
# GNU General Public License for more details.
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program; if not, write to the Free Software
|
# along with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||||
# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
|
||||||
# 02110-1301, USA.
|
|
||||||
#
|
#
|
||||||
# As a special exception to the GNU General Public License, if you
|
# As a special exception to the GNU General Public License, if you
|
||||||
# distribute this file as part of a program that contains a
|
# distribute this file as part of a program that contains a
|
||||||
@ -32,13 +30,16 @@ timestamp='2008-09-08'
|
|||||||
|
|
||||||
|
|
||||||
# Please send patches to <config-patches@gnu.org>. Submit a context
|
# Please send patches to <config-patches@gnu.org>. Submit a context
|
||||||
# diff and a properly formatted ChangeLog entry.
|
# diff and a properly formatted GNU ChangeLog entry.
|
||||||
#
|
#
|
||||||
# Configuration subroutine to validate and canonicalize a configuration type.
|
# Configuration subroutine to validate and canonicalize a configuration type.
|
||||||
# Supply the specified configuration type as an argument.
|
# Supply the specified configuration type as an argument.
|
||||||
# If it is invalid, we print an error message on stderr and exit with code 1.
|
# If it is invalid, we print an error message on stderr and exit with code 1.
|
||||||
# Otherwise, we print the canonical config type on stdout and succeed.
|
# Otherwise, we print the canonical config type on stdout and succeed.
|
||||||
|
|
||||||
|
# You can get the latest version of this script from:
|
||||||
|
# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
|
||||||
|
|
||||||
# This file is supposed to be the same for all GNU packages
|
# This file is supposed to be the same for all GNU packages
|
||||||
# and recognize all the CPU types, system types and aliases
|
# and recognize all the CPU types, system types and aliases
|
||||||
# that are meaningful with *any* GNU software.
|
# that are meaningful with *any* GNU software.
|
||||||
@ -72,8 +73,9 @@ Report bugs and patches to <config-patches@gnu.org>."
|
|||||||
version="\
|
version="\
|
||||||
GNU config.sub ($timestamp)
|
GNU config.sub ($timestamp)
|
||||||
|
|
||||||
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
|
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
|
||||||
2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
|
||||||
|
Free Software Foundation, Inc.
|
||||||
|
|
||||||
This is free software; see the source for copying conditions. There is NO
|
This is free software; see the source for copying conditions. There is NO
|
||||||
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
||||||
@ -120,12 +122,18 @@ esac
|
|||||||
# Here we must recognize all the valid KERNEL-OS combinations.
|
# Here we must recognize all the valid KERNEL-OS combinations.
|
||||||
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
|
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
|
||||||
case $maybe_os in
|
case $maybe_os in
|
||||||
nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
|
nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
|
||||||
uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
|
linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
|
||||||
|
knetbsd*-gnu* | netbsd*-gnu* | \
|
||||||
|
kopensolaris*-gnu* | \
|
||||||
storm-chaos* | os2-emx* | rtmk-nova*)
|
storm-chaos* | os2-emx* | rtmk-nova*)
|
||||||
os=-$maybe_os
|
os=-$maybe_os
|
||||||
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
|
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
|
||||||
;;
|
;;
|
||||||
|
android-linux)
|
||||||
|
os=-linux-android
|
||||||
|
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
|
||||||
|
;;
|
||||||
*)
|
*)
|
||||||
basic_machine=`echo $1 | sed 's/-[^-]*$//'`
|
basic_machine=`echo $1 | sed 's/-[^-]*$//'`
|
||||||
if [ $basic_machine != $1 ]
|
if [ $basic_machine != $1 ]
|
||||||
@ -148,10 +156,13 @@ case $os in
|
|||||||
-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
|
-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
|
||||||
-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
|
-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
|
||||||
-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
|
-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
|
||||||
-apple | -axis | -knuth | -cray)
|
-apple | -axis | -knuth | -cray | -microblaze)
|
||||||
os=
|
os=
|
||||||
basic_machine=$1
|
basic_machine=$1
|
||||||
;;
|
;;
|
||||||
|
-bluegene*)
|
||||||
|
os=-cnk
|
||||||
|
;;
|
||||||
-sim | -cisco | -oki | -wec | -winbond)
|
-sim | -cisco | -oki | -wec | -winbond)
|
||||||
os=
|
os=
|
||||||
basic_machine=$1
|
basic_machine=$1
|
||||||
@ -166,10 +177,10 @@ case $os in
|
|||||||
os=-chorusos
|
os=-chorusos
|
||||||
basic_machine=$1
|
basic_machine=$1
|
||||||
;;
|
;;
|
||||||
-chorusrdb)
|
-chorusrdb)
|
||||||
os=-chorusrdb
|
os=-chorusrdb
|
||||||
basic_machine=$1
|
basic_machine=$1
|
||||||
;;
|
;;
|
||||||
-hiux*)
|
-hiux*)
|
||||||
os=-hiuxwe2
|
os=-hiuxwe2
|
||||||
;;
|
;;
|
||||||
@ -214,6 +225,12 @@ case $os in
|
|||||||
-isc*)
|
-isc*)
|
||||||
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
|
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
|
||||||
;;
|
;;
|
||||||
|
-lynx*178)
|
||||||
|
os=-lynxos178
|
||||||
|
;;
|
||||||
|
-lynx*5)
|
||||||
|
os=-lynxos5
|
||||||
|
;;
|
||||||
-lynx*)
|
-lynx*)
|
||||||
os=-lynxos
|
os=-lynxos
|
||||||
;;
|
;;
|
||||||
@ -238,17 +255,23 @@ case $basic_machine in
|
|||||||
# Some are omitted here because they have special meanings below.
|
# Some are omitted here because they have special meanings below.
|
||||||
1750a | 580 \
|
1750a | 580 \
|
||||||
| a29k \
|
| a29k \
|
||||||
|
| aarch64 | aarch64_be \
|
||||||
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
|
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
|
||||||
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
|
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
|
||||||
| am33_2.0 \
|
| am33_2.0 \
|
||||||
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
|
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
|
||||||
|
| be32 | be64 \
|
||||||
| bfin \
|
| bfin \
|
||||||
| c4x | clipper \
|
| c4x | clipper \
|
||||||
| d10v | d30v | dlx | dsp16xx | dvp \
|
| d10v | d30v | dlx | dsp16xx | dvp \
|
||||||
|
| epiphany \
|
||||||
| fido | fr30 | frv \
|
| fido | fr30 | frv \
|
||||||
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
|
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
|
||||||
|
| hexagon \
|
||||||
| i370 | i860 | i960 | ia64 \
|
| i370 | i860 | i960 | ia64 \
|
||||||
| ip2k | iq2000 \
|
| ip2k | iq2000 \
|
||||||
|
| le32 | le64 \
|
||||||
|
| lm32 \
|
||||||
| m32c | m32r | m32rle | m68000 | m68k | m88k \
|
| m32c | m32r | m32rle | m68000 | m68k | m88k \
|
||||||
| maxq | mb | microblaze | mcore | mep | metag \
|
| maxq | mb | microblaze | mcore | mep | metag \
|
||||||
| mips | mipsbe | mipseb | mipsel | mipsle \
|
| mips | mipsbe | mipseb | mipsel | mipsle \
|
||||||
@ -270,29 +293,42 @@ case $basic_machine in
|
|||||||
| mipsisa64sr71k | mipsisa64sr71kel \
|
| mipsisa64sr71k | mipsisa64sr71kel \
|
||||||
| mipstx39 | mipstx39el \
|
| mipstx39 | mipstx39el \
|
||||||
| mn10200 | mn10300 \
|
| mn10200 | mn10300 \
|
||||||
|
| moxie \
|
||||||
| mt \
|
| mt \
|
||||||
| msp430 \
|
| msp430 \
|
||||||
|
| nds32 | nds32le | nds32be \
|
||||||
| nios | nios2 \
|
| nios | nios2 \
|
||||||
| ns16k | ns32k \
|
| ns16k | ns32k \
|
||||||
|
| open8 \
|
||||||
| or32 \
|
| or32 \
|
||||||
| pdp10 | pdp11 | pj | pjl \
|
| pdp10 | pdp11 | pj | pjl \
|
||||||
| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
|
| powerpc | powerpc64 | powerpc64le | powerpcle \
|
||||||
| pyramid \
|
| pyramid \
|
||||||
|
| rl78 | rx \
|
||||||
| score \
|
| score \
|
||||||
| sh | sh[1234] | sh[24]a | sh[24]a*eb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
|
| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
|
||||||
| sh64 | sh64le \
|
| sh64 | sh64le \
|
||||||
| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
|
| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
|
||||||
| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
|
| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
|
||||||
| spu | strongarm \
|
| spu \
|
||||||
| tahoe | thumb | tic4x | tic80 | tron \
|
| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
|
||||||
| v850 | v850e \
|
| ubicom32 \
|
||||||
|
| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
|
||||||
| we32k \
|
| we32k \
|
||||||
| x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
|
| x86 | xc16x | xstormy16 | xtensa \
|
||||||
| z8k | z80)
|
| z8k | z80)
|
||||||
basic_machine=$basic_machine-unknown
|
basic_machine=$basic_machine-unknown
|
||||||
;;
|
;;
|
||||||
m6811 | m68hc11 | m6812 | m68hc12)
|
c54x)
|
||||||
# Motorola 68HC11/12.
|
basic_machine=tic54x-unknown
|
||||||
|
;;
|
||||||
|
c55x)
|
||||||
|
basic_machine=tic55x-unknown
|
||||||
|
;;
|
||||||
|
c6x)
|
||||||
|
basic_machine=tic6x-unknown
|
||||||
|
;;
|
||||||
|
m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
|
||||||
basic_machine=$basic_machine-unknown
|
basic_machine=$basic_machine-unknown
|
||||||
os=-none
|
os=-none
|
||||||
;;
|
;;
|
||||||
@ -302,6 +338,21 @@ case $basic_machine in
|
|||||||
basic_machine=mt-unknown
|
basic_machine=mt-unknown
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
strongarm | thumb | xscale)
|
||||||
|
basic_machine=arm-unknown
|
||||||
|
;;
|
||||||
|
xgate)
|
||||||
|
basic_machine=$basic_machine-unknown
|
||||||
|
os=-none
|
||||||
|
;;
|
||||||
|
xscaleeb)
|
||||||
|
basic_machine=armeb-unknown
|
||||||
|
;;
|
||||||
|
|
||||||
|
xscaleel)
|
||||||
|
basic_machine=armel-unknown
|
||||||
|
;;
|
||||||
|
|
||||||
# We use `pc' rather than `unknown'
|
# We use `pc' rather than `unknown'
|
||||||
# because (1) that's what they normally are, and
|
# because (1) that's what they normally are, and
|
||||||
# (2) the word "unknown" tends to confuse beginning users.
|
# (2) the word "unknown" tends to confuse beginning users.
|
||||||
@ -316,24 +367,29 @@ case $basic_machine in
|
|||||||
# Recognize the basic CPU types with company name.
|
# Recognize the basic CPU types with company name.
|
||||||
580-* \
|
580-* \
|
||||||
| a29k-* \
|
| a29k-* \
|
||||||
|
| aarch64-* | aarch64_be-* \
|
||||||
| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
|
| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
|
||||||
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
|
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
|
||||||
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
|
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
|
||||||
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
|
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
|
||||||
| avr-* | avr32-* \
|
| avr-* | avr32-* \
|
||||||
|
| be32-* | be64-* \
|
||||||
| bfin-* | bs2000-* \
|
| bfin-* | bs2000-* \
|
||||||
| c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
|
| c[123]* | c30-* | [cjt]90-* | c4x-* \
|
||||||
| clipper-* | craynv-* | cydra-* \
|
| clipper-* | craynv-* | cydra-* \
|
||||||
| d10v-* | d30v-* | dlx-* \
|
| d10v-* | d30v-* | dlx-* \
|
||||||
| elxsi-* \
|
| elxsi-* \
|
||||||
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
|
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
|
||||||
| h8300-* | h8500-* \
|
| h8300-* | h8500-* \
|
||||||
| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
|
| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
|
||||||
|
| hexagon-* \
|
||||||
| i*86-* | i860-* | i960-* | ia64-* \
|
| i*86-* | i860-* | i960-* | ia64-* \
|
||||||
| ip2k-* | iq2000-* \
|
| ip2k-* | iq2000-* \
|
||||||
|
| le32-* | le64-* \
|
||||||
|
| lm32-* \
|
||||||
| m32c-* | m32r-* | m32rle-* \
|
| m32c-* | m32r-* | m32rle-* \
|
||||||
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
|
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
|
||||||
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
|
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
|
||||||
| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
|
| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
|
||||||
| mips16-* \
|
| mips16-* \
|
||||||
| mips64-* | mips64el-* \
|
| mips64-* | mips64el-* \
|
||||||
@ -355,24 +411,29 @@ case $basic_machine in
|
|||||||
| mmix-* \
|
| mmix-* \
|
||||||
| mt-* \
|
| mt-* \
|
||||||
| msp430-* \
|
| msp430-* \
|
||||||
|
| nds32-* | nds32le-* | nds32be-* \
|
||||||
| nios-* | nios2-* \
|
| nios-* | nios2-* \
|
||||||
| none-* | np1-* | ns16k-* | ns32k-* \
|
| none-* | np1-* | ns16k-* | ns32k-* \
|
||||||
|
| open8-* \
|
||||||
| orion-* \
|
| orion-* \
|
||||||
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
|
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
|
||||||
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
|
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
|
||||||
| pyramid-* \
|
| pyramid-* \
|
||||||
| romp-* | rs6000-* \
|
| rl78-* | romp-* | rs6000-* | rx-* \
|
||||||
| sh-* | sh[1234]-* | sh[24]a-* | sh[24]a*eb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
|
| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
|
||||||
| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
|
| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
|
||||||
| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
|
| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
|
||||||
| sparclite-* \
|
| sparclite-* \
|
||||||
| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
|
| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
|
||||||
| tahoe-* | thumb-* \
|
| tahoe-* \
|
||||||
| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* | tile-* \
|
| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
|
||||||
|
| tile*-* \
|
||||||
| tron-* \
|
| tron-* \
|
||||||
| v850-* | v850e-* | vax-* \
|
| ubicom32-* \
|
||||||
|
| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
|
||||||
|
| vax-* \
|
||||||
| we32k-* \
|
| we32k-* \
|
||||||
| x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
|
| x86-* | x86_64-* | xc16x-* | xps100-* \
|
||||||
| xstormy16-* | xtensa*-* \
|
| xstormy16-* | xtensa*-* \
|
||||||
| ymp-* \
|
| ymp-* \
|
||||||
| z8k-* | z80-*)
|
| z8k-* | z80-*)
|
||||||
@ -397,7 +458,7 @@ case $basic_machine in
|
|||||||
basic_machine=a29k-amd
|
basic_machine=a29k-amd
|
||||||
os=-udi
|
os=-udi
|
||||||
;;
|
;;
|
||||||
abacus)
|
abacus)
|
||||||
basic_machine=abacus-unknown
|
basic_machine=abacus-unknown
|
||||||
;;
|
;;
|
||||||
adobe68k)
|
adobe68k)
|
||||||
@ -443,6 +504,10 @@ case $basic_machine in
|
|||||||
basic_machine=m68k-apollo
|
basic_machine=m68k-apollo
|
||||||
os=-bsd
|
os=-bsd
|
||||||
;;
|
;;
|
||||||
|
aros)
|
||||||
|
basic_machine=i386-pc
|
||||||
|
os=-aros
|
||||||
|
;;
|
||||||
aux)
|
aux)
|
||||||
basic_machine=m68k-apple
|
basic_machine=m68k-apple
|
||||||
os=-aux
|
os=-aux
|
||||||
@ -459,11 +524,24 @@ case $basic_machine in
|
|||||||
basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
|
basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
|
||||||
os=-linux
|
os=-linux
|
||||||
;;
|
;;
|
||||||
|
bluegene*)
|
||||||
|
basic_machine=powerpc-ibm
|
||||||
|
os=-cnk
|
||||||
|
;;
|
||||||
|
c54x-*)
|
||||||
|
basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
|
||||||
|
;;
|
||||||
|
c55x-*)
|
||||||
|
basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
|
||||||
|
;;
|
||||||
|
c6x-*)
|
||||||
|
basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
|
||||||
|
;;
|
||||||
c90)
|
c90)
|
||||||
basic_machine=c90-cray
|
basic_machine=c90-cray
|
||||||
os=-unicos
|
os=-unicos
|
||||||
;;
|
;;
|
||||||
cegcc)
|
cegcc)
|
||||||
basic_machine=arm-unknown
|
basic_machine=arm-unknown
|
||||||
os=-cegcc
|
os=-cegcc
|
||||||
;;
|
;;
|
||||||
@ -495,7 +573,7 @@ case $basic_machine in
|
|||||||
basic_machine=craynv-cray
|
basic_machine=craynv-cray
|
||||||
os=-unicosmp
|
os=-unicosmp
|
||||||
;;
|
;;
|
||||||
cr16)
|
cr16 | cr16-*)
|
||||||
basic_machine=cr16-unknown
|
basic_machine=cr16-unknown
|
||||||
os=-elf
|
os=-elf
|
||||||
;;
|
;;
|
||||||
@ -653,7 +731,6 @@ case $basic_machine in
|
|||||||
i370-ibm* | ibm*)
|
i370-ibm* | ibm*)
|
||||||
basic_machine=i370-ibm
|
basic_machine=i370-ibm
|
||||||
;;
|
;;
|
||||||
# I'm not sure what "Sysv32" means. Should this be sysv3.2?
|
|
||||||
i*86v32)
|
i*86v32)
|
||||||
basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
|
basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
|
||||||
os=-sysv32
|
os=-sysv32
|
||||||
@ -711,6 +788,13 @@ case $basic_machine in
|
|||||||
basic_machine=ns32k-utek
|
basic_machine=ns32k-utek
|
||||||
os=-sysv
|
os=-sysv
|
||||||
;;
|
;;
|
||||||
|
microblaze)
|
||||||
|
basic_machine=microblaze-xilinx
|
||||||
|
;;
|
||||||
|
mingw64)
|
||||||
|
basic_machine=x86_64-pc
|
||||||
|
os=-mingw64
|
||||||
|
;;
|
||||||
mingw32)
|
mingw32)
|
||||||
basic_machine=i386-pc
|
basic_machine=i386-pc
|
||||||
os=-mingw32
|
os=-mingw32
|
||||||
@ -765,10 +849,18 @@ case $basic_machine in
|
|||||||
ms1-*)
|
ms1-*)
|
||||||
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
|
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
|
||||||
;;
|
;;
|
||||||
|
msys)
|
||||||
|
basic_machine=i386-pc
|
||||||
|
os=-msys
|
||||||
|
;;
|
||||||
mvs)
|
mvs)
|
||||||
basic_machine=i370-ibm
|
basic_machine=i370-ibm
|
||||||
os=-mvs
|
os=-mvs
|
||||||
;;
|
;;
|
||||||
|
nacl)
|
||||||
|
basic_machine=le32-unknown
|
||||||
|
os=-nacl
|
||||||
|
;;
|
||||||
ncr3000)
|
ncr3000)
|
||||||
basic_machine=i486-ncr
|
basic_machine=i486-ncr
|
||||||
os=-sysv4
|
os=-sysv4
|
||||||
@ -833,6 +925,12 @@ case $basic_machine in
|
|||||||
np1)
|
np1)
|
||||||
basic_machine=np1-gould
|
basic_machine=np1-gould
|
||||||
;;
|
;;
|
||||||
|
neo-tandem)
|
||||||
|
basic_machine=neo-tandem
|
||||||
|
;;
|
||||||
|
nse-tandem)
|
||||||
|
basic_machine=nse-tandem
|
||||||
|
;;
|
||||||
nsr-tandem)
|
nsr-tandem)
|
||||||
basic_machine=nsr-tandem
|
basic_machine=nsr-tandem
|
||||||
;;
|
;;
|
||||||
@ -915,9 +1013,10 @@ case $basic_machine in
|
|||||||
;;
|
;;
|
||||||
power) basic_machine=power-ibm
|
power) basic_machine=power-ibm
|
||||||
;;
|
;;
|
||||||
ppc) basic_machine=powerpc-unknown
|
ppc | ppcbe) basic_machine=powerpc-unknown
|
||||||
;;
|
;;
|
||||||
ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
|
ppc-* | ppcbe-*)
|
||||||
|
basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
|
||||||
;;
|
;;
|
||||||
ppcle | powerpclittle | ppc-le | powerpc-little)
|
ppcle | powerpclittle | ppc-le | powerpc-little)
|
||||||
basic_machine=powerpcle-unknown
|
basic_machine=powerpcle-unknown
|
||||||
@ -1011,6 +1110,9 @@ case $basic_machine in
|
|||||||
basic_machine=i860-stratus
|
basic_machine=i860-stratus
|
||||||
os=-sysv4
|
os=-sysv4
|
||||||
;;
|
;;
|
||||||
|
strongarm-* | thumb-*)
|
||||||
|
basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
|
||||||
|
;;
|
||||||
sun2)
|
sun2)
|
||||||
basic_machine=m68000-sun
|
basic_machine=m68000-sun
|
||||||
;;
|
;;
|
||||||
@ -1067,20 +1169,8 @@ case $basic_machine in
|
|||||||
basic_machine=t90-cray
|
basic_machine=t90-cray
|
||||||
os=-unicos
|
os=-unicos
|
||||||
;;
|
;;
|
||||||
tic54x | c54x*)
|
|
||||||
basic_machine=tic54x-unknown
|
|
||||||
os=-coff
|
|
||||||
;;
|
|
||||||
tic55x | c55x*)
|
|
||||||
basic_machine=tic55x-unknown
|
|
||||||
os=-coff
|
|
||||||
;;
|
|
||||||
tic6x | c6x*)
|
|
||||||
basic_machine=tic6x-unknown
|
|
||||||
os=-coff
|
|
||||||
;;
|
|
||||||
tile*)
|
tile*)
|
||||||
basic_machine=tile-unknown
|
basic_machine=$basic_machine-unknown
|
||||||
os=-linux-gnu
|
os=-linux-gnu
|
||||||
;;
|
;;
|
||||||
tx39)
|
tx39)
|
||||||
@ -1150,6 +1240,9 @@ case $basic_machine in
|
|||||||
xps | xps100)
|
xps | xps100)
|
||||||
basic_machine=xps100-honeywell
|
basic_machine=xps100-honeywell
|
||||||
;;
|
;;
|
||||||
|
xscale-* | xscalee[bl]-*)
|
||||||
|
basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
|
||||||
|
;;
|
||||||
ymp)
|
ymp)
|
||||||
basic_machine=ymp-cray
|
basic_machine=ymp-cray
|
||||||
os=-unicos
|
os=-unicos
|
||||||
@ -1200,7 +1293,7 @@ case $basic_machine in
|
|||||||
we32k)
|
we32k)
|
||||||
basic_machine=we32k-att
|
basic_machine=we32k-att
|
||||||
;;
|
;;
|
||||||
sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele)
|
sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
|
||||||
basic_machine=sh-unknown
|
basic_machine=sh-unknown
|
||||||
;;
|
;;
|
||||||
sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
|
sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
|
||||||
@ -1247,9 +1340,12 @@ esac
|
|||||||
if [ x"$os" != x"" ]
|
if [ x"$os" != x"" ]
|
||||||
then
|
then
|
||||||
case $os in
|
case $os in
|
||||||
# First match some system type aliases
|
# First match some system type aliases
|
||||||
# that might get confused with valid system types.
|
# that might get confused with valid system types.
|
||||||
# -solaris* is a basic system type, with this one exception.
|
# -solaris* is a basic system type, with this one exception.
|
||||||
|
-auroraux)
|
||||||
|
os=-auroraux
|
||||||
|
;;
|
||||||
-solaris1 | -solaris1.*)
|
-solaris1 | -solaris1.*)
|
||||||
os=`echo $os | sed -e 's|solaris1|sunos4|'`
|
os=`echo $os | sed -e 's|solaris1|sunos4|'`
|
||||||
;;
|
;;
|
||||||
@ -1270,29 +1366,31 @@ case $os in
|
|||||||
# Each alternative MUST END IN A *, to match a version number.
|
# Each alternative MUST END IN A *, to match a version number.
|
||||||
# -sysv* is not here because it comes later, after sysvr4.
|
# -sysv* is not here because it comes later, after sysvr4.
|
||||||
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
|
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
|
||||||
| -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
|
| -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
|
||||||
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
|
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
|
||||||
|
| -sym* | -kopensolaris* \
|
||||||
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
|
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
|
||||||
| -aos* \
|
| -aos* | -aros* \
|
||||||
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
|
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
|
||||||
| -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
|
| -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
|
||||||
| -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
|
| -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
|
||||||
| -openbsd* | -solidbsd* \
|
| -bitrig* | -openbsd* | -solidbsd* \
|
||||||
| -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
|
| -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
|
||||||
| -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
|
| -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
|
||||||
| -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
|
| -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
|
||||||
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
|
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
|
||||||
| -chorusos* | -chorusrdb* | -cegcc* \
|
| -chorusos* | -chorusrdb* | -cegcc* \
|
||||||
| -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
|
| -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
|
||||||
| -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \
|
| -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
|
||||||
|
| -linux-newlib* | -linux-musl* | -linux-uclibc* \
|
||||||
| -uxpv* | -beos* | -mpeix* | -udk* \
|
| -uxpv* | -beos* | -mpeix* | -udk* \
|
||||||
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
|
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
|
||||||
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
|
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
|
||||||
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
|
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* | -irx* \
|
||||||
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
|
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
|
||||||
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
|
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
|
||||||
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
|
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
|
||||||
| -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -irx*)
|
| -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
|
||||||
# Remember, each alternative MUST END IN *, to match a version number.
|
# Remember, each alternative MUST END IN *, to match a version number.
|
||||||
;;
|
;;
|
||||||
-qnx*)
|
-qnx*)
|
||||||
@ -1331,7 +1429,7 @@ case $os in
|
|||||||
-opened*)
|
-opened*)
|
||||||
os=-openedition
|
os=-openedition
|
||||||
;;
|
;;
|
||||||
-os400*)
|
-os400*)
|
||||||
os=-os400
|
os=-os400
|
||||||
;;
|
;;
|
||||||
-wince*)
|
-wince*)
|
||||||
@ -1380,7 +1478,7 @@ case $os in
|
|||||||
-sinix*)
|
-sinix*)
|
||||||
os=-sysv4
|
os=-sysv4
|
||||||
;;
|
;;
|
||||||
-tpf*)
|
-tpf*)
|
||||||
os=-tpf
|
os=-tpf
|
||||||
;;
|
;;
|
||||||
-triton*)
|
-triton*)
|
||||||
@ -1425,6 +1523,8 @@ case $os in
|
|||||||
-dicos*)
|
-dicos*)
|
||||||
os=-dicos
|
os=-dicos
|
||||||
;;
|
;;
|
||||||
|
-nacl*)
|
||||||
|
;;
|
||||||
-none)
|
-none)
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
@ -1447,10 +1547,10 @@ else
|
|||||||
# system, and we'll never get to this point.
|
# system, and we'll never get to this point.
|
||||||
|
|
||||||
case $basic_machine in
|
case $basic_machine in
|
||||||
score-*)
|
score-*)
|
||||||
os=-elf
|
os=-elf
|
||||||
;;
|
;;
|
||||||
spu-*)
|
spu-*)
|
||||||
os=-elf
|
os=-elf
|
||||||
;;
|
;;
|
||||||
*-acorn)
|
*-acorn)
|
||||||
@ -1462,8 +1562,20 @@ case $basic_machine in
|
|||||||
arm*-semi)
|
arm*-semi)
|
||||||
os=-aout
|
os=-aout
|
||||||
;;
|
;;
|
||||||
c4x-* | tic4x-*)
|
c4x-* | tic4x-*)
|
||||||
os=-coff
|
os=-coff
|
||||||
|
;;
|
||||||
|
hexagon-*)
|
||||||
|
os=-elf
|
||||||
|
;;
|
||||||
|
tic54x-*)
|
||||||
|
os=-coff
|
||||||
|
;;
|
||||||
|
tic55x-*)
|
||||||
|
os=-coff
|
||||||
|
;;
|
||||||
|
tic6x-*)
|
||||||
|
os=-coff
|
||||||
;;
|
;;
|
||||||
# This must come before the *-dec entry.
|
# This must come before the *-dec entry.
|
||||||
pdp10-*)
|
pdp10-*)
|
||||||
@ -1483,14 +1595,11 @@ case $basic_machine in
|
|||||||
;;
|
;;
|
||||||
m68000-sun)
|
m68000-sun)
|
||||||
os=-sunos3
|
os=-sunos3
|
||||||
# This also exists in the configure program, but was not the
|
|
||||||
# default.
|
|
||||||
# os=-sunos4
|
|
||||||
;;
|
;;
|
||||||
m68*-cisco)
|
m68*-cisco)
|
||||||
os=-aout
|
os=-aout
|
||||||
;;
|
;;
|
||||||
mep-*)
|
mep-*)
|
||||||
os=-elf
|
os=-elf
|
||||||
;;
|
;;
|
||||||
mips*-cisco)
|
mips*-cisco)
|
||||||
@ -1517,7 +1626,7 @@ case $basic_machine in
|
|||||||
*-ibm)
|
*-ibm)
|
||||||
os=-aix
|
os=-aix
|
||||||
;;
|
;;
|
||||||
*-knuth)
|
*-knuth)
|
||||||
os=-mmixware
|
os=-mmixware
|
||||||
;;
|
;;
|
||||||
*-wec)
|
*-wec)
|
||||||
@ -1622,7 +1731,7 @@ case $basic_machine in
|
|||||||
-sunos*)
|
-sunos*)
|
||||||
vendor=sun
|
vendor=sun
|
||||||
;;
|
;;
|
||||||
-aix*)
|
-cnk*|-aix*)
|
||||||
vendor=ibm
|
vendor=ibm
|
||||||
;;
|
;;
|
||||||
-beos*)
|
-beos*)
|
||||||
|
27885
tools/pcre/configure
vendored
27885
tools/pcre/configure
vendored
File diff suppressed because it is too large
Load Diff
@ -1,27 +1,38 @@
|
|||||||
dnl Process this file with autoconf to produce a configure script.
|
dnl Process this file with autoconf to produce a configure script.
|
||||||
|
|
||||||
dnl NOTE FOR MAINTAINERS: Do not use major or minor version numbers with
|
dnl NOTE FOR MAINTAINERS: Do not use minor version numbers 08 or 09 because
|
||||||
dnl leading zeros, because they may be treated as octal constants. The
|
dnl the leading zeros may cause them to be treated as invalid octal constants
|
||||||
dnl PCRE_PRERELEASE feature is for identifying release candidates. It might
|
dnl if a PCRE user writes code that uses PCRE_MINOR as a number. There is now
|
||||||
dnl be defined as -RC2, for example. For real releases, it should be defined
|
dnl a check further down that throws an error if 08 or 09 are used.
|
||||||
dnl empty.
|
|
||||||
|
|
||||||
m4_define(pcre_major, [7])
|
dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
|
||||||
m4_define(pcre_minor, [9])
|
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||||
|
|
||||||
|
m4_define(pcre_major, [8])
|
||||||
|
m4_define(pcre_minor, [32])
|
||||||
m4_define(pcre_prerelease, [])
|
m4_define(pcre_prerelease, [])
|
||||||
m4_define(pcre_date, [2009-04-11])
|
m4_define(pcre_date, [2012-11-30])
|
||||||
|
|
||||||
|
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||||
|
# 50 lines of this file. Please update that if the variables above are moved.
|
||||||
|
|
||||||
# Libtool shared library interface versions (current:revision:age)
|
# Libtool shared library interface versions (current:revision:age)
|
||||||
m4_define(libpcre_version, [0:1:0])
|
m4_define(libpcre_version, [3:0:2])
|
||||||
m4_define(libpcreposix_version, [0:0:0])
|
m4_define(libpcre16_version, [2:0:2])
|
||||||
|
m4_define(libpcre32_version, [0:0:0])
|
||||||
|
m4_define(libpcreposix_version, [0:1:0])
|
||||||
m4_define(libpcrecpp_version, [0:0:0])
|
m4_define(libpcrecpp_version, [0:0:0])
|
||||||
|
|
||||||
AC_PREREQ(2.57)
|
AC_PREREQ(2.57)
|
||||||
AC_INIT(PCRE, pcre_major.pcre_minor[]pcre_prerelease, , pcre)
|
AC_INIT(PCRE, pcre_major.pcre_minor[]pcre_prerelease, , pcre)
|
||||||
AC_CONFIG_SRCDIR([pcre.h.in])
|
AC_CONFIG_SRCDIR([pcre.h.in])
|
||||||
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
|
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
|
||||||
|
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||||
AC_CONFIG_HEADERS(config.h)
|
AC_CONFIG_HEADERS(config.h)
|
||||||
|
|
||||||
|
# This was added at the suggestion of libtoolize (03-Jan-10)
|
||||||
|
AC_CONFIG_MACRO_DIR([m4])
|
||||||
|
|
||||||
# The default CFLAGS and CXXFLAGS in Autoconf are "-g -O2" for gcc and just
|
# The default CFLAGS and CXXFLAGS in Autoconf are "-g -O2" for gcc and just
|
||||||
# "-g" for any other compiler. There doesn't seem to be a standard way of
|
# "-g" for any other compiler. There doesn't seem to be a standard way of
|
||||||
# getting rid of the -g (which I don't think is needed for a production
|
# getting rid of the -g (which I don't think is needed for a production
|
||||||
@ -37,6 +48,7 @@ remember_set_CXXFLAGS="$CXXFLAGS"
|
|||||||
|
|
||||||
AC_PROG_CC
|
AC_PROG_CC
|
||||||
AC_PROG_CXX
|
AC_PROG_CXX
|
||||||
|
AM_PROG_CC_C_O
|
||||||
|
|
||||||
if test "x$remember_set_CFLAGS" = "x"
|
if test "x$remember_set_CFLAGS" = "x"
|
||||||
then
|
then
|
||||||
@ -63,19 +75,37 @@ fi
|
|||||||
# AC_PROG_CXX will return "g++" even if no c++ compiler is installed.
|
# AC_PROG_CXX will return "g++" even if no c++ compiler is installed.
|
||||||
# Check for that case, and just disable c++ code if g++ doesn't run.
|
# Check for that case, and just disable c++ code if g++ doesn't run.
|
||||||
AC_LANG_PUSH(C++)
|
AC_LANG_PUSH(C++)
|
||||||
AC_COMPILE_IFELSE(AC_LANG_PROGRAM([],[]),, CXX=""; CXXCP=""; CXXFLAGS="")
|
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],, CXX=""; CXXCP=""; CXXFLAGS="")
|
||||||
AC_LANG_POP
|
AC_LANG_POP
|
||||||
|
|
||||||
|
# Check for a 64-bit integer type
|
||||||
|
AC_TYPE_INT64_T
|
||||||
|
|
||||||
AC_PROG_INSTALL
|
AC_PROG_INSTALL
|
||||||
AC_LIBTOOL_WIN32_DLL
|
AC_LIBTOOL_WIN32_DLL
|
||||||
AC_PROG_LIBTOOL
|
LT_INIT
|
||||||
AC_PROG_LN_S
|
AC_PROG_LN_S
|
||||||
|
|
||||||
|
# Check for GCC visibility feature
|
||||||
|
|
||||||
|
PCRE_VISIBILITY
|
||||||
|
|
||||||
|
# Versioning
|
||||||
|
|
||||||
PCRE_MAJOR="pcre_major"
|
PCRE_MAJOR="pcre_major"
|
||||||
PCRE_MINOR="pcre_minor"
|
PCRE_MINOR="pcre_minor"
|
||||||
PCRE_PRERELEASE="pcre_prerelease"
|
PCRE_PRERELEASE="pcre_prerelease"
|
||||||
PCRE_DATE="pcre_date"
|
PCRE_DATE="pcre_date"
|
||||||
|
|
||||||
|
if test "$PCRE_MINOR" = "08" -o "$PCRE_MINOR" = "09"
|
||||||
|
then
|
||||||
|
echo "***"
|
||||||
|
echo "*** Minor version number $PCRE_MINOR must not be used. ***"
|
||||||
|
echo "*** Use only 01 to 07 or 10 onwards, to avoid octal issues. ***"
|
||||||
|
echo "***"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
AC_SUBST(PCRE_MAJOR)
|
AC_SUBST(PCRE_MAJOR)
|
||||||
AC_SUBST(PCRE_MINOR)
|
AC_SUBST(PCRE_MINOR)
|
||||||
AC_SUBST(PCRE_PRERELEASE)
|
AC_SUBST(PCRE_PRERELEASE)
|
||||||
@ -87,11 +117,46 @@ then
|
|||||||
htmldir='${docdir}/html'
|
htmldir='${docdir}/html'
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Handle --disable-cpp
|
# Handle --disable-pcre8 (enabled by default)
|
||||||
|
AC_ARG_ENABLE(pcre8,
|
||||||
|
AS_HELP_STRING([--disable-pcre8],
|
||||||
|
[disable 8 bit character support]),
|
||||||
|
, enable_pcre8=unset)
|
||||||
|
AC_SUBST(enable_pcre8)
|
||||||
|
|
||||||
|
# Handle --enable-pcre16 (disabled by default)
|
||||||
|
AC_ARG_ENABLE(pcre16,
|
||||||
|
AS_HELP_STRING([--enable-pcre16],
|
||||||
|
[enable 16 bit character support]),
|
||||||
|
, enable_pcre16=unset)
|
||||||
|
AC_SUBST(enable_pcre16)
|
||||||
|
|
||||||
|
# Handle --enable-pcre32 (disabled by default)
|
||||||
|
AC_ARG_ENABLE(pcre32,
|
||||||
|
AS_HELP_STRING([--enable-pcre32],
|
||||||
|
[enable 32 bit character support]),
|
||||||
|
, enable_pcre32=unset)
|
||||||
|
AC_SUBST(enable_pcre32)
|
||||||
|
|
||||||
|
# Handle --disable-cpp. The substitution of enable_cpp is needed for use in
|
||||||
|
# pcre-config.
|
||||||
AC_ARG_ENABLE(cpp,
|
AC_ARG_ENABLE(cpp,
|
||||||
AS_HELP_STRING([--disable-cpp],
|
AS_HELP_STRING([--disable-cpp],
|
||||||
[disable C++ support]),
|
[disable C++ support]),
|
||||||
, enable_cpp=yes)
|
, enable_cpp=unset)
|
||||||
|
AC_SUBST(enable_cpp)
|
||||||
|
|
||||||
|
# Handle --enable-jit (disabled by default)
|
||||||
|
AC_ARG_ENABLE(jit,
|
||||||
|
AS_HELP_STRING([--enable-jit],
|
||||||
|
[enable Just-In-Time compiling support]),
|
||||||
|
, enable_jit=no)
|
||||||
|
|
||||||
|
# Handle --disable-pcregrep-jit (enabled by default)
|
||||||
|
AC_ARG_ENABLE(pcregrep-jit,
|
||||||
|
AS_HELP_STRING([--disable-pcregrep-jit],
|
||||||
|
[disable JIT support in pcregrep]),
|
||||||
|
, enable_pcregrep_jit=yes)
|
||||||
|
|
||||||
# Handle --enable-rebuild-chartables
|
# Handle --enable-rebuild-chartables
|
||||||
AC_ARG_ENABLE(rebuild-chartables,
|
AC_ARG_ENABLE(rebuild-chartables,
|
||||||
@ -102,22 +167,22 @@ AC_ARG_ENABLE(rebuild-chartables,
|
|||||||
# Handle --enable-utf8 (disabled by default)
|
# Handle --enable-utf8 (disabled by default)
|
||||||
AC_ARG_ENABLE(utf8,
|
AC_ARG_ENABLE(utf8,
|
||||||
AS_HELP_STRING([--enable-utf8],
|
AS_HELP_STRING([--enable-utf8],
|
||||||
[enable UTF-8 support (incompatible with --enable-ebcdic)]),
|
[another name for --enable-utf. Kept only for compatibility reasons]),
|
||||||
, enable_utf8=unset)
|
, enable_utf8=unset)
|
||||||
|
|
||||||
|
# Handle --enable-utf (disabled by default)
|
||||||
|
AC_ARG_ENABLE(utf,
|
||||||
|
AS_HELP_STRING([--enable-utf],
|
||||||
|
[enable UTF-8/16/32 support (incompatible with --enable-ebcdic)]),
|
||||||
|
, enable_utf=unset)
|
||||||
|
|
||||||
# Handle --enable-unicode-properties
|
# Handle --enable-unicode-properties
|
||||||
AC_ARG_ENABLE(unicode-properties,
|
AC_ARG_ENABLE(unicode-properties,
|
||||||
AS_HELP_STRING([--enable-unicode-properties],
|
AS_HELP_STRING([--enable-unicode-properties],
|
||||||
[enable Unicode properties support (implies --enable-utf8)]),
|
[enable Unicode properties support (implies --enable-utf)]),
|
||||||
, enable_unicode_properties=no)
|
, enable_unicode_properties=no)
|
||||||
|
|
||||||
# Handle --enable-newline=NL
|
# Handle newline options
|
||||||
dnl AC_ARG_ENABLE(newline,
|
|
||||||
dnl AS_HELP_STRING([--enable-newline=NL],
|
|
||||||
dnl [use NL as newline (lf, cr, crlf, anycrlf, any; default=lf)]),
|
|
||||||
dnl , enable_newline=lf)
|
|
||||||
|
|
||||||
# Separate newline options
|
|
||||||
ac_pcre_newline=lf
|
ac_pcre_newline=lf
|
||||||
AC_ARG_ENABLE(newline-is-cr,
|
AC_ARG_ENABLE(newline-is-cr,
|
||||||
AS_HELP_STRING([--enable-newline-is-cr],
|
AS_HELP_STRING([--enable-newline-is-cr],
|
||||||
@ -150,9 +215,15 @@ AC_ARG_ENABLE(bsr-anycrlf,
|
|||||||
# Handle --enable-ebcdic
|
# Handle --enable-ebcdic
|
||||||
AC_ARG_ENABLE(ebcdic,
|
AC_ARG_ENABLE(ebcdic,
|
||||||
AS_HELP_STRING([--enable-ebcdic],
|
AS_HELP_STRING([--enable-ebcdic],
|
||||||
[assume EBCDIC coding rather than ASCII; incompatible with --enable-utf8; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
|
[assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
|
||||||
, enable_ebcdic=no)
|
, enable_ebcdic=no)
|
||||||
|
|
||||||
|
# Handle --enable-ebcdic-nl25
|
||||||
|
AC_ARG_ENABLE(ebcdic-nl25,
|
||||||
|
AS_HELP_STRING([--enable-ebcdic-nl25],
|
||||||
|
[set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic]),
|
||||||
|
, enable_ebcdic_nl25=no)
|
||||||
|
|
||||||
# Handle --disable-stack-for-recursion
|
# Handle --disable-stack-for-recursion
|
||||||
AC_ARG_ENABLE(stack-for-recursion,
|
AC_ARG_ENABLE(stack-for-recursion,
|
||||||
AS_HELP_STRING([--disable-stack-for-recursion],
|
AS_HELP_STRING([--disable-stack-for-recursion],
|
||||||
@ -171,6 +242,18 @@ AC_ARG_ENABLE(pcregrep-libbz2,
|
|||||||
[link pcregrep with libbz2 to handle .bz2 files]),
|
[link pcregrep with libbz2 to handle .bz2 files]),
|
||||||
, enable_pcregrep_libbz2=no)
|
, enable_pcregrep_libbz2=no)
|
||||||
|
|
||||||
|
# Handle --with-pcregrep-bufsize=N
|
||||||
|
AC_ARG_WITH(pcregrep-bufsize,
|
||||||
|
AS_HELP_STRING([--with-pcregrep-bufsize=N],
|
||||||
|
[pcregrep buffer size (default=20480)]),
|
||||||
|
, with_pcregrep_bufsize=20480)
|
||||||
|
|
||||||
|
# Handle --enable-pcretest-libedit
|
||||||
|
AC_ARG_ENABLE(pcretest-libedit,
|
||||||
|
AS_HELP_STRING([--enable-pcretest-libedit],
|
||||||
|
[link pcretest with libedit]),
|
||||||
|
, enable_pcretest_libedit=no)
|
||||||
|
|
||||||
# Handle --enable-pcretest-libreadline
|
# Handle --enable-pcretest-libreadline
|
||||||
AC_ARG_ENABLE(pcretest-libreadline,
|
AC_ARG_ENABLE(pcretest-libreadline,
|
||||||
AS_HELP_STRING([--enable-pcretest-libreadline],
|
AS_HELP_STRING([--enable-pcretest-libreadline],
|
||||||
@ -208,38 +291,87 @@ AC_ARG_WITH(match-limit-recursion,
|
|||||||
[default limit on internal recursion (default=MATCH_LIMIT)]),
|
[default limit on internal recursion (default=MATCH_LIMIT)]),
|
||||||
, with_match_limit_recursion=MATCH_LIMIT)
|
, with_match_limit_recursion=MATCH_LIMIT)
|
||||||
|
|
||||||
# Make sure that if enable_unicode_properties was set, that UTF-8 support
|
# Handle --enable-valgrind
|
||||||
# is enabled.
|
AC_ARG_ENABLE(valgrind,
|
||||||
#
|
AS_HELP_STRING([--enable-valgrind],
|
||||||
|
[valgrind support]),
|
||||||
|
, enable_valgrind=no)
|
||||||
|
|
||||||
|
# Enable code coverage reports using gcov
|
||||||
|
AC_ARG_ENABLE(coverage,
|
||||||
|
AS_HELP_STRING([--enable-coverage],
|
||||||
|
[enable code coverage reports using gcov]),
|
||||||
|
, enable_coverage=no)
|
||||||
|
|
||||||
|
# Copy enable_utf8 value to enable_utf for compatibility reasons
|
||||||
|
if test "x$enable_utf8" != "xunset"
|
||||||
|
then
|
||||||
|
if test "x$enable_utf" != "xunset"
|
||||||
|
then
|
||||||
|
AC_MSG_ERROR([--enable/disable-utf8 is kept only for compatibility reasons and its value is copied to --enable/disable-utf. Newer code must use --enable/disable-utf alone.])
|
||||||
|
fi
|
||||||
|
enable_utf=$enable_utf8
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Set the default value for pcre8
|
||||||
|
if test "x$enable_pcre8" = "xunset"
|
||||||
|
then
|
||||||
|
enable_pcre8=yes
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Set the default value for pcre16
|
||||||
|
if test "x$enable_pcre16" = "xunset"
|
||||||
|
then
|
||||||
|
enable_pcre16=no
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Set the default value for pcre32
|
||||||
|
if test "x$enable_pcre32" = "xunset"
|
||||||
|
then
|
||||||
|
enable_pcre32=no
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Make sure at least one of enable_pcre8, enable_pcre16 or enable_pcre32 was set
|
||||||
|
if test "x$enable_pcre8$enable_pcre16$enable_pcre32" = "xnonono"
|
||||||
|
then
|
||||||
|
AC_MSG_ERROR([At least one of 8, 16 or 32 bit pcre library must be enabled])
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Make sure that if enable_unicode_properties was set, that UTF support is enabled.
|
||||||
if test "x$enable_unicode_properties" = "xyes"
|
if test "x$enable_unicode_properties" = "xyes"
|
||||||
then
|
then
|
||||||
if test "x$enable_utf8" = "xno"
|
if test "x$enable_utf" = "xno"
|
||||||
then
|
then
|
||||||
AC_MSG_ERROR([support for Unicode properties requires UTF-8 support])
|
AC_MSG_ERROR([support for Unicode properties requires UTF-8/16/32 support])
|
||||||
fi
|
fi
|
||||||
enable_utf8=yes
|
enable_utf=yes
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test "x$enable_utf8" = "xunset"
|
# enable_utf is disabled by default.
|
||||||
|
if test "x$enable_utf" = "xunset"
|
||||||
then
|
then
|
||||||
enable_utf8=no
|
enable_utf=no
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
|
# enable_cpp copies the value of enable_pcre8 by default
|
||||||
# Also check that UTF-8 support is not requested, because PCRE cannot handle
|
if test "x$enable_cpp" = "xunset"
|
||||||
# EBCDIC and UTF-8 in the same build. To do so it would need to use different
|
|
||||||
# character constants depending on the mode.
|
|
||||||
#
|
|
||||||
if test "x$enable_ebcdic" = "xyes"
|
|
||||||
then
|
then
|
||||||
enable_rebuild_chartables=yes
|
enable_cpp=$enable_pcre8
|
||||||
if test "x$enable_utf8" = "xyes"
|
fi
|
||||||
|
|
||||||
|
# Make sure that if enable_cpp was set, that enable_pcre8 support is enabled
|
||||||
|
if test "x$enable_cpp" = "xyes"
|
||||||
|
then
|
||||||
|
if test "x$enable_pcre8" = "xno"
|
||||||
then
|
then
|
||||||
AC_MSG_ERROR([support for EBCDIC and UTF-8 cannot be enabled at the same time])
|
AC_MSG_ERROR([C++ library requires pcre library with 8 bit characters])
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Convert the newline identifier into the appropriate integer value.
|
# Convert the newline identifier into the appropriate integer value. The first
|
||||||
|
# three are ASCII values 0x0a, 0x0d, and 0x0d0a, but if EBCDIC is enabled, they
|
||||||
|
# are changed below.
|
||||||
|
|
||||||
case "$enable_newline" in
|
case "$enable_newline" in
|
||||||
lf) ac_pcre_newline_value=10 ;;
|
lf) ac_pcre_newline_value=10 ;;
|
||||||
cr) ac_pcre_newline_value=13 ;;
|
cr) ac_pcre_newline_value=13 ;;
|
||||||
@ -251,6 +383,37 @@ case "$enable_newline" in
|
|||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
# --enable-ebcdic-nl25 implies --enable-ebcdic
|
||||||
|
if test "x$enable_ebcdic_nl25" = "xyes"; then
|
||||||
|
enable_ebcdic=yes
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled,
|
||||||
|
# and the newline value is adjusted appropriately (CR is still 13, but LF is
|
||||||
|
# 21 or 37). Also check that UTF support is not requested, because PCRE cannot
|
||||||
|
# handle EBCDIC and UTF in the same build. To do so it would need to use
|
||||||
|
# different character constants depending on the mode.
|
||||||
|
#
|
||||||
|
if test "x$enable_ebcdic" = "xyes"; then
|
||||||
|
enable_rebuild_chartables=yes
|
||||||
|
|
||||||
|
if test "x$enable_utf" = "xyes"; then
|
||||||
|
AC_MSG_ERROR([support for EBCDIC and UTF-8/16/32 cannot be enabled at the same time])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "x$enable_ebcdic_nl25" = "xno"; then
|
||||||
|
case "$ac_pcre_newline_value" in
|
||||||
|
10) ac_pcre_newline_value=21 ;;
|
||||||
|
3338) ac_pcre_newline_value=3349 ;;
|
||||||
|
esac
|
||||||
|
else
|
||||||
|
case "$ac_pcre_newline_value" in
|
||||||
|
10) ac_pcre_newline_value=37 ;;
|
||||||
|
3338) ac_pcre_newline_value=3365 ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# Check argument to --with-link-size
|
# Check argument to --with-link-size
|
||||||
case "$with_link_size" in
|
case "$with_link_size" in
|
||||||
2|3|4) ;;
|
2|3|4) ;;
|
||||||
@ -260,16 +423,17 @@ case "$with_link_size" in
|
|||||||
esac
|
esac
|
||||||
|
|
||||||
AH_TOP([
|
AH_TOP([
|
||||||
/* On Unix-like systems config.h.in is converted by "configure" into config.h.
|
/* PCRE is written in Standard C, but there are a few non-standard things it
|
||||||
Some other environments also support the use of "configure". PCRE is written in
|
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||||
Standard C, but there are a few non-standard things it can cope with, allowing
|
systems.
|
||||||
it to run on SunOS4 and other "close to standard" systems.
|
|
||||||
|
|
||||||
If you are going to build PCRE "by hand" on a system without "configure" you
|
In environments that support the facilities, config.h.in is converted by
|
||||||
should copy the distributed config.h.generic to config.h, and then set up the
|
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
|
||||||
macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to
|
are going to build PCRE "by hand" without using "configure" or CMake, you
|
||||||
all of your compile commands, so that config.h is included at the start of
|
should copy the distributed config.h.generic to config.h, and then edit the
|
||||||
every source.
|
macro definitions to be the way you need them. You must then add
|
||||||
|
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
|
||||||
|
at the start of every source.
|
||||||
|
|
||||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
||||||
@ -285,6 +449,11 @@ AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h windows.h)
|
|||||||
# The files below are C++ header files.
|
# The files below are C++ header files.
|
||||||
pcre_have_type_traits="0"
|
pcre_have_type_traits="0"
|
||||||
pcre_have_bits_type_traits="0"
|
pcre_have_bits_type_traits="0"
|
||||||
|
|
||||||
|
if test "x$enable_cpp" = "xyes" -a -z "$CXX"; then
|
||||||
|
AC_MSG_ERROR([You need a C++ compiler for C++ support.])
|
||||||
|
fi
|
||||||
|
|
||||||
if test "x$enable_cpp" = "xyes" -a -n "$CXX"
|
if test "x$enable_cpp" = "xyes" -a -n "$CXX"
|
||||||
then
|
then
|
||||||
AC_LANG_PUSH(C++)
|
AC_LANG_PUSH(C++)
|
||||||
@ -301,11 +470,11 @@ for flag in "-alias,__ZN7pcrecpp2RE6no_argE,__ZN7pcrecpp6no_argE" \
|
|||||||
LDFLAGS="$OLD_LDFLAGS -Wl,$flag"
|
LDFLAGS="$OLD_LDFLAGS -Wl,$flag"
|
||||||
# We try to run the linker with this new ld flag. If the link fails,
|
# We try to run the linker with this new ld flag. If the link fails,
|
||||||
# we give up and remove the new flag from LDFLAGS.
|
# we give up and remove the new flag from LDFLAGS.
|
||||||
AC_LINK_IFELSE(AC_LANG_PROGRAM([namespace pcrecpp {
|
AC_LINK_IFELSE([AC_LANG_PROGRAM([namespace pcrecpp {
|
||||||
class RE { static int no_arg; };
|
class RE { static int no_arg; };
|
||||||
int RE::no_arg;
|
int RE::no_arg;
|
||||||
}],
|
}],
|
||||||
[]),
|
[])],
|
||||||
[AC_MSG_RESULT([yes]);
|
[AC_MSG_RESULT([yes]);
|
||||||
EXTRA_LIBPCRECPP_LDFLAGS="$EXTRA_LIBPCRECPP_LDFLAGS -Wl,$flag";
|
EXTRA_LIBPCRECPP_LDFLAGS="$EXTRA_LIBPCRECPP_LDFLAGS -Wl,$flag";
|
||||||
break;],
|
break;],
|
||||||
@ -323,6 +492,49 @@ AC_CHECK_HEADERS(bits/type_traits.h, [pcre_have_bits_type_traits="1"],
|
|||||||
AC_CHECK_HEADERS(type_traits.h, [pcre_have_type_traits="1"],
|
AC_CHECK_HEADERS(type_traits.h, [pcre_have_type_traits="1"],
|
||||||
[pcre_have_type_traits="0"])
|
[pcre_have_type_traits="0"])
|
||||||
|
|
||||||
|
# (This isn't c++-specific, but is only used in pcrecpp.cc, so try this
|
||||||
|
# in a c++ context. This matters because strtoimax is C99 and may not
|
||||||
|
# be supported by the C++ compiler.)
|
||||||
|
# Figure out how to create a longlong from a string: strtoll and
|
||||||
|
# equiv. It's not enough to call AC_CHECK_FUNCS: hpux has a
|
||||||
|
# strtoll, for instance, but it only takes 2 args instead of 3!
|
||||||
|
# We have to call AH_TEMPLATE since AC_DEFINE_UNQUOTED below is complex.
|
||||||
|
AH_TEMPLATE(HAVE_STRTOQ, [Define to 1 if you have `strtoq'.])
|
||||||
|
AH_TEMPLATE(HAVE_STRTOLL, [Define to 1 if you have `strtoll'.])
|
||||||
|
AH_TEMPLATE(HAVE__STRTOI64, [Define to 1 if you have `_strtoi64'.])
|
||||||
|
AH_TEMPLATE(HAVE_STRTOIMAX, [Define to 1 if you have `strtoimax'.])
|
||||||
|
have_strto_fn=0
|
||||||
|
for fn in strtoq strtoll _strtoi64 strtoimax; do
|
||||||
|
AC_MSG_CHECKING([for $fn])
|
||||||
|
if test "$fn" = strtoimax; then
|
||||||
|
include=stdint.h
|
||||||
|
else
|
||||||
|
include=stdlib.h
|
||||||
|
fi
|
||||||
|
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <$include>],
|
||||||
|
[char* e; return $fn("100", &e, 10)])],
|
||||||
|
[AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE_UNQUOTED(HAVE_`echo $fn | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ`, 1,
|
||||||
|
[Define to 1 if you have `$fn'.])
|
||||||
|
have_strto_fn=1
|
||||||
|
break],
|
||||||
|
[AC_MSG_RESULT(no)])
|
||||||
|
done
|
||||||
|
|
||||||
|
if test "$have_strto_fn" = 1; then
|
||||||
|
AC_CHECK_TYPES([long long],
|
||||||
|
[pcre_have_long_long="1"],
|
||||||
|
[pcre_have_long_long="0"])
|
||||||
|
AC_CHECK_TYPES([unsigned long long],
|
||||||
|
[pcre_have_ulong_long="1"],
|
||||||
|
[pcre_have_ulong_long="0"])
|
||||||
|
else
|
||||||
|
pcre_have_long_long="0"
|
||||||
|
pcre_have_ulong_long="0"
|
||||||
|
fi
|
||||||
|
AC_SUBST(pcre_have_long_long)
|
||||||
|
AC_SUBST(pcre_have_ulong_long)
|
||||||
|
|
||||||
AC_LANG_POP
|
AC_LANG_POP
|
||||||
fi
|
fi
|
||||||
# Using AC_SUBST eliminates the need to include config.h in a public .h file
|
# Using AC_SUBST eliminates the need to include config.h in a public .h file
|
||||||
@ -330,32 +542,20 @@ AC_SUBST(pcre_have_type_traits)
|
|||||||
AC_SUBST(pcre_have_bits_type_traits)
|
AC_SUBST(pcre_have_bits_type_traits)
|
||||||
|
|
||||||
# Conditional compilation
|
# Conditional compilation
|
||||||
|
AM_CONDITIONAL(WITH_PCRE8, test "x$enable_pcre8" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_PCRE16, test "x$enable_pcre16" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_PCRE32, test "x$enable_pcre32" = "xyes")
|
||||||
AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes")
|
AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes")
|
||||||
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
|
||||||
|
|
||||||
# Checks for typedefs, structures, and compiler characteristics.
|
# Checks for typedefs, structures, and compiler characteristics.
|
||||||
|
|
||||||
AC_C_CONST
|
AC_C_CONST
|
||||||
AC_TYPE_SIZE_T
|
AC_TYPE_SIZE_T
|
||||||
|
|
||||||
pcre_have_strotolonglong=0
|
|
||||||
AC_CHECK_FUNCS(strtoq strtoll _strtoi64, [pcre_have_strotolonglong="1"; break])
|
|
||||||
# If we can't convert a string to a long long, pretend we don't even
|
|
||||||
# have a long long.
|
|
||||||
if test $pcre_have_strotolonglong = "0"; then
|
|
||||||
pcre_have_long_long="0"
|
|
||||||
pcre_have_ulong_long="0"
|
|
||||||
else
|
|
||||||
AC_CHECK_TYPES([long long],
|
|
||||||
[pcre_have_long_long="1"],
|
|
||||||
[pcre_have_long_long="0"])
|
|
||||||
AC_CHECK_TYPES([unsigned long long],
|
|
||||||
[pcre_have_ulong_long="1"],
|
|
||||||
[pcre_have_ulong_long="0"])
|
|
||||||
fi
|
|
||||||
AC_SUBST(pcre_have_long_long)
|
|
||||||
AC_SUBST(pcre_have_ulong_long)
|
|
||||||
|
|
||||||
# Checks for library functions.
|
# Checks for library functions.
|
||||||
|
|
||||||
AC_CHECK_FUNCS(bcopy memmove strerror)
|
AC_CHECK_FUNCS(bcopy memmove strerror)
|
||||||
@ -365,84 +565,196 @@ AC_CHECK_FUNCS(bcopy memmove strerror)
|
|||||||
AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1])
|
AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1])
|
||||||
AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1])
|
AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1])
|
||||||
|
|
||||||
# Check for the availability of libbz2
|
# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB,
|
||||||
|
# as for libz. However, this had the following problem, diagnosed and fixed by
|
||||||
|
# a user:
|
||||||
|
#
|
||||||
|
# - libbz2 uses the Pascal calling convention (WINAPI) for the functions
|
||||||
|
# under Win32.
|
||||||
|
# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h",
|
||||||
|
# therefore missing the function definition.
|
||||||
|
# - The compiler thus generates a "C" signature for the test function.
|
||||||
|
# - The linker fails to find the "C" function.
|
||||||
|
# - PCRE fails to configure if asked to do so against libbz2.
|
||||||
|
#
|
||||||
|
# Solution:
|
||||||
|
#
|
||||||
|
# - Replace the AC_CHECK_LIB test with a custom test.
|
||||||
|
|
||||||
AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1])
|
AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1])
|
||||||
AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1])
|
# Original test
|
||||||
|
# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1])
|
||||||
|
#
|
||||||
|
# Custom test follows
|
||||||
|
|
||||||
|
AC_MSG_CHECKING([for libbz2])
|
||||||
|
OLD_LIBS="$LIBS"
|
||||||
|
LIBS="$LIBS -lbz2"
|
||||||
|
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
|
||||||
|
#ifdef HAVE_BZLIB_H
|
||||||
|
#include <bzlib.h>
|
||||||
|
#endif]],
|
||||||
|
[[return (int)BZ2_bzopen("conftest", "rb");]])],
|
||||||
|
[AC_MSG_RESULT([yes]);HAVE_LIBBZ2=1; break;],
|
||||||
|
AC_MSG_RESULT([no]))
|
||||||
|
LIBS="$OLD_LIBS"
|
||||||
|
|
||||||
# Check for the availability of libreadline
|
# Check for the availability of libreadline
|
||||||
|
|
||||||
AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1])
|
if test "$enable_pcretest_libreadline" = "yes"; then
|
||||||
AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1])
|
AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1])
|
||||||
AC_CHECK_LIB([readline], [readline], [HAVE_LIB_READLINE=1])
|
AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1])
|
||||||
|
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"],
|
||||||
|
[unset ac_cv_lib_readline_readline;
|
||||||
|
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltinfo"],
|
||||||
|
[unset ac_cv_lib_readline_readline;
|
||||||
|
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lcurses"],
|
||||||
|
[unset ac_cv_lib_readline_readline;
|
||||||
|
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncurses"],
|
||||||
|
[unset ac_cv_lib_readline_readline;
|
||||||
|
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncursesw"],
|
||||||
|
[unset ac_cv_lib_readline_readline;
|
||||||
|
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltermcap"],
|
||||||
|
[LIBREADLINE=""],
|
||||||
|
[-ltermcap])],
|
||||||
|
[-lncursesw])],
|
||||||
|
[-lncurses])],
|
||||||
|
[-lcurses])],
|
||||||
|
[-ltinfo])])
|
||||||
|
AC_SUBST(LIBREADLINE)
|
||||||
|
if test -n "$LIBREADLINE"; then
|
||||||
|
if test "$LIBREADLINE" != "-lreadline"; then
|
||||||
|
echo "-lreadline needs $LIBREADLINE"
|
||||||
|
LIBREADLINE="-lreadline $LIBREADLINE"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
# Check for the availability of libedit. Different distributions put its
|
||||||
|
# headers in different places. Try to cover the most common ones.
|
||||||
|
|
||||||
|
if test "$enable_pcretest_libedit" = "yes"; then
|
||||||
|
AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1],
|
||||||
|
[AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1],
|
||||||
|
[AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])])
|
||||||
|
AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"])
|
||||||
|
fi
|
||||||
|
|
||||||
# This facilitates -ansi builds under Linux
|
# This facilitates -ansi builds under Linux
|
||||||
dnl AC_DEFINE([_GNU_SOURCE], [], [Enable GNU extensions in glibc])
|
dnl AC_DEFINE([_GNU_SOURCE], [], [Enable GNU extensions in glibc])
|
||||||
|
|
||||||
|
PCRE_STATIC_CFLAG=""
|
||||||
if test "x$enable_shared" = "xno" ; then
|
if test "x$enable_shared" = "xno" ; then
|
||||||
AC_DEFINE([PCRE_STATIC], [1], [
|
AC_DEFINE([PCRE_STATIC], [1], [
|
||||||
Define if linking statically (TODO: make nice with Libtool)])
|
Define to any value if linking statically (TODO: make nice with Libtool)])
|
||||||
|
PCRE_STATIC_CFLAG="-DPCRE_STATIC"
|
||||||
fi
|
fi
|
||||||
|
AC_SUBST(PCRE_STATIC_CFLAG)
|
||||||
|
|
||||||
# Here is where pcre specific defines are handled
|
# Here is where pcre specific defines are handled
|
||||||
|
|
||||||
if test "$enable_utf8" = "yes"; then
|
if test "$enable_pcre8" = "yes"; then
|
||||||
AC_DEFINE([SUPPORT_UTF8], [], [
|
AC_DEFINE([SUPPORT_PCRE8], [], [
|
||||||
Define to enable support for the UTF-8 Unicode encoding. This will
|
Define to any value to enable the 8 bit PCRE library.])
|
||||||
work even in an EBCDIC environment, but it is incompatible with
|
fi
|
||||||
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code
|
|
||||||
*or* ASCII/UTF-8, but not both at once.])
|
if test "$enable_pcre16" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_PCRE16], [], [
|
||||||
|
Define to any value to enable the 16 bit PCRE library.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_pcre32" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_PCRE32], [], [
|
||||||
|
Define to any value to enable the 32 bit PCRE library.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_jit" = "yes"; then
|
||||||
|
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
|
||||||
|
CC="$PTHREAD_CC"
|
||||||
|
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
|
||||||
|
LIBS="$PTHREAD_LIBS $LIBS"
|
||||||
|
AC_DEFINE([SUPPORT_JIT], [], [
|
||||||
|
Define to any value to enable support for Just-In-Time compiling.])
|
||||||
|
else
|
||||||
|
enable_pcregrep_jit="no"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_pcregrep_jit" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_PCREGREP_JIT], [], [
|
||||||
|
Define to any value to enable JIT support in pcregrep.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_utf" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_UTF], [], [
|
||||||
|
Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
||||||
|
This will work even in an EBCDIC environment, but it is incompatible
|
||||||
|
with the EBCDIC macro. That is, PCRE can support *either* EBCDIC
|
||||||
|
code *or* ASCII/UTF-8/16/32, but not both at once.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test "$enable_unicode_properties" = "yes"; then
|
if test "$enable_unicode_properties" = "yes"; then
|
||||||
AC_DEFINE([SUPPORT_UCP], [], [
|
AC_DEFINE([SUPPORT_UCP], [], [
|
||||||
Define to enable support for Unicode properties])
|
Define to any value to enable support for Unicode properties.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test "$enable_stack_for_recursion" = "no"; then
|
if test "$enable_stack_for_recursion" = "no"; then
|
||||||
AC_DEFINE([NO_RECURSE], [], [
|
AC_DEFINE([NO_RECURSE], [], [
|
||||||
PCRE uses recursive function calls to handle backtracking while
|
PCRE uses recursive function calls to handle backtracking while
|
||||||
matching. This can sometimes be a problem on systems that have
|
matching. This can sometimes be a problem on systems that have
|
||||||
stacks of limited size. Define NO_RECURSE to get a version that
|
stacks of limited size. Define NO_RECURSE to any value to get a
|
||||||
doesn't use recursion in the match() function; instead it creates
|
version that doesn't use recursion in the match() function; instead
|
||||||
its own stack by steam using pcre_recurse_malloc() to obtain memory
|
it creates its own stack by steam using pcre_recurse_malloc() to obtain
|
||||||
from the heap. For more detail, see the comments and other stuff
|
memory from the heap. For more detail, see the comments and other stuff
|
||||||
just above the match() function. On systems that support it,
|
just above the match() function.])
|
||||||
"configure" can be used to set this in the Makefile
|
|
||||||
(use --disable-stack-for-recursion).])
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test "$enable_pcregrep_libz" = "yes"; then
|
if test "$enable_pcregrep_libz" = "yes"; then
|
||||||
AC_DEFINE([SUPPORT_LIBZ], [], [
|
AC_DEFINE([SUPPORT_LIBZ], [], [
|
||||||
Define to allow pcregrep to be linked with libz, so that it is
|
Define to any value to allow pcregrep to be linked with libz, so that it is
|
||||||
able to handle .gz files.])
|
able to handle .gz files.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test "$enable_pcregrep_libbz2" = "yes"; then
|
if test "$enable_pcregrep_libbz2" = "yes"; then
|
||||||
AC_DEFINE([SUPPORT_LIBBZ2], [], [
|
AC_DEFINE([SUPPORT_LIBBZ2], [], [
|
||||||
Define to allow pcregrep to be linked with libbz2, so that it is
|
Define to any value to allow pcregrep to be linked with libbz2, so that it
|
||||||
able to handle .bz2 files.])
|
is able to handle .bz2 files.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test "$enable_pcretest_libreadline" = "yes"; then
|
if test $with_pcregrep_bufsize -lt 8192 ; then
|
||||||
|
with_pcregrep_bufsize="8192"
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([PCREGREP_BUFSIZE], [$with_pcregrep_bufsize], [
|
||||||
|
The value of PCREGREP_BUFSIZE determines the size of buffer used by pcregrep
|
||||||
|
to hold parts of the file it is searching. This is also the minimum value.
|
||||||
|
The actual amount of memory used by pcregrep is three times this number,
|
||||||
|
because it allows for the buffering of "before" and "after" lines.])
|
||||||
|
|
||||||
|
if test "$enable_pcretest_libedit" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_LIBEDIT], [], [
|
||||||
|
Define to any value to allow pcretest to be linked with libedit.])
|
||||||
|
LIBREADLINE="$LIBEDIT"
|
||||||
|
elif test "$enable_pcretest_libreadline" = "yes"; then
|
||||||
AC_DEFINE([SUPPORT_LIBREADLINE], [], [
|
AC_DEFINE([SUPPORT_LIBREADLINE], [], [
|
||||||
Define to allow pcretest to be linked with libreadline.])
|
Define to any value to allow pcretest to be linked with libreadline.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([NEWLINE], [$ac_pcre_newline_value], [
|
AC_DEFINE_UNQUOTED([NEWLINE], [$ac_pcre_newline_value], [
|
||||||
The value of NEWLINE determines the newline character sequence. On
|
The value of NEWLINE determines the default newline character sequence. PCRE
|
||||||
systems that support it, "configure" can be used to override the
|
client programs can override this by selecting other values at run time. In
|
||||||
default, which is 10. The possible values are 10 (LF), 13 (CR),
|
ASCII environments, the value can be 10 (LF), 13 (CR), or 3338 (CRLF); in
|
||||||
3338 (CRLF), -1 (ANY), or -2 (ANYCRLF).])
|
EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or 3349 or 3365
|
||||||
|
(CRLF) because there are two alternative codepoints (0x15 and 0x25) that are
|
||||||
|
used as the NL line terminator that is equivalent to ASCII LF. In both ASCII
|
||||||
|
and EBCDIC environments the value can also be -1 (ANY), or -2 (ANYCRLF).])
|
||||||
|
|
||||||
if test "$enable_bsr_anycrlf" = "yes"; then
|
if test "$enable_bsr_anycrlf" = "yes"; then
|
||||||
AC_DEFINE([BSR_ANYCRLF], [], [
|
AC_DEFINE([BSR_ANYCRLF], [], [
|
||||||
By default, the \R escape sequence matches any Unicode line ending
|
By default, the \R escape sequence matches any Unicode line ending
|
||||||
character or sequence of characters. If BSR_ANYCRLF is defined, this is
|
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||||
changed so that backslash-R matches only CR, LF, or CRLF. The build-
|
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||||
time default can be overridden by the user of PCRE at runtime. On
|
The build-time default can be overridden by the user of PCRE at runtime.])
|
||||||
systems that support it, "configure" can be used to override the
|
|
||||||
default.])
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
||||||
@ -450,8 +762,7 @@ AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
|||||||
links as offsets within the compiled regex. The default is 2, which
|
links as offsets within the compiled regex. The default is 2, which
|
||||||
allows for compiled patterns up to 64K long. This covers the vast
|
allows for compiled patterns up to 64K long. This covers the vast
|
||||||
majority of cases. However, PCRE can also be compiled to use 3 or 4
|
majority of cases. However, PCRE can also be compiled to use 3 or 4
|
||||||
bytes instead. This allows for longer patterns in extreme cases. On
|
bytes instead. This allows for longer patterns in extreme cases.])
|
||||||
systems that support it, "configure" can be used to override this default.])
|
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [
|
AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [
|
||||||
When calling PCRE via the POSIX interface, additional working storage
|
When calling PCRE via the POSIX interface, additional working storage
|
||||||
@ -460,9 +771,7 @@ AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [
|
|||||||
interface provides only two. If the number of expected substrings is
|
interface provides only two. If the number of expected substrings is
|
||||||
small, the wrapper function uses space on the stack, because this is
|
small, the wrapper function uses space on the stack, because this is
|
||||||
faster than using malloc() for each call. The threshold above which
|
faster than using malloc() for each call. The threshold above which
|
||||||
the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD. On
|
the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD.])
|
||||||
systems that support it, "configure" can be used to override this
|
|
||||||
default.])
|
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||||
The value of MATCH_LIMIT determines the default number of times the
|
The value of MATCH_LIMIT determines the default number of times the
|
||||||
@ -471,8 +780,7 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
|||||||
limit. The limit exists in order to catch runaway regular
|
limit. The limit exists in order to catch runaway regular
|
||||||
expressions that take for ever to determine that they do not match.
|
expressions that take for ever to determine that they do not match.
|
||||||
The default is set very large so that it does not accidentally catch
|
The default is set very large so that it does not accidentally catch
|
||||||
legitimate cases. On systems that support it, "configure" can be
|
legitimate cases.])
|
||||||
used to override this default.])
|
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [
|
AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [
|
||||||
The above limit applies to all calls of match(), whether or not they
|
The above limit applies to all calls of match(), whether or not they
|
||||||
@ -483,8 +791,7 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [
|
|||||||
MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
|
MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
|
||||||
have any useful effect, it must be less than the value of
|
have any useful effect, it must be less than the value of
|
||||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT.
|
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT.
|
||||||
There is a runtime method for setting a different limit. On systems
|
There is a runtime method for setting a different limit.])
|
||||||
that support it, "configure" can be used to override the default.])
|
|
||||||
|
|
||||||
AC_DEFINE([MAX_NAME_SIZE], [32], [
|
AC_DEFINE([MAX_NAME_SIZE], [32], [
|
||||||
This limit is parameterized just in case anybody ever wants to
|
This limit is parameterized just in case anybody ever wants to
|
||||||
@ -500,23 +807,38 @@ AH_VERBATIM([PCRE_EXP_DEFN], [
|
|||||||
/* If you are compiling for a system other than a Unix-like system or
|
/* If you are compiling for a system other than a Unix-like system or
|
||||||
Win32, and it needs some magic to be inserted before the definition
|
Win32, and it needs some magic to be inserted before the definition
|
||||||
of a function that is exported by the library, define this macro to
|
of a function that is exported by the library, define this macro to
|
||||||
contain the relevant magic. If you do not define this macro, it
|
contain the relevant magic. If you do not define this macro, a suitable
|
||||||
defaults to "extern" for a C compiler and "extern C" for a C++
|
__declspec value is used for Windows systems; in other environments
|
||||||
compiler on non-Win32 systems. This macro appears at the start of
|
"extern" is used for a C compiler and "extern C" for a C++ compiler.
|
||||||
every exported function that is part of the external API. It does
|
This macro appears at the start of every exported function that is part
|
||||||
not appear on functions that are "external" in the C sense, but
|
of the external API. It does not appear on functions that are "external"
|
||||||
which are internal to the library. */
|
in the C sense, but which are internal to the library. */
|
||||||
#undef PCRE_EXP_DEFN])
|
#undef PCRE_EXP_DEFN])
|
||||||
|
|
||||||
if test "$enable_ebcdic" = "yes"; then
|
if test "$enable_ebcdic" = "yes"; then
|
||||||
AC_DEFINE_UNQUOTED([EBCDIC], [], [
|
AC_DEFINE_UNQUOTED([EBCDIC], [], [
|
||||||
If you are compiling for a system that uses EBCDIC instead of ASCII
|
If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||||
character codes, define this macro as 1. On systems that can use
|
character codes, define this macro to any value. You must also edit the
|
||||||
"configure", this can be done via --enable-ebcdic. PCRE will then
|
NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
|
||||||
assume that all input strings are in EBCDIC. If you do not define
|
On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
|
||||||
this macro, PCRE will assume input strings are ASCII or UTF-8 Unicode.
|
automatically adjusted. When EBCDIC is set, PCRE assumes that all input
|
||||||
It is not possible to build a version of PCRE that supports both
|
strings are in EBCDIC. If you do not define this macro, PCRE will assume
|
||||||
EBCDIC and UTF-8.])
|
input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
|
||||||
|
a version of PCRE that supports both EBCDIC and UTF-8/16/32.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_ebcdic_nl25" = "yes"; then
|
||||||
|
AC_DEFINE_UNQUOTED([EBCDIC_NL25], [], [
|
||||||
|
In an EBCDIC environment, define this macro to any value to arrange for
|
||||||
|
the NL character to be 0x25 instead of the default 0x15. NL plays the role
|
||||||
|
that LF does in an ASCII/Unicode environment. The value must also be set in
|
||||||
|
the NEWLINE macro below. On systems that can use "configure" or CMake to
|
||||||
|
set EBCDIC_NL25, the adjustment of NEWLINE is automatic.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_valgrind" = "yes"; then
|
||||||
|
AC_DEFINE_UNQUOTED([SUPPORT_VALGRIND], [], [
|
||||||
|
Valgrind support to find invalid memory reads.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Platform specific issues
|
# Platform specific issues
|
||||||
@ -537,6 +859,12 @@ esac
|
|||||||
EXTRA_LIBPCRE_LDFLAGS="$EXTRA_LIBPCRE_LDFLAGS \
|
EXTRA_LIBPCRE_LDFLAGS="$EXTRA_LIBPCRE_LDFLAGS \
|
||||||
$NO_UNDEFINED -version-info libpcre_version"
|
$NO_UNDEFINED -version-info libpcre_version"
|
||||||
|
|
||||||
|
EXTRA_LIBPCRE16_LDFLAGS="$EXTRA_LIBPCRE16_LDFLAGS \
|
||||||
|
$NO_UNDEFINED -version-info libpcre16_version"
|
||||||
|
|
||||||
|
EXTRA_LIBPCRE32_LDFLAGS="$EXTRA_LIBPCRE32_LDFLAGS \
|
||||||
|
$NO_UNDEFINED -version-info libpcre32_version"
|
||||||
|
|
||||||
EXTRA_LIBPCREPOSIX_LDFLAGS="$EXTRA_LIBPCREPOSIX_LDFLAGS \
|
EXTRA_LIBPCREPOSIX_LDFLAGS="$EXTRA_LIBPCREPOSIX_LDFLAGS \
|
||||||
$NO_UNDEFINED -version-info libpcreposix_version"
|
$NO_UNDEFINED -version-info libpcreposix_version"
|
||||||
|
|
||||||
@ -545,11 +873,14 @@ EXTRA_LIBPCRECPP_LDFLAGS="$EXTRA_LIBPCRECPP_LDFLAGS \
|
|||||||
$EXPORT_ALL_SYMBOLS"
|
$EXPORT_ALL_SYMBOLS"
|
||||||
|
|
||||||
AC_SUBST(EXTRA_LIBPCRE_LDFLAGS)
|
AC_SUBST(EXTRA_LIBPCRE_LDFLAGS)
|
||||||
|
AC_SUBST(EXTRA_LIBPCRE16_LDFLAGS)
|
||||||
|
AC_SUBST(EXTRA_LIBPCRE32_LDFLAGS)
|
||||||
AC_SUBST(EXTRA_LIBPCREPOSIX_LDFLAGS)
|
AC_SUBST(EXTRA_LIBPCREPOSIX_LDFLAGS)
|
||||||
AC_SUBST(EXTRA_LIBPCRECPP_LDFLAGS)
|
AC_SUBST(EXTRA_LIBPCRECPP_LDFLAGS)
|
||||||
|
|
||||||
# When we run 'make distcheck', use these arguments.
|
# When we run 'make distcheck', use these arguments. Turning off compiler
|
||||||
DISTCHECK_CONFIGURE_FLAGS="--enable-cpp --enable-unicode-properties"
|
# optimization makes it run faster.
|
||||||
|
DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre16 --enable-pcre32 --enable-jit --enable-cpp --enable-unicode-properties"
|
||||||
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
|
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
|
||||||
|
|
||||||
# Check that, if --enable-pcregrep-libz or --enable-pcregrep-libbz2 is
|
# Check that, if --enable-pcregrep-libz or --enable-pcregrep-libbz2 is
|
||||||
@ -583,6 +914,23 @@ AC_SUBST(LIBBZ2)
|
|||||||
|
|
||||||
# Similarly for --enable-pcretest-readline
|
# Similarly for --enable-pcretest-readline
|
||||||
|
|
||||||
|
if test "$enable_pcretest_libedit" = "yes"; then
|
||||||
|
if test "$enable_pcretest_libreadline" = "yes"; then
|
||||||
|
echo "** Cannot use both --enable-pcretest-libedit and --enable-pcretest-readline"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if test "$HAVE_EDITLINE_READLINE_H" != "1" -a \
|
||||||
|
"$HAVE_READLINE_READLINE_H" != "1"; then
|
||||||
|
echo "** Cannot --enable-pcretest-libedit because neither editline/readline.h"
|
||||||
|
echo "** nor readline/readline.h was found."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if test -z "$LIBEDIT"; then
|
||||||
|
echo "** Cannot --enable-pcretest-libedit because libedit library was not found."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
if test "$enable_pcretest_libreadline" = "yes"; then
|
if test "$enable_pcretest_libreadline" = "yes"; then
|
||||||
if test "$HAVE_READLINE_H" != "1"; then
|
if test "$HAVE_READLINE_H" != "1"; then
|
||||||
echo "** Cannot --enable-pcretest-readline because readline/readline.h was not found."
|
echo "** Cannot --enable-pcretest-readline because readline/readline.h was not found."
|
||||||
@ -592,14 +940,73 @@ if test "$enable_pcretest_libreadline" = "yes"; then
|
|||||||
echo "** Cannot --enable-pcretest-readline because readline/history.h was not found."
|
echo "** Cannot --enable-pcretest-readline because readline/history.h was not found."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
LIBREADLINE="-lreadline"
|
if test -z "$LIBREADLINE"; then
|
||||||
|
echo "** Cannot --enable-pcretest-readline because readline library was not found."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
AC_SUBST(LIBREADLINE)
|
|
||||||
|
# Check for valgrind
|
||||||
|
|
||||||
|
if test "$enable_valgrind" = "yes"; then
|
||||||
|
m4_ifdef([PKG_CHECK_MODULES],
|
||||||
|
[PKG_CHECK_MODULES([VALGRIND],[valgrind])],
|
||||||
|
[AC_MSG_ERROR([pkg-config not supported])])
|
||||||
|
fi
|
||||||
|
|
||||||
|
# test code coverage reporting
|
||||||
|
if test "$enable_coverage" = "yes"; then
|
||||||
|
if test "x$GCC" != "xyes"; then
|
||||||
|
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ccache is incompatible with gcov
|
||||||
|
AC_PATH_PROG([SHTOOL],[shtool],[false])
|
||||||
|
case `$SHTOOL path $CC` in
|
||||||
|
*ccache*) cc_ccache=yes;;
|
||||||
|
*) cc_ccache=no;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
if test "$cc_ccache" = "yes"; then
|
||||||
|
if test -z "$CCACHE_DISABLE" -o "$CCACHE_DISABLE" != "1"; then
|
||||||
|
AC_MSG_ERROR([must export CCACHE_DISABLE=1 to disable ccache for code coverage])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_ARG_VAR([LCOV],[the ltp lcov program])
|
||||||
|
AC_PATH_PROG([LCOV],[lcov],[false])
|
||||||
|
if test "x$LCOV" = "xfalse"; then
|
||||||
|
AC_MSG_ERROR([lcov not found])
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_ARG_VAR([GENHTML],[the ltp genhtml program])
|
||||||
|
AC_PATH_PROG([GENHTML],[genhtml],[false])
|
||||||
|
if test "x$GENHTML" = "xfalse"; then
|
||||||
|
AC_MSG_ERROR([genhtml not found])
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_DEFINE([SUPPORT_GCOV],[1], [
|
||||||
|
Define to allow pcretest and pcregrep to be linked with gcov, so that they
|
||||||
|
are able to generate code coverage reports.])
|
||||||
|
|
||||||
|
# And add flags needed for gcov
|
||||||
|
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||||
|
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||||
|
GCOV_LIBS="-lgcov"
|
||||||
|
AC_SUBST([GCOV_CFLAGS])
|
||||||
|
AC_SUBST([GCOV_CXXFLAGS])
|
||||||
|
AC_SUBST([GCOV_LIBS])
|
||||||
|
fi # enable_coverage
|
||||||
|
|
||||||
|
AM_CONDITIONAL([WITH_GCOV],[test "x$enable_coverage" = "xyes"])
|
||||||
|
|
||||||
# Produce these files, in addition to config.h.
|
# Produce these files, in addition to config.h.
|
||||||
AC_CONFIG_FILES(
|
AC_CONFIG_FILES(
|
||||||
Makefile
|
Makefile
|
||||||
libpcre.pc
|
libpcre.pc
|
||||||
|
libpcre16.pc
|
||||||
|
libpcre32.pc
|
||||||
|
libpcreposix.pc
|
||||||
libpcrecpp.pc
|
libpcrecpp.pc
|
||||||
pcre-config
|
pcre-config
|
||||||
pcre.h
|
pcre.h
|
||||||
@ -616,9 +1023,16 @@ AC_CONFIG_COMMANDS([delete-old-chartables], [rm -f pcre_chartables.c])
|
|||||||
|
|
||||||
AC_OUTPUT
|
AC_OUTPUT
|
||||||
|
|
||||||
# Print out a nice little message after configure is run displaying your
|
# Print out a nice little message after configure is run displaying the
|
||||||
# chosen options.
|
# chosen options.
|
||||||
|
|
||||||
|
ebcdic_nl_code=n/a
|
||||||
|
if test "$enable_ebcdic_nl25" = "yes"; then
|
||||||
|
ebcdic_nl_code=0x25
|
||||||
|
elif test "$enable_ebcdic" = "yes"; then
|
||||||
|
ebcdic_nl_code=0x15
|
||||||
|
fi
|
||||||
|
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
|
|
||||||
$PACKAGE-$VERSION configuration summary:
|
$PACKAGE-$VERSION configuration summary:
|
||||||
@ -630,17 +1044,22 @@ $PACKAGE-$VERSION configuration summary:
|
|||||||
C++ compiler .................... : ${CXX}
|
C++ compiler .................... : ${CXX}
|
||||||
Linker .......................... : ${LD}
|
Linker .......................... : ${LD}
|
||||||
C preprocessor flags ............ : ${CPPFLAGS}
|
C preprocessor flags ............ : ${CPPFLAGS}
|
||||||
C compiler flags ................ : ${CFLAGS}
|
C compiler flags ................ : ${CFLAGS} ${VISIBILITY_CFLAGS}
|
||||||
C++ compiler flags .............. : ${CXXFLAGS}
|
C++ compiler flags .............. : ${CXXFLAGS} ${VISIBILITY_CXXFLAGS}
|
||||||
Linker flags .................... : ${LDFLAGS}
|
Linker flags .................... : ${LDFLAGS}
|
||||||
Extra libraries ................. : ${LIBS}
|
Extra libraries ................. : ${LIBS}
|
||||||
|
|
||||||
|
Build 8 bit pcre library ........ : ${enable_pcre8}
|
||||||
|
Build 16 bit pcre library ....... : ${enable_pcre16}
|
||||||
|
Build 32 bit pcre library ....... : ${enable_pcre32}
|
||||||
Build C++ library ............... : ${enable_cpp}
|
Build C++ library ............... : ${enable_cpp}
|
||||||
Enable UTF-8 support ............ : ${enable_utf8}
|
Enable JIT compiling support .... : ${enable_jit}
|
||||||
|
Enable UTF-8/16/32 support ...... : ${enable_utf}
|
||||||
Unicode properties .............. : ${enable_unicode_properties}
|
Unicode properties .............. : ${enable_unicode_properties}
|
||||||
Newline char/sequence ........... : ${enable_newline}
|
Newline char/sequence ........... : ${enable_newline}
|
||||||
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
||||||
EBCDIC coding ................... : ${enable_ebcdic}
|
EBCDIC coding ................... : ${enable_ebcdic}
|
||||||
|
EBCDIC code for NL .............. : ${ebcdic_nl_code}
|
||||||
Rebuild char tables ............. : ${enable_rebuild_chartables}
|
Rebuild char tables ............. : ${enable_rebuild_chartables}
|
||||||
Use stack recursion ............. : ${enable_stack_for_recursion}
|
Use stack recursion ............. : ${enable_stack_for_recursion}
|
||||||
POSIX mem threshold ............. : ${with_posix_malloc_threshold}
|
POSIX mem threshold ............. : ${with_posix_malloc_threshold}
|
||||||
@ -649,9 +1068,14 @@ $PACKAGE-$VERSION configuration summary:
|
|||||||
Match limit recursion ........... : ${with_match_limit_recursion}
|
Match limit recursion ........... : ${with_match_limit_recursion}
|
||||||
Build shared libs ............... : ${enable_shared}
|
Build shared libs ............... : ${enable_shared}
|
||||||
Build static libs ............... : ${enable_static}
|
Build static libs ............... : ${enable_static}
|
||||||
|
Use JIT in pcregrep ............. : ${enable_pcregrep_jit}
|
||||||
|
Buffer size for pcregrep ........ : ${with_pcregrep_bufsize}
|
||||||
Link pcregrep with libz ......... : ${enable_pcregrep_libz}
|
Link pcregrep with libz ......... : ${enable_pcregrep_libz}
|
||||||
Link pcregrep with libbz2 ....... : ${enable_pcregrep_libbz2}
|
Link pcregrep with libbz2 ....... : ${enable_pcregrep_libbz2}
|
||||||
|
Link pcretest with libedit ...... : ${enable_pcretest_libedit}
|
||||||
Link pcretest with libreadline .. : ${enable_pcretest_libreadline}
|
Link pcretest with libreadline .. : ${enable_pcretest_libreadline}
|
||||||
|
Valgrind support ................ : ${enable_valgrind}
|
||||||
|
Code coverage ................... : ${enable_coverage}
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# depcomp - compile a program generating dependencies as side-effects
|
# depcomp - compile a program generating dependencies as side-effects
|
||||||
|
|
||||||
scriptversion=2007-03-29.01
|
scriptversion=2012-03-27.16; # UTC
|
||||||
|
|
||||||
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007 Free Software
|
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009, 2010,
|
||||||
# Foundation, Inc.
|
# 2011, 2012 Free Software Foundation, Inc.
|
||||||
|
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
@ -17,9 +17,7 @@ scriptversion=2007-03-29.01
|
|||||||
# GNU General Public License for more details.
|
# GNU General Public License for more details.
|
||||||
|
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program; if not, write to the Free Software
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
# 02110-1301, USA.
|
|
||||||
|
|
||||||
# As a special exception to the GNU General Public License, if you
|
# As a special exception to the GNU General Public License, if you
|
||||||
# distribute this file as part of a program that contains a
|
# distribute this file as part of a program that contains a
|
||||||
@ -30,7 +28,7 @@ scriptversion=2007-03-29.01
|
|||||||
|
|
||||||
case $1 in
|
case $1 in
|
||||||
'')
|
'')
|
||||||
echo "$0: No command. Try \`$0 --help' for more information." 1>&2
|
echo "$0: No command. Try '$0 --help' for more information." 1>&2
|
||||||
exit 1;
|
exit 1;
|
||||||
;;
|
;;
|
||||||
-h | --h*)
|
-h | --h*)
|
||||||
@ -42,11 +40,11 @@ as side-effects.
|
|||||||
|
|
||||||
Environment variables:
|
Environment variables:
|
||||||
depmode Dependency tracking mode.
|
depmode Dependency tracking mode.
|
||||||
source Source file read by `PROGRAMS ARGS'.
|
source Source file read by 'PROGRAMS ARGS'.
|
||||||
object Object file output by `PROGRAMS ARGS'.
|
object Object file output by 'PROGRAMS ARGS'.
|
||||||
DEPDIR directory where to store dependencies.
|
DEPDIR directory where to store dependencies.
|
||||||
depfile Dependency file to output.
|
depfile Dependency file to output.
|
||||||
tmpdepfile Temporary file to use when outputing dependencies.
|
tmpdepfile Temporary file to use when outputting dependencies.
|
||||||
libtool Whether libtool is used (yes/no).
|
libtool Whether libtool is used (yes/no).
|
||||||
|
|
||||||
Report bugs to <bug-automake@gnu.org>.
|
Report bugs to <bug-automake@gnu.org>.
|
||||||
@ -59,6 +57,12 @@ EOF
|
|||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
# A tabulation character.
|
||||||
|
tab=' '
|
||||||
|
# A newline character.
|
||||||
|
nl='
|
||||||
|
'
|
||||||
|
|
||||||
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
||||||
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
||||||
exit 1
|
exit 1
|
||||||
@ -87,6 +91,29 @@ if test "$depmode" = dashXmstdout; then
|
|||||||
depmode=dashmstdout
|
depmode=dashmstdout
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
cygpath_u="cygpath -u -f -"
|
||||||
|
if test "$depmode" = msvcmsys; then
|
||||||
|
# This is just like msvisualcpp but w/o cygpath translation.
|
||||||
|
# Just convert the backslash-escaped backslashes to single forward
|
||||||
|
# slashes to satisfy depend.m4
|
||||||
|
cygpath_u='sed s,\\\\,/,g'
|
||||||
|
depmode=msvisualcpp
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$depmode" = msvc7msys; then
|
||||||
|
# This is just like msvc7 but w/o cygpath translation.
|
||||||
|
# Just convert the backslash-escaped backslashes to single forward
|
||||||
|
# slashes to satisfy depend.m4
|
||||||
|
cygpath_u='sed s,\\\\,/,g'
|
||||||
|
depmode=msvc7
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$depmode" = xlc; then
|
||||||
|
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
|
||||||
|
gccflag=-qmakedep=gcc,-MF
|
||||||
|
depmode=gcc
|
||||||
|
fi
|
||||||
|
|
||||||
case "$depmode" in
|
case "$depmode" in
|
||||||
gcc3)
|
gcc3)
|
||||||
## gcc 3 implements dependency tracking that does exactly what
|
## gcc 3 implements dependency tracking that does exactly what
|
||||||
@ -141,20 +168,21 @@ gcc)
|
|||||||
## The second -e expression handles DOS-style file names with drive letters.
|
## The second -e expression handles DOS-style file names with drive letters.
|
||||||
sed -e 's/^[^:]*: / /' \
|
sed -e 's/^[^:]*: / /' \
|
||||||
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
||||||
## This next piece of magic avoids the `deleted header file' problem.
|
## This next piece of magic avoids the "deleted header file" problem.
|
||||||
## The problem is that when a header file which appears in a .P file
|
## The problem is that when a header file which appears in a .P file
|
||||||
## is deleted, the dependency causes make to die (because there is
|
## is deleted, the dependency causes make to die (because there is
|
||||||
## typically no way to rebuild the header). We avoid this by adding
|
## typically no way to rebuild the header). We avoid this by adding
|
||||||
## dummy dependencies for each header file. Too bad gcc doesn't do
|
## dummy dependencies for each header file. Too bad gcc doesn't do
|
||||||
## this for us directly.
|
## this for us directly.
|
||||||
tr ' ' '
|
tr ' ' "$nl" < "$tmpdepfile" |
|
||||||
' < "$tmpdepfile" |
|
## Some versions of gcc put a space before the ':'. On the theory
|
||||||
## Some versions of gcc put a space before the `:'. On the theory
|
|
||||||
## that the space means something, we add a space to the output as
|
## that the space means something, we add a space to the output as
|
||||||
## well.
|
## well. hp depmode also adds that space, but also prefixes the VPATH
|
||||||
|
## to the object. Take care to not repeat it in the output.
|
||||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||||
## correctly. Breaking it into two sed invocations is a workaround.
|
## correctly. Breaking it into two sed invocations is a workaround.
|
||||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
|
||||||
|
| sed -e 's/$/ :/' >> "$depfile"
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
@ -186,20 +214,17 @@ sgi)
|
|||||||
# clever and replace this with sed code, as IRIX sed won't handle
|
# clever and replace this with sed code, as IRIX sed won't handle
|
||||||
# lines with more than a fixed number of characters (4096 in
|
# lines with more than a fixed number of characters (4096 in
|
||||||
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
|
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
|
||||||
# the IRIX cc adds comments like `#:fec' to the end of the
|
# the IRIX cc adds comments like '#:fec' to the end of the
|
||||||
# dependency line.
|
# dependency line.
|
||||||
tr ' ' '
|
tr ' ' "$nl" < "$tmpdepfile" \
|
||||||
' < "$tmpdepfile" \
|
|
||||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
|
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
|
||||||
tr '
|
tr "$nl" ' ' >> "$depfile"
|
||||||
' ' ' >> $depfile
|
echo >> "$depfile"
|
||||||
echo >> $depfile
|
|
||||||
|
|
||||||
# The second pass generates a dummy entry for each header file.
|
# The second pass generates a dummy entry for each header file.
|
||||||
tr ' ' '
|
tr ' ' "$nl" < "$tmpdepfile" \
|
||||||
' < "$tmpdepfile" \
|
|
||||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
|
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
|
||||||
>> $depfile
|
>> "$depfile"
|
||||||
else
|
else
|
||||||
# The sourcefile does not contain any dependencies, so just
|
# The sourcefile does not contain any dependencies, so just
|
||||||
# store a dummy comment line, to avoid errors with the Makefile
|
# store a dummy comment line, to avoid errors with the Makefile
|
||||||
@ -209,10 +234,17 @@ sgi)
|
|||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
xlc)
|
||||||
|
# This case exists only to let depend.m4 do its work. It works by
|
||||||
|
# looking at the text of this script. This case will never be run,
|
||||||
|
# since it is checked for above.
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
aix)
|
aix)
|
||||||
# The C for AIX Compiler uses -M and outputs the dependencies
|
# The C for AIX Compiler uses -M and outputs the dependencies
|
||||||
# in a .u file. In older versions, this file always lives in the
|
# in a .u file. In older versions, this file always lives in the
|
||||||
# current directory. Also, the AIX compiler puts `$object:' at the
|
# current directory. Also, the AIX compiler puts '$object:' at the
|
||||||
# start of each line; $object doesn't have directory information.
|
# start of each line; $object doesn't have directory information.
|
||||||
# Version 6 uses the directory in both cases.
|
# Version 6 uses the directory in both cases.
|
||||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||||
@ -242,12 +274,11 @@ aix)
|
|||||||
test -f "$tmpdepfile" && break
|
test -f "$tmpdepfile" && break
|
||||||
done
|
done
|
||||||
if test -f "$tmpdepfile"; then
|
if test -f "$tmpdepfile"; then
|
||||||
# Each line is of the form `foo.o: dependent.h'.
|
# Each line is of the form 'foo.o: dependent.h'.
|
||||||
# Do two passes, one to just change these to
|
# Do two passes, one to just change these to
|
||||||
# `$object: dependent.h' and one to simply `dependent.h:'.
|
# '$object: dependent.h' and one to simply 'dependent.h:'.
|
||||||
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
||||||
# That's a tab and a space in the [].
|
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
||||||
sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
|
||||||
else
|
else
|
||||||
# The sourcefile does not contain any dependencies, so just
|
# The sourcefile does not contain any dependencies, so just
|
||||||
# store a dummy comment line, to avoid errors with the Makefile
|
# store a dummy comment line, to avoid errors with the Makefile
|
||||||
@ -258,23 +289,26 @@ aix)
|
|||||||
;;
|
;;
|
||||||
|
|
||||||
icc)
|
icc)
|
||||||
# Intel's C compiler understands `-MD -MF file'. However on
|
# Intel's C compiler and tcc (Tiny C Compiler) understand '-MD -MF file'.
|
||||||
# icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c
|
# However on
|
||||||
|
# $CC -MD -MF foo.d -c -o sub/foo.o sub/foo.c
|
||||||
# ICC 7.0 will fill foo.d with something like
|
# ICC 7.0 will fill foo.d with something like
|
||||||
# foo.o: sub/foo.c
|
# foo.o: sub/foo.c
|
||||||
# foo.o: sub/foo.h
|
# foo.o: sub/foo.h
|
||||||
# which is wrong. We want:
|
# which is wrong. We want
|
||||||
# sub/foo.o: sub/foo.c
|
# sub/foo.o: sub/foo.c
|
||||||
# sub/foo.o: sub/foo.h
|
# sub/foo.o: sub/foo.h
|
||||||
# sub/foo.c:
|
# sub/foo.c:
|
||||||
# sub/foo.h:
|
# sub/foo.h:
|
||||||
# ICC 7.1 will output
|
# ICC 7.1 will output
|
||||||
# foo.o: sub/foo.c sub/foo.h
|
# foo.o: sub/foo.c sub/foo.h
|
||||||
# and will wrap long lines using \ :
|
# and will wrap long lines using '\':
|
||||||
# foo.o: sub/foo.c ... \
|
# foo.o: sub/foo.c ... \
|
||||||
# sub/foo.h ... \
|
# sub/foo.h ... \
|
||||||
# ...
|
# ...
|
||||||
|
# tcc 0.9.26 (FIXME still under development at the moment of writing)
|
||||||
|
# will emit a similar output, but also prepend the continuation lines
|
||||||
|
# with horizontal tabulation characters.
|
||||||
"$@" -MD -MF "$tmpdepfile"
|
"$@" -MD -MF "$tmpdepfile"
|
||||||
stat=$?
|
stat=$?
|
||||||
if test $stat -eq 0; then :
|
if test $stat -eq 0; then :
|
||||||
@ -283,15 +317,21 @@ icc)
|
|||||||
exit $stat
|
exit $stat
|
||||||
fi
|
fi
|
||||||
rm -f "$depfile"
|
rm -f "$depfile"
|
||||||
# Each line is of the form `foo.o: dependent.h',
|
# Each line is of the form 'foo.o: dependent.h',
|
||||||
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
|
# or 'foo.o: dep1.h dep2.h \', or ' dep3.h dep4.h \'.
|
||||||
# Do two passes, one to just change these to
|
# Do two passes, one to just change these to
|
||||||
# `$object: dependent.h' and one to simply `dependent.h:'.
|
# '$object: dependent.h' and one to simply 'dependent.h:'.
|
||||||
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
|
sed -e "s/^[ $tab][ $tab]*/ /" -e "s,^[^:]*:,$object :," \
|
||||||
# Some versions of the HPUX 10.20 sed can't process this invocation
|
< "$tmpdepfile" > "$depfile"
|
||||||
# correctly. Breaking it into two sed invocations is a workaround.
|
sed '
|
||||||
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
|
s/[ '"$tab"'][ '"$tab"']*/ /g
|
||||||
sed -e 's/$/ :/' >> "$depfile"
|
s/^ *//
|
||||||
|
s/ *\\*$//
|
||||||
|
s/^[^:]*: *//
|
||||||
|
/^$/d
|
||||||
|
/:$/d
|
||||||
|
s/$/ :/
|
||||||
|
' < "$tmpdepfile" >> "$depfile"
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
@ -327,8 +367,13 @@ hp2)
|
|||||||
done
|
done
|
||||||
if test -f "$tmpdepfile"; then
|
if test -f "$tmpdepfile"; then
|
||||||
sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
|
sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
|
||||||
# Add `dependent.h:' lines.
|
# Add 'dependent.h:' lines.
|
||||||
sed -ne '2,${; s/^ *//; s/ \\*$//; s/$/:/; p;}' "$tmpdepfile" >> "$depfile"
|
sed -ne '2,${
|
||||||
|
s/^ *//
|
||||||
|
s/ \\*$//
|
||||||
|
s/$/:/
|
||||||
|
p
|
||||||
|
}' "$tmpdepfile" >> "$depfile"
|
||||||
else
|
else
|
||||||
echo "#dummy" > "$depfile"
|
echo "#dummy" > "$depfile"
|
||||||
fi
|
fi
|
||||||
@ -337,9 +382,9 @@ hp2)
|
|||||||
|
|
||||||
tru64)
|
tru64)
|
||||||
# The Tru64 compiler uses -MD to generate dependencies as a side
|
# The Tru64 compiler uses -MD to generate dependencies as a side
|
||||||
# effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
|
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
|
||||||
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
||||||
# dependencies in `foo.d' instead, so we check for that too.
|
# dependencies in 'foo.d' instead, so we check for that too.
|
||||||
# Subdirectories are respected.
|
# Subdirectories are respected.
|
||||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||||
test "x$dir" = "x$object" && dir=
|
test "x$dir" = "x$object" && dir=
|
||||||
@ -385,14 +430,59 @@ tru64)
|
|||||||
done
|
done
|
||||||
if test -f "$tmpdepfile"; then
|
if test -f "$tmpdepfile"; then
|
||||||
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
||||||
# That's a tab and a space in the [].
|
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
||||||
sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
|
||||||
else
|
else
|
||||||
echo "#dummy" > "$depfile"
|
echo "#dummy" > "$depfile"
|
||||||
fi
|
fi
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
msvc7)
|
||||||
|
if test "$libtool" = yes; then
|
||||||
|
showIncludes=-Wc,-showIncludes
|
||||||
|
else
|
||||||
|
showIncludes=-showIncludes
|
||||||
|
fi
|
||||||
|
"$@" $showIncludes > "$tmpdepfile"
|
||||||
|
stat=$?
|
||||||
|
grep -v '^Note: including file: ' "$tmpdepfile"
|
||||||
|
if test "$stat" = 0; then :
|
||||||
|
else
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
exit $stat
|
||||||
|
fi
|
||||||
|
rm -f "$depfile"
|
||||||
|
echo "$object : \\" > "$depfile"
|
||||||
|
# The first sed program below extracts the file names and escapes
|
||||||
|
# backslashes for cygpath. The second sed program outputs the file
|
||||||
|
# name when reading, but also accumulates all include files in the
|
||||||
|
# hold buffer in order to output them again at the end. This only
|
||||||
|
# works with sed implementations that can handle large buffers.
|
||||||
|
sed < "$tmpdepfile" -n '
|
||||||
|
/^Note: including file: *\(.*\)/ {
|
||||||
|
s//\1/
|
||||||
|
s/\\/\\\\/g
|
||||||
|
p
|
||||||
|
}' | $cygpath_u | sort -u | sed -n '
|
||||||
|
s/ /\\ /g
|
||||||
|
s/\(.*\)/'"$tab"'\1 \\/p
|
||||||
|
s/.\(.*\) \\/\1:/
|
||||||
|
H
|
||||||
|
$ {
|
||||||
|
s/.*/'"$tab"'/
|
||||||
|
G
|
||||||
|
p
|
||||||
|
}' >> "$depfile"
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
;;
|
||||||
|
|
||||||
|
msvc7msys)
|
||||||
|
# This case exists only to let depend.m4 do its work. It works by
|
||||||
|
# looking at the text of this script. This case will never be run,
|
||||||
|
# since it is checked for above.
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
#nosideeffect)
|
#nosideeffect)
|
||||||
# This comment above is used by automake to tell side-effect
|
# This comment above is used by automake to tell side-effect
|
||||||
# dependency tracking mechanisms from slower ones.
|
# dependency tracking mechanisms from slower ones.
|
||||||
@ -404,13 +494,13 @@ dashmstdout)
|
|||||||
|
|
||||||
# Remove the call to Libtool.
|
# Remove the call to Libtool.
|
||||||
if test "$libtool" = yes; then
|
if test "$libtool" = yes; then
|
||||||
while test $1 != '--mode=compile'; do
|
while test "X$1" != 'X--mode=compile'; do
|
||||||
shift
|
shift
|
||||||
done
|
done
|
||||||
shift
|
shift
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Remove `-o $object'.
|
# Remove '-o $object'.
|
||||||
IFS=" "
|
IFS=" "
|
||||||
for arg
|
for arg
|
||||||
do
|
do
|
||||||
@ -430,15 +520,14 @@ dashmstdout)
|
|||||||
done
|
done
|
||||||
|
|
||||||
test -z "$dashmflag" && dashmflag=-M
|
test -z "$dashmflag" && dashmflag=-M
|
||||||
# Require at least two characters before searching for `:'
|
# Require at least two characters before searching for ':'
|
||||||
# in the target name. This is to cope with DOS-style filenames:
|
# in the target name. This is to cope with DOS-style filenames:
|
||||||
# a dependency such as `c:/foo/bar' could be seen as target `c' otherwise.
|
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
|
||||||
"$@" $dashmflag |
|
"$@" $dashmflag |
|
||||||
sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile"
|
sed 's:^['"$tab"' ]*[^:'"$tab"' ][^:][^:]*\:['"$tab"' ]*:'"$object"'\: :' > "$tmpdepfile"
|
||||||
rm -f "$depfile"
|
rm -f "$depfile"
|
||||||
cat < "$tmpdepfile" > "$depfile"
|
cat < "$tmpdepfile" > "$depfile"
|
||||||
tr ' ' '
|
tr ' ' "$nl" < "$tmpdepfile" | \
|
||||||
' < "$tmpdepfile" | \
|
|
||||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||||
## correctly. Breaking it into two sed invocations is a workaround.
|
## correctly. Breaking it into two sed invocations is a workaround.
|
||||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||||
@ -455,38 +544,46 @@ makedepend)
|
|||||||
"$@" || exit $?
|
"$@" || exit $?
|
||||||
# Remove any Libtool call
|
# Remove any Libtool call
|
||||||
if test "$libtool" = yes; then
|
if test "$libtool" = yes; then
|
||||||
while test $1 != '--mode=compile'; do
|
while test "X$1" != 'X--mode=compile'; do
|
||||||
shift
|
shift
|
||||||
done
|
done
|
||||||
shift
|
shift
|
||||||
fi
|
fi
|
||||||
# X makedepend
|
# X makedepend
|
||||||
shift
|
shift
|
||||||
cleared=no
|
cleared=no eat=no
|
||||||
for arg in "$@"; do
|
for arg
|
||||||
|
do
|
||||||
case $cleared in
|
case $cleared in
|
||||||
no)
|
no)
|
||||||
set ""; shift
|
set ""; shift
|
||||||
cleared=yes ;;
|
cleared=yes ;;
|
||||||
esac
|
esac
|
||||||
|
if test $eat = yes; then
|
||||||
|
eat=no
|
||||||
|
continue
|
||||||
|
fi
|
||||||
case "$arg" in
|
case "$arg" in
|
||||||
-D*|-I*)
|
-D*|-I*)
|
||||||
set fnord "$@" "$arg"; shift ;;
|
set fnord "$@" "$arg"; shift ;;
|
||||||
# Strip any option that makedepend may not understand. Remove
|
# Strip any option that makedepend may not understand. Remove
|
||||||
# the object too, otherwise makedepend will parse it as a source file.
|
# the object too, otherwise makedepend will parse it as a source file.
|
||||||
|
-arch)
|
||||||
|
eat=yes ;;
|
||||||
-*|$object)
|
-*|$object)
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
set fnord "$@" "$arg"; shift ;;
|
set fnord "$@" "$arg"; shift ;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
obj_suffix="`echo $object | sed 's/^.*\././'`"
|
obj_suffix=`echo "$object" | sed 's/^.*\././'`
|
||||||
touch "$tmpdepfile"
|
touch "$tmpdepfile"
|
||||||
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
|
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
|
||||||
rm -f "$depfile"
|
rm -f "$depfile"
|
||||||
cat < "$tmpdepfile" > "$depfile"
|
# makedepend may prepend the VPATH from the source file name to the object.
|
||||||
sed '1,2d' "$tmpdepfile" | tr ' ' '
|
# No need to regex-escape $object, excess matching of '.' is harmless.
|
||||||
' | \
|
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
|
||||||
|
sed '1,2d' "$tmpdepfile" | tr ' ' "$nl" | \
|
||||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||||
## correctly. Breaking it into two sed invocations is a workaround.
|
## correctly. Breaking it into two sed invocations is a workaround.
|
||||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||||
@ -500,13 +597,13 @@ cpp)
|
|||||||
|
|
||||||
# Remove the call to Libtool.
|
# Remove the call to Libtool.
|
||||||
if test "$libtool" = yes; then
|
if test "$libtool" = yes; then
|
||||||
while test $1 != '--mode=compile'; do
|
while test "X$1" != 'X--mode=compile'; do
|
||||||
shift
|
shift
|
||||||
done
|
done
|
||||||
shift
|
shift
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Remove `-o $object'.
|
# Remove '-o $object'.
|
||||||
IFS=" "
|
IFS=" "
|
||||||
for arg
|
for arg
|
||||||
do
|
do
|
||||||
@ -538,13 +635,27 @@ cpp)
|
|||||||
|
|
||||||
msvisualcpp)
|
msvisualcpp)
|
||||||
# Important note: in order to support this mode, a compiler *must*
|
# Important note: in order to support this mode, a compiler *must*
|
||||||
# always write the preprocessed file to stdout, regardless of -o,
|
# always write the preprocessed file to stdout.
|
||||||
# because we must use -o when running libtool.
|
|
||||||
"$@" || exit $?
|
"$@" || exit $?
|
||||||
|
|
||||||
|
# Remove the call to Libtool.
|
||||||
|
if test "$libtool" = yes; then
|
||||||
|
while test "X$1" != 'X--mode=compile'; do
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
shift
|
||||||
|
fi
|
||||||
|
|
||||||
IFS=" "
|
IFS=" "
|
||||||
for arg
|
for arg
|
||||||
do
|
do
|
||||||
case "$arg" in
|
case "$arg" in
|
||||||
|
-o)
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
$object)
|
||||||
|
shift
|
||||||
|
;;
|
||||||
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
|
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
|
||||||
set fnord "$@"
|
set fnord "$@"
|
||||||
shift
|
shift
|
||||||
@ -557,16 +668,23 @@ msvisualcpp)
|
|||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
"$@" -E |
|
"$@" -E 2>/dev/null |
|
||||||
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile"
|
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
|
||||||
rm -f "$depfile"
|
rm -f "$depfile"
|
||||||
echo "$object : \\" > "$depfile"
|
echo "$object : \\" > "$depfile"
|
||||||
. "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile"
|
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
|
||||||
echo " " >> "$depfile"
|
echo "$tab" >> "$depfile"
|
||||||
. "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile"
|
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
msvcmsys)
|
||||||
|
# This case exists only to let depend.m4 do its work. It works by
|
||||||
|
# looking at the text of this script. This case will never be run,
|
||||||
|
# since it is checked for above.
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
none)
|
none)
|
||||||
exec "$@"
|
exec "$@"
|
||||||
;;
|
;;
|
||||||
@ -585,5 +703,6 @@ exit 0
|
|||||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||||
# time-stamp-start: "scriptversion="
|
# time-stamp-start: "scriptversion="
|
||||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||||
# time-stamp-end: "$"
|
# time-stamp-time-zone: "UTC"
|
||||||
|
# time-stamp-end: "; # UTC"
|
||||||
# End:
|
# End:
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2008 University of Cambridge
|
Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -108,13 +108,26 @@ fprintf(f,
|
|||||||
"library and dead code stripping is activated. This leads to link errors.\n"
|
"library and dead code stripping is activated. This leads to link errors.\n"
|
||||||
"Pulling in the header ensures that the array gets flagged as \"someone\n"
|
"Pulling in the header ensures that the array gets flagged as \"someone\n"
|
||||||
"outside this compilation unit might reference this\" and so it will always\n"
|
"outside this compilation unit might reference this\" and so it will always\n"
|
||||||
"be supplied to the linker. */\n\n"
|
"be supplied to the linker. */\n\n");
|
||||||
|
|
||||||
|
/* Force config.h in z/OS */
|
||||||
|
|
||||||
|
#if defined NATIVE_ZOS
|
||||||
|
fprintf(f,
|
||||||
|
"/* For z/OS, config.h is forced */\n"
|
||||||
|
"#ifndef HAVE_CONFIG_H\n"
|
||||||
|
"#define HAVE_CONFIG_H 1\n"
|
||||||
|
"#endif\n\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(f,
|
||||||
"#ifdef HAVE_CONFIG_H\n"
|
"#ifdef HAVE_CONFIG_H\n"
|
||||||
"#include \"config.h\"\n"
|
"#include \"config.h\"\n"
|
||||||
"#endif\n\n"
|
"#endif\n\n"
|
||||||
"#include \"pcre_internal.h\"\n\n");
|
"#include \"pcre_internal.h\"\n\n");
|
||||||
|
|
||||||
fprintf(f,
|
fprintf(f,
|
||||||
"const unsigned char _pcre_default_tables[] = {\n\n"
|
"const pcre_uint8 PRIV(default_tables)[] = {\n\n"
|
||||||
"/* This table is a lower casing table. */\n\n");
|
"/* This table is a lower casing table. */\n\n");
|
||||||
|
|
||||||
fprintf(f, " ");
|
fprintf(f, " ");
|
||||||
|
@ -18,6 +18,12 @@ The HTML documentation for PCRE comprises the following pages:
|
|||||||
<tr><td><a href="pcre.html">pcre</a></td>
|
<tr><td><a href="pcre.html">pcre</a></td>
|
||||||
<td> Introductory page</td></tr>
|
<td> Introductory page</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre16.html">pcre16</a></td>
|
||||||
|
<td> Discussion of the 16-bit PCRE library</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre32.html">pcre32</a></td>
|
||||||
|
<td> Discussion of the 32-bit PCRE library</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
||||||
<td> Information about the installation configuration</td></tr>
|
<td> Information about the installation configuration</td></tr>
|
||||||
|
|
||||||
@ -36,9 +42,18 @@ The HTML documentation for PCRE comprises the following pages:
|
|||||||
<tr><td><a href="pcrecpp.html">pcrecpp</a></td>
|
<tr><td><a href="pcrecpp.html">pcrecpp</a></td>
|
||||||
<td> The C++ wrapper for the PCRE library</td></tr>
|
<td> The C++ wrapper for the PCRE library</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcredemo.html">pcredemo</a></td>
|
||||||
|
<td> A demonstration C program that uses the PCRE library</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcregrep.html">pcregrep</a></td>
|
<tr><td><a href="pcregrep.html">pcregrep</a></td>
|
||||||
<td> The <b>pcregrep</b> command</td></tr>
|
<td> The <b>pcregrep</b> command</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcrejit.html">pcrejit</a></td>
|
||||||
|
<td> Discussion of the just-in-time optimization support</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcrelimits.html">pcrelimits</a></td>
|
||||||
|
<td> Details of size and other limits</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcrematching.html">pcrematching</a></td>
|
<tr><td><a href="pcrematching.html">pcrematching</a></td>
|
||||||
<td> Discussion of the two matching algorithms</td></tr>
|
<td> Discussion of the two matching algorithms</td></tr>
|
||||||
|
|
||||||
@ -58,7 +73,7 @@ The HTML documentation for PCRE comprises the following pages:
|
|||||||
<td> How to save and re-use compiled patterns</td></tr>
|
<td> How to save and re-use compiled patterns</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcresample.html">pcresample</a></td>
|
<tr><td><a href="pcresample.html">pcresample</a></td>
|
||||||
<td> Description of the sample program</td></tr>
|
<td> Discussion of the pcredemo program</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcrestack.html">pcrestack</a></td>
|
<tr><td><a href="pcrestack.html">pcrestack</a></td>
|
||||||
<td> Discussion of PCRE's stack usage</td></tr>
|
<td> Discussion of PCRE's stack usage</td></tr>
|
||||||
@ -68,15 +83,22 @@ The HTML documentation for PCRE comprises the following pages:
|
|||||||
|
|
||||||
<tr><td><a href="pcretest.html">pcretest</a></td>
|
<tr><td><a href="pcretest.html">pcretest</a></td>
|
||||||
<td> The <b>pcretest</b> command for testing PCRE</td></tr>
|
<td> The <b>pcretest</b> command for testing PCRE</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcreunicode.html">pcreunicode</a></td>
|
||||||
|
<td> Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
There are also individual pages that summarize the interface for each function
|
There are also individual pages that summarize the interface for each function
|
||||||
in the library:
|
in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
|
||||||
|
functions.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_assign_jit_stack.html">pcre_assign_jit_stack</a></td>
|
||||||
|
<td> Assign stack for JIT matching</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_compile.html">pcre_compile</a></td>
|
<tr><td><a href="pcre_compile.html">pcre_compile</a></td>
|
||||||
<td> Compile a regular expression</td></tr>
|
<td> Compile a regular expression</td></tr>
|
||||||
|
|
||||||
@ -96,6 +118,9 @@ in the library:
|
|||||||
<td> Match a compiled pattern to a subject string
|
<td> Match a compiled pattern to a subject string
|
||||||
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
|
||||||
|
<td> Free study data</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
|
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
|
||||||
<td> Match a compiled pattern to a subject string
|
<td> Match a compiled pattern to a subject string
|
||||||
(Perl compatible)</td></tr>
|
(Perl compatible)</td></tr>
|
||||||
@ -124,15 +149,30 @@ in the library:
|
|||||||
<tr><td><a href="pcre_info.html">pcre_info</a></td>
|
<tr><td><a href="pcre_info.html">pcre_info</a></td>
|
||||||
<td> Obsolete information extraction function</td></tr>
|
<td> Obsolete information extraction function</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
|
||||||
|
<td> Create a stack for JIT matching</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_jit_stack_free.html">pcre_jit_stack_free</a></td>
|
||||||
|
<td> Free a JIT matching stack</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_maketables.html">pcre_maketables</a></td>
|
<tr><td><a href="pcre_maketables.html">pcre_maketables</a></td>
|
||||||
<td> Build character tables in current locale</td></tr>
|
<td> Build character tables in current locale</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_pattern_to_host_byte_order.html">pcre_pattern_to_host_byte_order</a></td>
|
||||||
|
<td> Convert compiled pattern to host byte order if necessary</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_refcount.html">pcre_refcount</a></td>
|
<tr><td><a href="pcre_refcount.html">pcre_refcount</a></td>
|
||||||
<td> Maintain reference count in compiled pattern</td></tr>
|
<td> Maintain reference count in compiled pattern</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_study.html">pcre_study</a></td>
|
<tr><td><a href="pcre_study.html">pcre_study</a></td>
|
||||||
<td> Study a compiled pattern</td></tr>
|
<td> Study a compiled pattern</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_utf16_to_host_byte_order.html">pcre_utf16_to_host_byte_order</a></td>
|
||||||
|
<td> Convert UTF-16 string to host byte order if necessary</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_utf32_to_host_byte_order.html">pcre_utf32_to_host_byte_order</a></td>
|
||||||
|
<td> Convert UTF-32 string to host byte order if necessary</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_version.html">pcre_version</a></td>
|
<tr><td><a href="pcre_version.html">pcre_version</a></td>
|
||||||
<td> Return PCRE version and release date</td></tr>
|
<td> Return PCRE version and release date</td></tr>
|
||||||
</table>
|
</table>
|
||||||
|
@ -23,12 +23,17 @@ man page, in case the conversion went wrong.
|
|||||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre-config [--prefix] [--exec-prefix] [--version] [--libs]</b>
|
<b>pcre-config [--prefix] [--exec-prefix] [--version] [--libs]</b>
|
||||||
<b>[--libs-posix] [--cflags] [--cflags-posix]</b>
|
<b>[--libs16] [--libs32] [--libs-cpp] [--libs-posix]</b>
|
||||||
|
<b>[--cflags] [--cflags-posix]</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre-config</b> returns the configuration of the installed PCRE
|
<b>pcre-config</b> returns the configuration of the installed PCRE
|
||||||
libraries and the options required to compile a program to use them.
|
libraries and the options required to compile a program to use them. Some of
|
||||||
|
the options apply only to the 8-bit, or 16-bit, or 32-bit libraries,
|
||||||
|
respectively, and are
|
||||||
|
not available if only one of those libraries has been built. If an unavailable
|
||||||
|
option is encountered, the "usage" information is output.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">OPTIONS</a><br>
|
<br><a name="SEC3" href="#TOC1">OPTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -50,12 +55,28 @@ output.
|
|||||||
<P>
|
<P>
|
||||||
<b>--libs</b>
|
<b>--libs</b>
|
||||||
Writes to the standard output the command line options required to link
|
Writes to the standard output the command line options required to link
|
||||||
with PCRE (<b>-lpcre</b> on many systems).
|
with the 8-bit PCRE library (<b>-lpcre</b> on many systems).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>--libs16</b>
|
||||||
|
Writes to the standard output the command line options required to link
|
||||||
|
with the 16-bit PCRE library (<b>-lpcre16</b> on many systems).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>--libs32</b>
|
||||||
|
Writes to the standard output the command line options required to link
|
||||||
|
with the 32-bit PCRE library (<b>-lpcre32</b> on many systems).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>--libs-cpp</b>
|
||||||
|
Writes to the standard output the command line options required to link with
|
||||||
|
PCRE's C++ wrapper library (<b>-lpcrecpp</b> <b>-lpcre</b> on many
|
||||||
|
systems).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--libs-posix</b>
|
<b>--libs-posix</b>
|
||||||
Writes to the standard output the command line options required to link with
|
Writes to the standard output the command line options required to link with
|
||||||
the PCRE posix emulation library (<b>-lpcreposix</b> <b>-lpcre</b> on many
|
PCRE's POSIX API wrapper library (<b>-lpcreposix</b> <b>-lpcre</b> on many
|
||||||
systems).
|
systems).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
@ -67,7 +88,7 @@ many systems).
|
|||||||
<P>
|
<P>
|
||||||
<b>--cflags-posix</b>
|
<b>--cflags-posix</b>
|
||||||
Writes to the standard output the command line options required to compile
|
Writes to the standard output the command line options required to compile
|
||||||
files that use the PCRE posix emulation library (this may include some <b>-I</b>
|
files that use PCRE's POSIX API wrapper library (this may include some <b>-I</b>
|
||||||
options, but is blank on many systems).
|
options, but is blank on many systems).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC4" href="#TOC1">SEE ALSO</a><br>
|
||||||
@ -77,11 +98,11 @@ options, but is blank on many systems).
|
|||||||
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
This manual page was originally written by Mark Baker for the Debian GNU/Linux
|
This manual page was originally written by Mark Baker for the Debian GNU/Linux
|
||||||
system. It has been slightly revised as a generic PCRE man page.
|
system. It has been subsequently revised as a generic PCRE man page.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 18 April 2007
|
Last updated: 24 June 2012
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -14,41 +14,69 @@ man page, in case the conversion went wrong.
|
|||||||
<br>
|
<br>
|
||||||
<ul>
|
<ul>
|
||||||
<li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
|
<li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
|
||||||
<li><a name="TOC2" href="#SEC2">USER DOCUMENTATION</a>
|
<li><a name="TOC2" href="#SEC2">SECURITY CONSIDERATIONS</a>
|
||||||
<li><a name="TOC3" href="#SEC3">LIMITATIONS</a>
|
<li><a name="TOC3" href="#SEC3">USER DOCUMENTATION</a>
|
||||||
<li><a name="TOC4" href="#SEC4">UTF-8 AND UNICODE PROPERTY SUPPORT</a>
|
<li><a name="TOC4" href="#SEC4">AUTHOR</a>
|
||||||
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
|
<li><a name="TOC5" href="#SEC5">REVISION</a>
|
||||||
<li><a name="TOC6" href="#SEC6">REVISION</a>
|
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
|
<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
The PCRE library is a set of functions that implement regular expression
|
The PCRE library is a set of functions that implement regular expression
|
||||||
pattern matching using the same syntax and semantics as Perl, with just a few
|
pattern matching using the same syntax and semantics as Perl, with just a few
|
||||||
differences. Certain features that appeared in Python and PCRE before they
|
differences. Some features that appeared in Python and PCRE before they
|
||||||
appeared in Perl are also available using the Python syntax. There is also some
|
appeared in Perl are also available using the Python syntax, there is some
|
||||||
support for certain .NET and Oniguruma syntax items, and there is an option for
|
support for one or two .NET and Oniguruma syntax items, and there is an option
|
||||||
requesting some minor changes that give better JavaScript compatibility.
|
for requesting some minor changes that give better JavaScript compatibility.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The current implementation of PCRE (release 7.x) corresponds approximately with
|
Starting with release 8.30, it is possible to compile two separate PCRE
|
||||||
Perl 5.10, including support for UTF-8 encoded strings and Unicode general
|
libraries: the original, which supports 8-bit character strings (including
|
||||||
category properties. However, UTF-8 and Unicode support has to be explicitly
|
UTF-8 strings), and a second library that supports 16-bit character strings
|
||||||
|
(including UTF-16 strings). The build process allows either one or both to be
|
||||||
|
built. The majority of the work to make this possible was done by Zoltan
|
||||||
|
Herczeg.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Starting with release 8.32 it is possible to compile a third separate PCRE
|
||||||
|
library, which supports 32-bit character strings (including
|
||||||
|
UTF-32 strings). The build process allows any set of the 8-, 16- and 32-bit
|
||||||
|
libraries. The work to make this possible was done by Christian Persch.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The three libraries contain identical sets of functions, except that the names
|
||||||
|
in the 16-bit library start with <b>pcre16_</b> instead of <b>pcre_</b>, and the
|
||||||
|
names in the 32-bit library start with <b>pcre32_</b> instead of <b>pcre_</b>. To
|
||||||
|
avoid over-complication and reduce the documentation maintenance load, most of
|
||||||
|
the documentation describes the 8-bit library, with the differences for the
|
||||||
|
16-bit and 32-bit libraries described separately in the
|
||||||
|
<a href="pcre16.html"><b>pcre16</b></a>
|
||||||
|
and
|
||||||
|
<a href="pcre32.html"><b>pcre32</b></a>
|
||||||
|
pages. References to functions or structures of the form <i>pcre[16|32]_xxx</i>
|
||||||
|
should be read as meaning "<i>pcre_xxx</i> when using the 8-bit library,
|
||||||
|
<i>pcre16_xxx</i> when using the 16-bit library, or <i>pcre32_xxx</i> when using
|
||||||
|
the 32-bit library".
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The current implementation of PCRE corresponds approximately with Perl 5.12,
|
||||||
|
including support for UTF-8/16/32 encoded strings and Unicode general category
|
||||||
|
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
||||||
enabled; it is not the default. The Unicode tables correspond to Unicode
|
enabled; it is not the default. The Unicode tables correspond to Unicode
|
||||||
release 5.1.
|
release 6.2.0.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In addition to the Perl-compatible matching function, PCRE contains an
|
In addition to the Perl-compatible matching function, PCRE contains an
|
||||||
alternative matching function that matches the same compiled patterns in a
|
alternative function that matches the same compiled patterns in a different
|
||||||
different way. In certain circumstances, the alternative function has some
|
way. In certain circumstances, the alternative function has some advantages.
|
||||||
advantages. For a discussion of the two matching algorithms, see the
|
For a discussion of the two matching algorithms, see the
|
||||||
<a href="pcrematching.html"><b>pcrematching</b></a>
|
<a href="pcrematching.html"><b>pcrematching</b></a>
|
||||||
page.
|
page.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
PCRE is written in C and released as a C library. A number of people have
|
PCRE is written in C and released as a C library. A number of people have
|
||||||
written wrappers and interfaces of various kinds. In particular, Google Inc.
|
written wrappers and interfaces of various kinds. In particular, Google Inc.
|
||||||
have provided a comprehensive C++ wrapper. This is now included as part of the
|
have provided a comprehensive C++ wrapper for the 8-bit library. This is now
|
||||||
PCRE distribution. The
|
included as part of the PCRE distribution. The
|
||||||
<a href="pcrecpp.html"><b>pcrecpp</b></a>
|
<a href="pcrecpp.html"><b>pcrecpp</b></a>
|
||||||
page has details of this interface. Other people's contributions can be found
|
page has details of this interface. Other people's contributions can be found
|
||||||
in the <i>Contrib</i> directory at the primary FTP site, which is:
|
in the <i>Contrib</i> directory at the primary FTP site, which is:
|
||||||
@ -72,216 +100,86 @@ function makes it possible for a client to discover which features are
|
|||||||
available. The features themselves are described in the
|
available. The features themselves are described in the
|
||||||
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
||||||
page. Documentation about building PCRE for various operating systems can be
|
page. Documentation about building PCRE for various operating systems can be
|
||||||
found in the <b>README</b> file in the source distribution.
|
found in the <b>README</b> and <b>NON-AUTOTOOLS_BUILD</b> files in the source
|
||||||
|
distribution.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The library contains a number of undocumented internal functions and data
|
The libraries contain a number of undocumented internal functions and data
|
||||||
tables that are used by more than one of the exported external functions, but
|
tables that are used by more than one of the exported external functions, but
|
||||||
which are not intended for use by external callers. Their names all begin with
|
which are not intended for use by external callers. Their names all begin with
|
||||||
"_pcre_", which hopefully will not provoke any name clashes. In some
|
"_pcre_" or "_pcre16_" or "_pcre32_", which hopefully will not provoke any name
|
||||||
environments, it is possible to control which external symbols are exported
|
clashes. In some environments, it is possible to control which external symbols
|
||||||
when a shared library is built, and in these cases the undocumented symbols are
|
are exported when a shared library is built, and in these cases the
|
||||||
not exported.
|
undocumented symbols are not exported.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">USER DOCUMENTATION</a><br>
|
<br><a name="SEC2" href="#TOC1">SECURITY CONSIDERATIONS</a><br>
|
||||||
|
<P>
|
||||||
|
If you are using PCRE in a non-UTF application that permits users to supply
|
||||||
|
arbitrary patterns for compilation, you should be aware of a feature that
|
||||||
|
allows users to turn on UTF support from within a pattern, provided that PCRE
|
||||||
|
was built with UTF support. For example, an 8-bit pattern that begins with
|
||||||
|
"(*UTF8)" or "(*UTF)" turns on UTF-8 mode, which interprets patterns and
|
||||||
|
subjects as strings of UTF-8 characters instead of individual 8-bit characters.
|
||||||
|
This causes both the pattern and any data against which it is matched to be
|
||||||
|
checked for UTF-8 validity. If the data string is very long, such a check might
|
||||||
|
use sufficiently many resources as to cause your application to lose
|
||||||
|
performance.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The best way of guarding against this possibility is to use the
|
||||||
|
<b>pcre_fullinfo()</b> function to check the compiled pattern's options for UTF.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If your application is one that supports UTF, be aware that validity checking
|
||||||
|
can take time. If the same data string is to be matched many times, you can use
|
||||||
|
the PCRE_NO_UTF[8|16|32]_CHECK option for the second and subsequent matches to
|
||||||
|
save redundant checks.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Another way that performance can be hit is by running a pattern that has a very
|
||||||
|
large search tree against a string that will never match. Nested unlimited
|
||||||
|
repeats in a pattern are a common example. PCRE provides some protection
|
||||||
|
against this: see the PCRE_EXTRA_MATCH_LIMIT feature in the
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
page.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC3" href="#TOC1">USER DOCUMENTATION</a><br>
|
||||||
<P>
|
<P>
|
||||||
The user documentation for PCRE comprises a number of different sections. In
|
The user documentation for PCRE comprises a number of different sections. In
|
||||||
the "man" format, each of these is a separate "man page". In the HTML format,
|
the "man" format, each of these is a separate "man page". In the HTML format,
|
||||||
each is a separate page, linked from the index page. In the plain text format,
|
each is a separate page, linked from the index page. In the plain text format,
|
||||||
all the sections are concatenated, for ease of searching. The sections are as
|
all the sections, except the <b>pcredemo</b> section, are concatenated, for ease
|
||||||
follows:
|
of searching. The sections are as follows:
|
||||||
<pre>
|
<pre>
|
||||||
pcre this document
|
pcre this document
|
||||||
|
pcre16 details of the 16-bit library
|
||||||
|
pcre32 details of the 32-bit library
|
||||||
pcre-config show PCRE installation configuration information
|
pcre-config show PCRE installation configuration information
|
||||||
pcreapi details of PCRE's native C API
|
pcreapi details of PCRE's native C API
|
||||||
pcrebuild options for building PCRE
|
pcrebuild options for building PCRE
|
||||||
pcrecallout details of the callout feature
|
pcrecallout details of the callout feature
|
||||||
pcrecompat discussion of Perl compatibility
|
pcrecompat discussion of Perl compatibility
|
||||||
pcrecpp details of the C++ wrapper
|
pcrecpp details of the C++ wrapper for the 8-bit library
|
||||||
pcregrep description of the <b>pcregrep</b> command
|
pcredemo a demonstration C program that uses PCRE
|
||||||
|
pcregrep description of the <b>pcregrep</b> command (8-bit only)
|
||||||
|
pcrejit discussion of the just-in-time optimization support
|
||||||
|
pcrelimits details of size and other limits
|
||||||
pcrematching discussion of the two matching algorithms
|
pcrematching discussion of the two matching algorithms
|
||||||
pcrepartial details of the partial matching facility
|
pcrepartial details of the partial matching facility
|
||||||
pcrepattern syntax and semantics of supported regular expressions
|
pcrepattern syntax and semantics of supported regular expressions
|
||||||
pcresyntax quick syntax reference
|
|
||||||
pcreperform discussion of performance issues
|
pcreperform discussion of performance issues
|
||||||
pcreposix the POSIX-compatible C API
|
pcreposix the POSIX-compatible C API for the 8-bit library
|
||||||
pcreprecompile details of saving and re-using precompiled patterns
|
pcreprecompile details of saving and re-using precompiled patterns
|
||||||
pcresample discussion of the sample program
|
pcresample discussion of the pcredemo program
|
||||||
pcrestack discussion of stack usage
|
pcrestack discussion of stack usage
|
||||||
|
pcresyntax quick syntax reference
|
||||||
pcretest description of the <b>pcretest</b> testing command
|
pcretest description of the <b>pcretest</b> testing command
|
||||||
|
pcreunicode discussion of Unicode and UTF-8/16/32 support
|
||||||
</pre>
|
</pre>
|
||||||
In addition, in the "man" and HTML formats, there is a short page for each
|
In addition, in the "man" and HTML formats, there is a short page for each
|
||||||
C library function, listing its arguments and results.
|
C library function, listing its arguments and results.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">LIMITATIONS</a><br>
|
<br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
|
||||||
There are some size limitations in PCRE but it is hoped that they will never in
|
|
||||||
practice be relevant.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The maximum length of a compiled pattern is 65539 (sic) bytes if PCRE is
|
|
||||||
compiled with the default internal linkage size of 2. If you want to process
|
|
||||||
regular expressions that are truly enormous, you can compile PCRE with an
|
|
||||||
internal linkage size of 3 or 4 (see the <b>README</b> file in the source
|
|
||||||
distribution and the
|
|
||||||
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
|
||||||
documentation for details). In these cases the limit is substantially larger.
|
|
||||||
However, the speed of execution is slower.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
All values in repeating quantifiers must be less than 65536.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
|
||||||
no more than 65535 capturing subpatterns.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The maximum length of name for a named subpattern is 32 characters, and the
|
|
||||||
maximum number of named subpatterns is 10000.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The maximum length of a subject string is the largest positive number that an
|
|
||||||
integer variable can hold. However, when using the traditional matching
|
|
||||||
function, PCRE uses recursion to handle subpatterns and indefinite repetition.
|
|
||||||
This means that the available stack space may limit the size of a subject
|
|
||||||
string that can be processed by certain patterns. For a discussion of stack
|
|
||||||
issues, see the
|
|
||||||
<a href="pcrestack.html"><b>pcrestack</b></a>
|
|
||||||
documentation.
|
|
||||||
<a name="utf8support"></a></P>
|
|
||||||
<br><a name="SEC4" href="#TOC1">UTF-8 AND UNICODE PROPERTY SUPPORT</a><br>
|
|
||||||
<P>
|
|
||||||
From release 3.3, PCRE has had some support for character strings encoded in
|
|
||||||
the UTF-8 format. For release 4.0 this was greatly extended to cover most
|
|
||||||
common requirements, and in release 5.0 additional support for Unicode general
|
|
||||||
category properties was added.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
In order to process UTF-8 strings, you must build PCRE to include UTF-8 support in
|
|
||||||
the code, and, in addition, you must call
|
|
||||||
<a href="pcre_compile.html"><b>pcre_compile()</b></a>
|
|
||||||
with the PCRE_UTF8 option flag, or the pattern must start with the sequence
|
|
||||||
(*UTF8). When either of these is the case, both the pattern and any subject
|
|
||||||
strings that are matched against it are treated as UTF-8 strings instead of
|
|
||||||
just strings of bytes.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If you compile PCRE with UTF-8 support, but do not use it at run time, the
|
|
||||||
library will be a bit bigger, but the additional run time overhead is limited
|
|
||||||
to testing the PCRE_UTF8 flag occasionally, so should not be very big.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If PCRE is built with Unicode character property support (which implies UTF-8
|
|
||||||
support), the escape sequences \p{..}, \P{..}, and \X are supported.
|
|
||||||
The available properties that can be tested are limited to the general
|
|
||||||
category properties such as Lu for an upper case letter or Nd for a decimal
|
|
||||||
number, the Unicode script names such as Arabic or Han, and the derived
|
|
||||||
properties Any and L&. A full list is given in the
|
|
||||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
|
||||||
documentation. Only the short names for properties are supported. For example,
|
|
||||||
\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
|
|
||||||
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
|
|
||||||
compatibility with Perl 5.6. PCRE does not support this.
|
|
||||||
<a name="utf8strings"></a></P>
|
|
||||||
<br><b>
|
|
||||||
Validity of UTF-8 strings
|
|
||||||
</b><br>
|
|
||||||
<P>
|
|
||||||
When you set the PCRE_UTF8 flag, the strings passed as patterns and subjects
|
|
||||||
are (by default) checked for validity on entry to the relevant functions. From
|
|
||||||
release 7.3 of PCRE, the check is according to the rules of RFC 3629, which are
|
|
||||||
themselves derived from the Unicode specification. Earlier releases of PCRE
|
|
||||||
followed the rules of RFC 2279, which allows the full range of 31-bit values (0
|
|
||||||
to 0x7FFFFFFF). The current check allows only values in the range U+0 to
|
|
||||||
U+10FFFF, excluding U+D800 to U+DFFF.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The excluded code points are the "Low Surrogate Area" of Unicode, of which the
|
|
||||||
Unicode Standard says this: "The Low Surrogate Area does not contain any
|
|
||||||
character assignments, consequently no character code charts or namelists are
|
|
||||||
provided for this area. Surrogates are reserved for use with UTF-16 and then
|
|
||||||
must be used in pairs." The code points that are encoded by UTF-16 pairs are
|
|
||||||
available as independent code points in the UTF-8 encoding. (In other words,
|
|
||||||
the whole surrogate thing is a fudge for UTF-16 which unfortunately messes up
|
|
||||||
UTF-8.)
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If an invalid UTF-8 string is passed to PCRE, an error return
|
|
||||||
(PCRE_ERROR_BADUTF8) is given. In some situations, you may already know that
|
|
||||||
your strings are valid, and therefore want to skip these checks in order to
|
|
||||||
improve performance. If you set the PCRE_NO_UTF8_CHECK flag at compile time or
|
|
||||||
at run time, PCRE assumes that the pattern or subject it is given
|
|
||||||
(respectively) contains only valid UTF-8 codes. In this case, it does not
|
|
||||||
diagnose an invalid UTF-8 string.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, what
|
|
||||||
happens depends on why the string is invalid. If the string conforms to the
|
|
||||||
"old" definition of UTF-8 (RFC 2279), it is processed as a string of characters
|
|
||||||
in the range 0 to 0x7FFFFFFF. In other words, apart from the initial validity
|
|
||||||
test, PCRE (when in UTF-8 mode) handles strings according to the more liberal
|
|
||||||
rules of RFC 2279. However, if the string does not even conform to RFC 2279,
|
|
||||||
the result is undefined. Your program may crash.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If you want to process strings of values in the full range 0 to 0x7FFFFFFF,
|
|
||||||
encoded in a UTF-8-like manner as per the old RFC, you can set
|
|
||||||
PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in this
|
|
||||||
situation, you will have to apply your own validity check.
|
|
||||||
</P>
|
|
||||||
<br><b>
|
|
||||||
General comments about UTF-8 mode
|
|
||||||
</b><br>
|
|
||||||
<P>
|
|
||||||
1. An unbraced hexadecimal escape sequence (such as \xb3) matches a two-byte
|
|
||||||
UTF-8 character if the value is greater than 127.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
2. Octal numbers up to \777 are recognized, and match two-byte UTF-8
|
|
||||||
characters for values greater than \177.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
3. Repeat quantifiers apply to complete UTF-8 characters, not to individual
|
|
||||||
bytes, for example: \x{100}{3}.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
4. The dot metacharacter matches one UTF-8 character instead of a single byte.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
5. The escape sequence \C can be used to match a single byte in UTF-8 mode,
|
|
||||||
but its use can lead to some strange effects. This facility is not available in
|
|
||||||
the alternative matching function, <b>pcre_dfa_exec()</b>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
6. The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
|
|
||||||
test characters of any code value, but the characters that PCRE recognizes as
|
|
||||||
digits, spaces, or word characters remain the same set as before, all with
|
|
||||||
values less than 256. This remains true even when PCRE includes Unicode
|
|
||||||
property support, because to do otherwise would slow down PCRE in many common
|
|
||||||
cases. If you really want to test for a wider sense of, say, "digit", you
|
|
||||||
must use Unicode property tests such as \p{Nd}. Note that this also applies to
|
|
||||||
\b, because it is defined in terms of \w and \W.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
7. Similarly, characters that match the POSIX named character classes are all
|
|
||||||
low-valued characters.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
8. However, the Perl 5.10 horizontal and vertical whitespace matching escapes
|
|
||||||
(\h, \H, \v, and \V) do match all the appropriate Unicode characters.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
9. Case-insensitive matching applies only to characters whose values are less
|
|
||||||
than 128, unless PCRE is built with Unicode property support. Even when Unicode
|
|
||||||
property support is available, PCRE still uses its own character tables when
|
|
||||||
checking the case of low-valued characters, so as not to degrade performance.
|
|
||||||
The Unicode property information is used only for characters with higher
|
|
||||||
values. Even when Unicode property support is available, PCRE supports
|
|
||||||
case-insensitive matching only when there is a one-to-one mapping between a
|
|
||||||
letter's cases. There are a small number of many-to-one mappings in Unicode;
|
|
||||||
these are not supported by PCRE.
|
|
||||||
</P>
|
|
||||||
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
@ -295,11 +193,11 @@ Putting an actual email address here seems to have been a spam magnet, so I've
|
|||||||
taken it away. If you want to email me, use my two initials, followed by the
|
taken it away. If you want to email me, use my two initials, followed by the
|
||||||
two digits 10, at the domain cam.ac.uk.
|
two digits 10, at the domain cam.ac.uk.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 11 April 2009
|
Last updated: 11 November 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2009 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
383
tools/pcre/doc/html/pcre16.html
Normal file
383
tools/pcre/doc/html/pcre16.html
Normal file
@ -0,0 +1,383 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre16 specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre16 man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<ul>
|
||||||
|
<li><a name="TOC1" href="#SEC1">PCRE 16-BIT API BASIC FUNCTIONS</a>
|
||||||
|
<li><a name="TOC2" href="#SEC2">PCRE 16-BIT API STRING EXTRACTION FUNCTIONS</a>
|
||||||
|
<li><a name="TOC3" href="#SEC3">PCRE 16-BIT API AUXILIARY FUNCTIONS</a>
|
||||||
|
<li><a name="TOC4" href="#SEC4">PCRE 16-BIT API INDIRECTED FUNCTIONS</a>
|
||||||
|
<li><a name="TOC5" href="#SEC5">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a>
|
||||||
|
<li><a name="TOC6" href="#SEC6">THE PCRE 16-BIT LIBRARY</a>
|
||||||
|
<li><a name="TOC7" href="#SEC7">THE HEADER FILE</a>
|
||||||
|
<li><a name="TOC8" href="#SEC8">THE LIBRARY NAME</a>
|
||||||
|
<li><a name="TOC9" href="#SEC9">STRING TYPES</a>
|
||||||
|
<li><a name="TOC10" href="#SEC10">STRUCTURE TYPES</a>
|
||||||
|
<li><a name="TOC11" href="#SEC11">16-BIT FUNCTIONS</a>
|
||||||
|
<li><a name="TOC12" href="#SEC12">SUBJECT STRING OFFSETS</a>
|
||||||
|
<li><a name="TOC13" href="#SEC13">NAMED SUBPATTERNS</a>
|
||||||
|
<li><a name="TOC14" href="#SEC14">OPTION NAMES</a>
|
||||||
|
<li><a name="TOC15" href="#SEC15">CHARACTER CODES</a>
|
||||||
|
<li><a name="TOC16" href="#SEC16">ERROR NAMES</a>
|
||||||
|
<li><a name="TOC17" href="#SEC17">ERROR TEXTS</a>
|
||||||
|
<li><a name="TOC18" href="#SEC18">CALLOUTS</a>
|
||||||
|
<li><a name="TOC19" href="#SEC19">TESTING</a>
|
||||||
|
<li><a name="TOC20" href="#SEC20">NOT SUPPORTED IN 16-BIT MODE</a>
|
||||||
|
<li><a name="TOC21" href="#SEC21">AUTHOR</a>
|
||||||
|
<li><a name="TOC22" href="#SEC22">REVISION</a>
|
||||||
|
</ul>
|
||||||
|
<P>
|
||||||
|
<b>#include &lt;pcre.h&gt;</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC1" href="#TOC1">PCRE 16-BIT API BASIC FUNCTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
|
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
|
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
|
<b>int *<i>errorcodeptr</i>,</b>
|
||||||
|
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
|
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
||||||
|
<b>const char **<i>errptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
|
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
|
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
|
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC2" href="#TOC1">PCRE 16-BIT API STRING EXTRACTION FUNCTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||||
|
<b>PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||||
|
<b>int <i>buffersize</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>name</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
||||||
|
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC3" href="#TOC1">PCRE 16-BIT API AUXILIARY FUNCTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
||||||
|
<b>pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>const unsigned char *pcre16_maketables(void);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
|
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>const char *pcre16_version(void);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
||||||
|
<b>pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC4" href="#TOC1">PCRE 16-BIT API INDIRECTED FUNCTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
<b>void *(*pcre16_malloc)(size_t);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void (*pcre16_free)(void *);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void *(*pcre16_stack_malloc)(size_t);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void (*pcre16_stack_free)(void *);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC5" href="#TOC1">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a><br>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
||||||
|
<b>int <i>keep_boms</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC6" href="#TOC1">THE PCRE 16-BIT LIBRARY</a><br>
|
||||||
|
<P>
|
||||||
|
Starting with release 8.30, it is possible to compile a PCRE library that
|
||||||
|
supports 16-bit character strings, including UTF-16 strings, as well as or
|
||||||
|
instead of the original 8-bit library. The majority of the work to make this
|
||||||
|
possible was done by Zoltan Herczeg. The two libraries contain identical sets
|
||||||
|
of functions, used in exactly the same way. Only the names of the functions and
|
||||||
|
the data types of their arguments and results are different. To avoid
|
||||||
|
over-complication and reduce the documentation maintenance load, most of the
|
||||||
|
PCRE documentation describes the 8-bit library, with only occasional references
|
||||||
|
to the 16-bit library. This page describes what is different when you use the
|
||||||
|
16-bit library.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
WARNING: A single application can be linked with both libraries, but you must
|
||||||
|
take care when processing any particular pattern to use functions from just one
|
||||||
|
library. For example, if you want to study a pattern that was compiled with
|
||||||
|
<b>pcre16_compile()</b>, you must do so with <b>pcre16_study()</b>, not
|
||||||
|
<b>pcre_study()</b>, and you must free the study data with
|
||||||
|
<b>pcre16_free_study()</b>.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC7" href="#TOC1">THE HEADER FILE</a><br>
|
||||||
|
<P>
|
||||||
|
There is only one header file, <b>pcre.h</b>. It contains prototypes for all the
|
||||||
|
functions in all libraries, as well as definitions of flags, structures, error
|
||||||
|
codes, etc.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC8" href="#TOC1">THE LIBRARY NAME</a><br>
|
||||||
|
<P>
|
||||||
|
In Unix-like systems, the 16-bit library is called <b>libpcre16</b>, and can
|
||||||
|
normally be accessed by adding <b>-lpcre16</b> to the command for linking an
|
||||||
|
application that uses PCRE.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC9" href="#TOC1">STRING TYPES</a><br>
|
||||||
|
<P>
|
||||||
|
In the 8-bit library, strings are passed to PCRE library functions as vectors
|
||||||
|
of bytes with the C type "char *". In the 16-bit library, strings are passed as
|
||||||
|
vectors of unsigned 16-bit quantities. The macro PCRE_UCHAR16 specifies an
|
||||||
|
appropriate data type, and PCRE_SPTR16 is defined as "const PCRE_UCHAR16 *". In
|
||||||
|
very many environments, "short int" is a 16-bit data type. When PCRE is built,
|
||||||
|
it defines PCRE_UCHAR16 as "unsigned short int", but checks that it really is a
|
||||||
|
16-bit data type. If it is not, the build fails with an error message telling
|
||||||
|
the maintainer to modify the definition appropriately.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC10" href="#TOC1">STRUCTURE TYPES</a><br>
|
||||||
|
<P>
|
||||||
|
The types of the opaque structures that are used for compiled 16-bit patterns
|
||||||
|
and JIT stacks are <b>pcre16</b> and <b>pcre16_jit_stack</b> respectively. The
|
||||||
|
type of the user-accessible structure that is returned by <b>pcre16_study()</b>
|
||||||
|
is <b>pcre16_extra</b>, and the type of the structure that is used for passing
|
||||||
|
data to a callout function is <b>pcre16_callout_block</b>. These structures
|
||||||
|
contain the same fields, with the same names, as their 8-bit counterparts. The
|
||||||
|
only difference is that pointers to character strings are 16-bit instead of
|
||||||
|
8-bit types.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC11" href="#TOC1">16-BIT FUNCTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
For every function in the 8-bit library there is a corresponding function in
|
||||||
|
the 16-bit library with a name that starts with <b>pcre16_</b> instead of
|
||||||
|
<b>pcre_</b>. The prototypes are listed above. In addition, there is one extra
|
||||||
|
function, <b>pcre16_utf16_to_host_byte_order()</b>. This is a utility function
|
||||||
|
that converts a UTF-16 character string to host byte order if necessary. The
|
||||||
|
other 16-bit functions expect the strings they are passed to be in host byte
|
||||||
|
order.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The <i>input</i> and <i>output</i> arguments of
|
||||||
|
<b>pcre16_utf16_to_host_byte_order()</b> may point to the same address, that is,
|
||||||
|
conversion in place is supported. The output buffer must be at least as long as
|
||||||
|
the input.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The <i>length</i> argument specifies the number of 16-bit data units in the
|
||||||
|
input string; a negative value specifies a zero-terminated string.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If <i>byte_order</i> is NULL, it is assumed that the string starts off in host
|
||||||
|
byte order. This may be changed by byte-order marks (BOMs) anywhere in the
|
||||||
|
string (commonly as the first character).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If <i>byte_order</i> is not NULL, a non-zero value of the integer to which it
|
||||||
|
points means that the input starts off in host byte order, otherwise the
|
||||||
|
opposite order is assumed. Again, BOMs in the string can change this. The final
|
||||||
|
byte order is passed back at the end of processing.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If <i>keep_boms</i> is not zero, byte-order mark characters (0xfeff) are copied
|
||||||
|
into the output string. Otherwise they are discarded.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The result of the function is the number of 16-bit units placed into the output
|
||||||
|
buffer, including the zero terminator if the string was zero-terminated.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC12" href="#TOC1">SUBJECT STRING OFFSETS</a><br>
|
||||||
|
<P>
|
||||||
|
The offsets within subject strings that are returned by the matching functions
|
||||||
|
are in 16-bit units rather than bytes.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC13" href="#TOC1">NAMED SUBPATTERNS</a><br>
|
||||||
|
<P>
|
||||||
|
The name-to-number translation table that is maintained for named subpatterns
|
||||||
|
uses 16-bit characters. The <b>pcre16_get_stringtable_entries()</b> function
|
||||||
|
returns the length of each entry in the table as the number of 16-bit data
|
||||||
|
units.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC14" href="#TOC1">OPTION NAMES</a><br>
|
||||||
|
<P>
|
||||||
|
There are two new general option names, PCRE_UTF16 and PCRE_NO_UTF16_CHECK,
|
||||||
|
which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In
|
||||||
|
fact, these new options define the same bits in the options word. There is a
|
||||||
|
discussion about the
|
||||||
|
<a href="pcreunicode.html#utf16strings">validity of UTF-16 strings</a>
|
||||||
|
in the
|
||||||
|
<a href="pcreunicode.html"><b>pcreunicode</b></a>
|
||||||
|
page.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
For the <b>pcre16_config()</b> function there is an option PCRE_CONFIG_UTF16
|
||||||
|
that returns 1 if UTF-16 support is configured, otherwise 0. If this option is
|
||||||
|
given to <b>pcre_config()</b> or <b>pcre32_config()</b>, or if the
|
||||||
|
PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 option is given to <b>pcre16_config()</b>,
|
||||||
|
the result is the PCRE_ERROR_BADOPTION error.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC15" href="#TOC1">CHARACTER CODES</a><br>
|
||||||
|
<P>
|
||||||
|
In 16-bit mode, when PCRE_UTF16 is not set, character values are treated in the
|
||||||
|
same way as in 8-bit, non UTF-8 mode, except, of course, that they can range
|
||||||
|
from 0 to 0xffff instead of 0 to 0xff. Character types for characters less than
|
||||||
|
0xff can therefore be influenced by the locale in the same way as before.
|
||||||
|
Characters greater than 0xff have only one case, and no "type" (such as letter
|
||||||
|
or digit).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
In UTF-16 mode, the character code is Unicode, in the range 0 to 0x10ffff, with
|
||||||
|
the exception of values in the range 0xd800 to 0xdfff because those are
|
||||||
|
"surrogate" values that are used in pairs to encode values greater than 0xffff.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
A UTF-16 string can indicate its endianness by a special code known as a
|
||||||
|
byte-order mark (BOM). The PCRE functions do not handle this, expecting strings
|
||||||
|
to be in host byte order. A utility function called
|
||||||
|
<b>pcre16_utf16_to_host_byte_order()</b> is provided to help with this (see
|
||||||
|
above).
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC16" href="#TOC1">ERROR NAMES</a><br>
|
||||||
|
<P>
|
||||||
|
The errors PCRE_ERROR_BADUTF16_OFFSET and PCRE_ERROR_SHORTUTF16 correspond to
|
||||||
|
their 8-bit counterparts. The error PCRE_ERROR_BADMODE is given when a compiled
|
||||||
|
pattern is passed to a function that processes patterns in the other
|
||||||
|
mode, for example, if a pattern compiled with <b>pcre_compile()</b> is passed to
|
||||||
|
<b>pcre16_exec()</b>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There are new error codes whose names begin with PCRE_UTF16_ERR for invalid
|
||||||
|
UTF-16 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that
|
||||||
|
are described in the section entitled
|
||||||
|
<a href="pcreapi.html#badutf8reasons">"Reason codes for invalid UTF-8 strings"</a>
|
||||||
|
in the main
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
page. The UTF-16 errors are:
|
||||||
|
<pre>
|
||||||
|
PCRE_UTF16_ERR1 Missing low surrogate at end of string
|
||||||
|
PCRE_UTF16_ERR2 Invalid low surrogate follows high surrogate
|
||||||
|
PCRE_UTF16_ERR3 Isolated low surrogate
|
||||||
|
PCRE_UTF16_ERR4 Non-character
|
||||||
|
</PRE>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC17" href="#TOC1">ERROR TEXTS</a><br>
|
||||||
|
<P>
|
||||||
|
If there is an error while compiling a pattern, the error text that is passed
|
||||||
|
back by <b>pcre16_compile()</b> or <b>pcre16_compile2()</b> is still an 8-bit
|
||||||
|
character string, zero-terminated.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC18" href="#TOC1">CALLOUTS</a><br>
|
||||||
|
<P>
|
||||||
|
The <i>subject</i> and <i>mark</i> fields in the callout block that is passed to
|
||||||
|
a callout function point to 16-bit vectors.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC19" href="#TOC1">TESTING</a><br>
|
||||||
|
<P>
|
||||||
|
The <b>pcretest</b> program continues to operate with 8-bit input and output
|
||||||
|
files, but it can be used for testing the 16-bit library. If it is run with the
|
||||||
|
command line option <b>-16</b>, patterns and subject strings are converted from
|
||||||
|
8-bit to 16-bit before being passed to PCRE, and the 16-bit library functions
|
||||||
|
are used instead of the 8-bit ones. Returned 16-bit strings are converted to
|
||||||
|
8-bit for output. If neither the 8-bit nor the 32-bit library was compiled,
|
||||||
|
<b>pcretest</b> defaults to 16-bit and the <b>-16</b> option is ignored.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
When PCRE is being built, the <b>RunTest</b> script that is called by "make
|
||||||
|
check" uses the <b>pcretest</b> <b>-C</b> option to discover which of the 8-bit,
|
||||||
|
16-bit and 32-bit libraries has been built, and runs the tests appropriately.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC20" href="#TOC1">NOT SUPPORTED IN 16-BIT MODE</a><br>
|
||||||
|
<P>
|
||||||
|
Not all the features of the 8-bit library are available with the 16-bit
|
||||||
|
library. The C++ and POSIX wrapper functions support only the 8-bit library,
|
||||||
|
and the <b>pcregrep</b> program is at present 8-bit only.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC21" href="#TOC1">AUTHOR</a><br>
|
||||||
|
<P>
|
||||||
|
Philip Hazel
|
||||||
|
<br>
|
||||||
|
University Computing Service
|
||||||
|
<br>
|
||||||
|
Cambridge CB2 3QH, England.
|
||||||
|
<br>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
|
||||||
|
<P>
|
||||||
|
Last updated: 08 November 2012
|
||||||
|
<br>
|
||||||
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
|
<br>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
76
tools/pcre/doc/html/pcre_assign_jit_stack.html
Normal file
76
tools/pcre/doc/html/pcre_assign_jit_stack.html
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre_assign_jit_stack specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre_assign_jit_stack man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SYNOPSIS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>#include &lt;pcre.h&gt;</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
|
||||||
|
<b>pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
||||||
|
<b>pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
|
||||||
|
<b>pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
DESCRIPTION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
This function provides control over the memory used as a stack at run-time by a
|
||||||
|
call to <b>pcre[16|32]_exec()</b> with a pattern that has been successfully
|
||||||
|
compiled with JIT optimization. The arguments are:
|
||||||
|
<pre>
|
||||||
|
extra the data pointer returned by <b>pcre[16|32]_study()</b>
|
||||||
|
callback a callback function
|
||||||
|
data a JIT stack or a value to be passed to the callback
|
||||||
|
function
|
||||||
|
</PRE>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32K block on
|
||||||
|
the machine stack is used.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must
|
||||||
|
be a valid JIT stack, the result of calling <b>pcre[16|32]_jit_stack_alloc()</b>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If <i>callback</i> is not NULL, it is called with <i>data</i> as an argument at
|
||||||
|
the start of matching, in order to set up a JIT stack. If the result is NULL,
|
||||||
|
the internal 32K stack is used; otherwise the return value must be a valid JIT
|
||||||
|
stack, the result of calling <b>pcre[16|32]_jit_stack_alloc()</b>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
You may safely assign the same JIT stack to multiple patterns, as long as they
|
||||||
|
are all matched in the same thread. In a multithreaded application, each thread
|
||||||
|
must use its own JIT stack. For more details, see the
|
||||||
|
<a href="pcrejit.html"><b>pcrejit</b></a>
|
||||||
|
page.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a complete description of the PCRE native API in the
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||||
|
page.
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
@ -23,13 +23,23 @@ SYNOPSIS
|
|||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
|
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
|
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
|
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
|
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This function compiles a regular expression into an internal form. It is the
|
This function compiles a regular expression into an internal form. It is the
|
||||||
same as <b>pcre_compile2()</b>, except for the absence of the <i>errorcodeptr</i>
|
same as <b>pcre[16|32]_compile2()</b>, except for the absence of the
|
||||||
argument. Its arguments are:
|
<i>errorcodeptr</i> argument. Its arguments are:
|
||||||
<pre>
|
<pre>
|
||||||
<i>pattern</i> A zero-terminated string containing the
|
<i>pattern</i> A zero-terminated string containing the
|
||||||
regular expression to be compiled
|
regular expression to be compiled
|
||||||
@ -49,7 +59,7 @@ The option bits are:
|
|||||||
PCRE_DOLLAR_ENDONLY $ not to match newline at end
|
PCRE_DOLLAR_ENDONLY $ not to match newline at end
|
||||||
PCRE_DOTALL . matches anything including NL
|
PCRE_DOTALL . matches anything including NL
|
||||||
PCRE_DUPNAMES Allow duplicate names for subpatterns
|
PCRE_DUPNAMES Allow duplicate names for subpatterns
|
||||||
PCRE_EXTENDED Ignore whitespace and # comments
|
PCRE_EXTENDED Ignore white space and # comments
|
||||||
PCRE_EXTRA PCRE extra features
|
PCRE_EXTRA PCRE extra features
|
||||||
(not much use currently)
|
(not much use currently)
|
||||||
PCRE_FIRSTLINE Force matching to be before newline
|
PCRE_FIRSTLINE Force matching to be before newline
|
||||||
@ -63,14 +73,23 @@ The option bits are:
|
|||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||||
theses (named ones available)
|
theses (named ones available)
|
||||||
PCRE_UNGREEDY Invert greediness of quantifiers
|
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||||
PCRE_UTF8 Run in UTF-8 mode
|
validity (only relevant if
|
||||||
|
PCRE_UTF16 is set)
|
||||||
|
PCRE_NO_UTF32_CHECK Do not check the pattern for UTF-32
|
||||||
|
validity (only relevant if
|
||||||
|
PCRE_UTF32 is set)
|
||||||
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
|
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
|
||||||
validity (only relevant if
|
validity (only relevant if
|
||||||
PCRE_UTF8 is set)
|
PCRE_UTF8 is set)
|
||||||
|
PCRE_UCP Use Unicode properties for \d, \w, etc.
|
||||||
|
PCRE_UNGREEDY Invert greediness of quantifiers
|
||||||
|
PCRE_UTF16 Run <b>pcre16_compile()</b> in UTF-16 mode
|
||||||
|
PCRE_UTF32 Run <b>pcre32_compile()</b> in UTF-32 mode
|
||||||
|
PCRE_UTF8 Run <b>pcre_compile()</b> in UTF-8 mode
|
||||||
</pre>
|
</pre>
|
||||||
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and
|
PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and
|
||||||
PCRE_NO_UTF8_CHECK.
|
PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The yield of the function is a pointer to a private data structure that
|
The yield of the function is a pointer to a private data structure that
|
||||||
|
@ -24,15 +24,25 @@ SYNOPSIS
|
|||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
|
<b>int *<i>errorcodeptr</i>,</b>
|
||||||
|
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
|
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
|
<b>int *<i>errorcodeptr</i>,</b>
|
||||||
|
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
|
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This function compiles a regular expression into an internal form. It is the
|
This function compiles a regular expression into an internal form. It is the
|
||||||
same as <b>pcre_compile()</b>, except for the addition of the <i>errorcodeptr</i>
|
same as <b>pcre[16|32]_compile()</b>, except for the addition of the
|
||||||
argument. The arguments are:
|
<i>errorcodeptr</i> argument. The arguments are:
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<pre>
|
<pre>
|
||||||
<i>pattern</i> A zero-terminated string containing the
|
<i>pattern</i> A zero-terminated string containing the
|
||||||
regular expression to be compiled
|
regular expression to be compiled
|
||||||
@ -45,32 +55,45 @@ argument. The arguments are:
|
|||||||
</pre>
|
</pre>
|
||||||
The option bits are:
|
The option bits are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_ANCHORED Force pattern anchoring
|
PCRE_ANCHORED Force pattern anchoring
|
||||||
PCRE_AUTO_CALLOUT Compile automatic callouts
|
PCRE_AUTO_CALLOUT Compile automatic callouts
|
||||||
PCRE_CASELESS Do caseless matching
|
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
||||||
PCRE_DOLLAR_ENDONLY $ not to match newline at end
|
PCRE_BSR_UNICODE \R matches all Unicode line endings
|
||||||
PCRE_DOTALL . matches anything including NL
|
PCRE_CASELESS Do caseless matching
|
||||||
PCRE_DUPNAMES Allow duplicate names for subpatterns
|
PCRE_DOLLAR_ENDONLY $ not to match newline at end
|
||||||
PCRE_EXTENDED Ignore whitespace and # comments
|
PCRE_DOTALL . matches anything including NL
|
||||||
PCRE_EXTRA PCRE extra features
|
PCRE_DUPNAMES Allow duplicate names for subpatterns
|
||||||
(not much use currently)
|
PCRE_EXTENDED Ignore white space and # comments
|
||||||
PCRE_FIRSTLINE Force matching to be before newline
|
PCRE_EXTRA PCRE extra features
|
||||||
PCRE_MULTILINE ^ and $ match newlines within data
|
(not much use currently)
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_FIRSTLINE Force matching to be before newline
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences
|
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
PCRE_MULTILINE ^ and $ match newlines within data
|
||||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
sequences
|
||||||
theses (named ones available)
|
PCRE_NEWLINE_CR Set CR as the newline sequence
|
||||||
PCRE_UNGREEDY Invert greediness of quantifiers
|
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
||||||
PCRE_UTF8 Run in UTF-8 mode
|
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||||
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
|
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||||
validity (only relevant if
|
theses (named ones available)
|
||||||
PCRE_UTF8 is set)
|
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||||
|
validity (only relevant if
|
||||||
|
PCRE_UTF16 is set)
|
||||||
|
PCRE_NO_UTF32_CHECK Do not check the pattern for UTF-32
|
||||||
|
validity (only relevant if
|
||||||
|
PCRE_UTF32 is set)
|
||||||
|
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
|
||||||
|
validity (only relevant if
|
||||||
|
PCRE_UTF8 is set)
|
||||||
|
PCRE_UCP Use Unicode properties for \d, \w, etc.
|
||||||
|
PCRE_UNGREEDY Invert greediness of quantifiers
|
||||||
|
PCRE_UTF16 Run <b>pcre16_compile()</b> in UTF-16 mode
|
||||||
|
PCRE_UTF32 Run <b>pcre32_compile()</b> in UTF-32 mode
|
||||||
|
PCRE_UTF8 Run <b>pcre_compile()</b> in UTF-8 mode
|
||||||
</pre>
|
</pre>
|
||||||
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and
|
PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and
|
||||||
PCRE_NO_UTF8_CHECK.
|
PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The yield of the function is a pointer to a private data structure that
|
The yield of the function is a pointer to a private data structure that
|
||||||
|
@ -21,19 +21,32 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This function makes it possible for a client program to find out which optional
|
This function makes it possible for a client program to find out which optional
|
||||||
features are available in the version of the PCRE library it is using. Its
|
features are available in the version of the PCRE library it is using. The
|
||||||
arguments are as follows:
|
arguments are as follows:
|
||||||
<pre>
|
<pre>
|
||||||
<i>what</i> A code specifying what information is required
|
<i>what</i> A code specifying what information is required
|
||||||
<i>where</i> Points to where to put the data
|
<i>where</i> Points to where to put the data
|
||||||
</pre>
|
</pre>
|
||||||
The available codes are:
|
The <i>where</i> argument must point to an integer variable, except for
|
||||||
|
PCRE_CONFIG_MATCH_LIMIT and PCRE_CONFIG_MATCH_LIMIT_RECURSION, when it must
|
||||||
|
point to an unsigned long integer. The available codes are:
|
||||||
<pre>
|
<pre>
|
||||||
|
PCRE_CONFIG_JIT Availability of just-in-time compiler
|
||||||
|
support (1=yes 0=no)
|
||||||
|
PCRE_CONFIG_JITTARGET String containing information about the
|
||||||
|
target architecture for the JIT compiler,
|
||||||
|
or NULL if there is no JIT support
|
||||||
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
||||||
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
||||||
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
||||||
@ -48,16 +61,24 @@ The available codes are:
|
|||||||
0 all Unicode line endings
|
0 all Unicode line endings
|
||||||
1 CR, LF, or CRLF only
|
1 CR, LF, or CRLF only
|
||||||
PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
|
PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
|
||||||
Threshold of return slots, above
|
Threshold of return slots, above which
|
||||||
which <b>malloc()</b> is used by
|
<b>malloc()</b> is used by the POSIX API
|
||||||
the POSIX API
|
|
||||||
PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap)
|
PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap)
|
||||||
PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no)
|
PCRE_CONFIG_UTF16 Availability of UTF-16 support (1=yes
|
||||||
|
0=no); option for <b>pcre16_config()</b>
|
||||||
|
PCRE_CONFIG_UTF32 Availability of UTF-32 support (1=yes
|
||||||
|
0=no); option for <b>pcre32_config()</b>
|
||||||
|
PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no);
|
||||||
|
option for <b>pcre_config()</b>
|
||||||
PCRE_CONFIG_UNICODE_PROPERTIES
|
PCRE_CONFIG_UNICODE_PROPERTIES
|
||||||
Availability of Unicode property support
|
Availability of Unicode property support
|
||||||
(1=yes 0=no)
|
(1=yes 0=no)
|
||||||
</pre>
|
</pre>
|
||||||
The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise.
|
The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise. That error
|
||||||
|
is also given if PCRE_CONFIG_UTF16 or PCRE_CONFIG_UTF32 is passed to
|
||||||
|
<b>pcre_config()</b>, if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 is passed to
|
||||||
|
<b>pcre16_config()</b>, or if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 is passed to
|
||||||
|
<b>pcre32_config()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
|
@ -24,6 +24,18 @@ SYNOPSIS
|
|||||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||||
|
<b>PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
|
||||||
|
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||||
|
<b>PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -33,8 +45,8 @@ by name, into a given buffer. The arguments are:
|
|||||||
<pre>
|
<pre>
|
||||||
<i>code</i> Pattern that was successfully matched
|
<i>code</i> Pattern that was successfully matched
|
||||||
<i>subject</i> Subject that has been successfully matched
|
<i>subject</i> Subject that has been successfully matched
|
||||||
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used
|
<i>ovector</i> Offset vector that <b>pcre[16|32]_exec()</b> used
|
||||||
<i>stringcount</i> Value returned by <b>pcre_exec()</b>
|
<i>stringcount</i> Value returned by <b>pcre[16|32]_exec()</b>
|
||||||
<i>stringname</i> Name of the required substring
|
<i>stringname</i> Name of the required substring
|
||||||
<i>buffer</i> Buffer to receive the string
|
<i>buffer</i> Buffer to receive the string
|
||||||
<i>buffersize</i> Size of buffer
|
<i>buffersize</i> Size of buffer
|
||||||
|
@ -23,6 +23,16 @@ SYNOPSIS
|
|||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||||
<b>int <i>buffersize</i>);</b>
|
<b>int <i>buffersize</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||||
|
<b>int <i>buffersize</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
||||||
|
<b>int <i>buffersize</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -31,8 +41,8 @@ This is a convenience function for extracting a captured substring into a given
|
|||||||
buffer. The arguments are:
|
buffer. The arguments are:
|
||||||
<pre>
|
<pre>
|
||||||
<i>subject</i> Subject that has been successfully matched
|
<i>subject</i> Subject that has been successfully matched
|
||||||
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used
|
<i>ovector</i> Offset vector that <b>pcre[16|32]_exec()</b> used
|
||||||
<i>stringcount</i> Value returned by <b>pcre_exec()</b>
|
<i>stringcount</i> Value returned by <b>pcre[16|32]_exec()</b>
|
||||||
<i>stringnumber</i> Number of the required substring
|
<i>stringnumber</i> Number of the required substring
|
||||||
<i>buffer</i> Buffer to receive the string
|
<i>buffer</i> Buffer to receive the string
|
||||||
<i>buffersize</i> Size of buffer
|
<i>buffersize</i> Size of buffer
|
||||||
|
@ -24,6 +24,18 @@ SYNOPSIS
|
|||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
|
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
|
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
|
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
|
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
|
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -31,10 +43,11 @@ DESCRIPTION
|
|||||||
This function matches a compiled regular expression against a given subject
|
This function matches a compiled regular expression against a given subject
|
||||||
string, using an alternative matching algorithm that scans the subject string
|
string, using an alternative matching algorithm that scans the subject string
|
||||||
just once (<i>not</i> Perl-compatible). Note that the main, Perl-compatible,
|
just once (<i>not</i> Perl-compatible). Note that the main, Perl-compatible,
|
||||||
matching function is <b>pcre_exec()</b>. The arguments for this function are:
|
matching function is <b>pcre[16|32]_exec()</b>. The arguments for this function
|
||||||
|
are:
|
||||||
<pre>
|
<pre>
|
||||||
<i>code</i> Points to the compiled pattern
|
<i>code</i> Points to the compiled pattern
|
||||||
<i>extra</i> Points to an associated <b>pcre_extra</b> structure,
|
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
|
||||||
or is NULL
|
or is NULL
|
||||||
<i>subject</i> Points to the subject string
|
<i>subject</i> Points to the subject string
|
||||||
<i>length</i> Length of the subject string, in bytes
|
<i>length</i> Length of the subject string, in bytes
|
||||||
@ -48,44 +61,61 @@ matching function is <b>pcre_exec()</b>. The arguments for this function are:
|
|||||||
</pre>
|
</pre>
|
||||||
The options are:
|
The options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_ANCHORED Match only at the first position
|
PCRE_ANCHORED Match only at the first position
|
||||||
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
||||||
PCRE_BSR_UNICODE \R matches all Unicode line endings
|
PCRE_BSR_UNICODE \R matches all Unicode line endings
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, & CRLF as newline sequences
|
||||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
PCRE_NEWLINE_CR Recognize CR as the only newline sequence
|
||||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
PCRE_NEWLINE_CRLF Recognize CRLF as the only newline sequence
|
||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Recognize LF as the only newline sequence
|
||||||
PCRE_NOTBOL Subject is not the beginning of a line
|
PCRE_NOTBOL Subject is not the beginning of a line
|
||||||
PCRE_NOTEOL Subject is not the end of a line
|
PCRE_NOTEOL Subject is not the end of a line
|
||||||
PCRE_NOTEMPTY An empty string is not a valid match
|
PCRE_NOTEMPTY An empty string is not a valid match
|
||||||
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
|
PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject
|
||||||
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
is not a valid match
|
||||||
validity (only relevant if PCRE_UTF8
|
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
|
||||||
was set at compile time)
|
PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16
|
||||||
PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match
|
validity (only relevant if PCRE_UTF16
|
||||||
PCRE_DFA_SHORTEST Return only the shortest match
|
was set at compile time)
|
||||||
PCRE_DFA_RESTART This is a restart after a partial match
|
PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32
|
||||||
|
validity (only relevant if PCRE_UTF32
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
||||||
|
validity (only relevant if PCRE_UTF8
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial
|
||||||
|
PCRE_PARTIAL_SOFT ) match if no full matches are found
|
||||||
|
PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match
|
||||||
|
even if there is a full match as well
|
||||||
|
PCRE_DFA_SHORTEST Return only the shortest match
|
||||||
|
PCRE_DFA_RESTART Restart after a partial match
|
||||||
</pre>
|
</pre>
|
||||||
There are restrictions on what may appear in a pattern when using this matching
|
There are restrictions on what may appear in a pattern when using this matching
|
||||||
function. Details are given in the
|
function. Details are given in the
|
||||||
<a href="pcrematching.html"><b>pcrematching</b></a>
|
<a href="pcrematching.html"><b>pcrematching</b></a>
|
||||||
documentation.
|
documentation. For details of partial matching, see the
|
||||||
|
<a href="pcrepartial.html"><b>pcrepartial</b></a>
|
||||||
|
page.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A <b>pcre_extra</b> structure contains the following fields:
|
A <b>pcre[16|32]_extra</b> structure contains the following fields:
|
||||||
<pre>
|
<pre>
|
||||||
<i>flags</i> Bits indicating which fields are set
|
<i>flags</i> Bits indicating which fields are set
|
||||||
<i>study_data</i> Opaque data from <b>pcre_study()</b>
|
<i>study_data</i> Opaque data from <b>pcre[16|32]_study()</b>
|
||||||
<i>match_limit</i> Limit on internal resource use
|
<i>match_limit</i> Limit on internal resource use
|
||||||
<i>match_limit_recursion</i> Limit on internal recursion depth
|
<i>match_limit_recursion</i> Limit on internal recursion depth
|
||||||
<i>callout_data</i> Opaque data passed back to callouts
|
<i>callout_data</i> Opaque data passed back to callouts
|
||||||
<i>tables</i> Points to character tables or is NULL
|
<i>tables</i> Points to character tables or is NULL
|
||||||
|
<i>mark</i> For passing back a *MARK pointer
|
||||||
|
<i>executable_jit</i> Opaque data from JIT compilation
|
||||||
</pre>
|
</pre>
|
||||||
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
||||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
|
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
|
||||||
PCRE_EXTRA_TABLES. For this matching function, the <i>match_limit</i> and
|
PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT. For this
|
||||||
<i>match_limit_recursion</i> fields are not used, and must not be set.
|
matching function, the <i>match_limit</i> and <i>match_limit_recursion</i> fields
|
||||||
|
are not used, and must not be set. The PCRE_EXTRA_EXECUTABLE_JIT flag and
|
||||||
|
the corresponding variable are ignored.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
|
@ -23,6 +23,16 @@ SYNOPSIS
|
|||||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
|
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
|
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
|
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -32,7 +42,7 @@ string, using a matching algorithm that is similar to Perl's. It returns
|
|||||||
offsets to captured substrings. Its arguments are:
|
offsets to captured substrings. Its arguments are:
|
||||||
<pre>
|
<pre>
|
||||||
<i>code</i> Points to the compiled pattern
|
<i>code</i> Points to the compiled pattern
|
||||||
<i>extra</i> Points to an associated <b>pcre_extra</b> structure,
|
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
|
||||||
or is NULL
|
or is NULL
|
||||||
<i>subject</i> Points to the subject string
|
<i>subject</i> Points to the subject string
|
||||||
<i>length</i> Length of the subject string, in bytes
|
<i>length</i> Length of the subject string, in bytes
|
||||||
@ -44,41 +54,50 @@ offsets to captured substrings. Its arguments are:
|
|||||||
</pre>
|
</pre>
|
||||||
The options are:
|
The options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_ANCHORED Match only at the first position
|
PCRE_ANCHORED Match only at the first position
|
||||||
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
||||||
PCRE_BSR_UNICODE \R matches all Unicode line endings
|
PCRE_BSR_UNICODE \R matches all Unicode line endings
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, & CRLF as newline sequences
|
||||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
PCRE_NEWLINE_CR Recognize CR as the only newline sequence
|
||||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
PCRE_NEWLINE_CRLF Recognize CRLF as the only newline sequence
|
||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Recognize LF as the only newline sequence
|
||||||
PCRE_NOTBOL Subject is not the beginning of a line
|
PCRE_NOTBOL Subject string is not the beginning of a line
|
||||||
PCRE_NOTEOL Subject is not the end of a line
|
PCRE_NOTEOL Subject string is not the end of a line
|
||||||
PCRE_NOTEMPTY An empty string is not a valid match
|
PCRE_NOTEMPTY An empty string is not a valid match
|
||||||
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
|
PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject
|
||||||
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
is not a valid match
|
||||||
validity (only relevant if PCRE_UTF8
|
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
|
||||||
was set at compile time)
|
PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16
|
||||||
PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match
|
validity (only relevant if PCRE_UTF16
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32
|
||||||
|
validity (only relevant if PCRE_UTF32
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
||||||
|
validity (only relevant if PCRE_UTF8
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial
|
||||||
|
PCRE_PARTIAL_SOFT ) match if no full matches are found
|
||||||
|
PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match
|
||||||
|
if that is found before a full match
|
||||||
</pre>
|
</pre>
|
||||||
There are restrictions on what may appear in a pattern when partial matching is
|
For details of partial matching, see the
|
||||||
requested. For details, see the
|
|
||||||
<a href="pcrepartial.html"><b>pcrepartial</b></a>
|
<a href="pcrepartial.html"><b>pcrepartial</b></a>
|
||||||
page.
|
page. A <b>pcre[16|32]_extra</b> structure contains the following fields:
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
A <b>pcre_extra</b> structure contains the following fields:
|
|
||||||
<pre>
|
<pre>
|
||||||
<i>flags</i> Bits indicating which fields are set
|
<i>flags</i> Bits indicating which fields are set
|
||||||
<i>study_data</i> Opaque data from <b>pcre_study()</b>
|
<i>study_data</i> Opaque data from <b>pcre[16|32]_study()</b>
|
||||||
<i>match_limit</i> Limit on internal resource use
|
<i>match_limit</i> Limit on internal resource use
|
||||||
<i>match_limit_recursion</i> Limit on internal recursion depth
|
<i>match_limit_recursion</i> Limit on internal recursion depth
|
||||||
<i>callout_data</i> Opaque data passed back to callouts
|
<i>callout_data</i> Opaque data passed back to callouts
|
||||||
<i>tables</i> Points to character tables or is NULL
|
<i>tables</i> Points to character tables or is NULL
|
||||||
|
<i>mark</i> For passing back a *MARK pointer
|
||||||
|
<i>executable_jit</i> Opaque data from JIT compilation
|
||||||
</pre>
|
</pre>
|
||||||
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
||||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
|
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
|
||||||
PCRE_EXTRA_TABLES.
|
PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>pcre_info specification</title>
|
<title>pcre_free_study specification</title>
|
||||||
</head>
|
</head>
|
||||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
<h1>pcre_info man page</h1>
|
<h1>pcre_free_study man page</h1>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
</p>
|
</p>
|
||||||
@ -19,14 +19,21 @@ SYNOPSIS
|
|||||||
<b>#include <pcre.h></b>
|
<b>#include <pcre.h></b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b>
|
<b>void pcre_free_study(pcre_extra *<i>extra</i>);</b>
|
||||||
<b>*<i>firstcharptr</i>);</b>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre32_free_study(pcre32_extra *<i>extra</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This function is obsolete. You should be using <b>pcre_fullinfo()</b> instead.
|
This function is used to free the memory used for the data generated by a call
|
||||||
|
to <b>pcre[16|32]_study()</b> when it is no longer needed. The argument must be the
|
||||||
|
result of such a call.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
@ -21,13 +21,19 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
|
<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre32_free_substring(PCRE_SPTR32 <i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This is a convenience function for freeing the store obtained by a previous
|
This is a convenience function for freeing the store obtained by a previous
|
||||||
call to <b>pcre_get_substring()</b> or <b>pcre_get_named_substring()</b>. Its
|
call to <b>pcre[16|32]_get_substring()</b> or <b>pcre[16|32]_get_named_substring()</b>.
|
||||||
only argument is a pointer to the string.
|
Its only argument is a pointer to the string.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
|
@ -21,13 +21,19 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
|
<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre32_free_substring_list(PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This is a convenience function for freeing the store obtained by a previous
|
This is a convenience function for freeing the store obtained by a previous
|
||||||
call to <b>pcre_get_substring_list()</b>. Its only argument is a pointer to the
|
call to <b>pcre[16|32]_get_substring_list()</b>. Its only argument is a pointer to
|
||||||
list of string pointers.
|
the list of string pointers.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
|
@ -22,6 +22,14 @@ SYNOPSIS
|
|||||||
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
|
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
|
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -29,7 +37,7 @@ DESCRIPTION
|
|||||||
This function returns information about a compiled pattern. Its arguments are:
|
This function returns information about a compiled pattern. Its arguments are:
|
||||||
<pre>
|
<pre>
|
||||||
<i>code</i> Compiled regular expression
|
<i>code</i> Compiled regular expression
|
||||||
<i>extra</i> Result of <b>pcre_study()</b> or NULL
|
<i>extra</i> Result of <b>pcre[16|32]_study()</b> or NULL
|
||||||
<i>what</i> What information is required
|
<i>what</i> What information is required
|
||||||
<i>where</i> Where to put the information
|
<i>where</i> Where to put the information
|
||||||
</pre>
|
</pre>
|
||||||
@ -38,20 +46,48 @@ The following information is available:
|
|||||||
PCRE_INFO_BACKREFMAX Number of highest back reference
|
PCRE_INFO_BACKREFMAX Number of highest back reference
|
||||||
PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns
|
PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns
|
||||||
PCRE_INFO_DEFAULT_TABLES Pointer to default tables
|
PCRE_INFO_DEFAULT_TABLES Pointer to default tables
|
||||||
PCRE_INFO_FIRSTBYTE Fixed first byte for a match, or
|
PCRE_INFO_FIRSTBYTE Fixed first data unit for a match, or
|
||||||
-1 for start of string
|
-1 for start of string
|
||||||
or after newline, or
|
or after newline, or
|
||||||
-2 otherwise
|
-2 otherwise
|
||||||
PCRE_INFO_FIRSTTABLE Table of first bytes (after studying)
|
PCRE_INFO_FIRSTTABLE Table of first data units (after studying)
|
||||||
|
PCRE_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist
|
||||||
PCRE_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
PCRE_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
||||||
PCRE_INFO_LASTLITERAL Literal last byte required
|
PCRE_INFO_JIT Return 1 after successful JIT compilation
|
||||||
|
PCRE_INFO_JITSIZE Size of JIT compiled code
|
||||||
|
PCRE_INFO_LASTLITERAL Literal last data unit required
|
||||||
|
PCRE_INFO_MINLENGTH Lower bound length of matching strings
|
||||||
PCRE_INFO_NAMECOUNT Number of named subpatterns
|
PCRE_INFO_NAMECOUNT Number of named subpatterns
|
||||||
PCRE_INFO_NAMEENTRYSIZE Size of name table entry
|
PCRE_INFO_NAMEENTRYSIZE Size of name table entry
|
||||||
PCRE_INFO_NAMETABLE Pointer to name table
|
PCRE_INFO_NAMETABLE Pointer to name table
|
||||||
PCRE_INFO_OKPARTIAL Return 1 if partial matching can be tried
|
PCRE_INFO_OKPARTIAL Return 1 if partial matching can be tried
|
||||||
|
(always returns 1 after release 8.00)
|
||||||
PCRE_INFO_OPTIONS Option bits used for compilation
|
PCRE_INFO_OPTIONS Option bits used for compilation
|
||||||
PCRE_INFO_SIZE Size of compiled pattern
|
PCRE_INFO_SIZE Size of compiled pattern
|
||||||
PCRE_INFO_STUDYSIZE Size of study data
|
PCRE_INFO_STUDYSIZE Size of study data
|
||||||
|
PCRE_INFO_FIRSTCHARACTER Fixed first data unit for a match
|
||||||
|
PCRE_INFO_FIRSTCHARACTERFLAGS Returns
|
||||||
|
1 if a first data character is set, which can
|
||||||
|
then be retrieved using PCRE_INFO_FIRSTCHARACTER,
|
||||||
|
2 if the first character is at the start of the data
|
||||||
|
string or after a newline, and
|
||||||
|
0 otherwise
|
||||||
|
PCRE_INFO_REQUIREDCHAR Literal last data unit required
|
||||||
|
PCRE_INFO_REQUIREDCHARFLAGS Returns 1 if the last data character is set (which can then
|
||||||
|
be retrieved using PCRE_INFO_REQUIREDCHAR); 0 otherwise
|
||||||
|
</pre>
|
||||||
|
The <i>where</i> argument must point to an integer variable, except for the
|
||||||
|
following <i>what</i> values:
|
||||||
|
<pre>
|
||||||
|
PCRE_INFO_DEFAULT_TABLES const unsigned char *
|
||||||
|
PCRE_INFO_FIRSTTABLE const unsigned char *
|
||||||
|
PCRE_INFO_NAMETABLE PCRE_SPTR16 (16-bit library)
|
||||||
|
PCRE_INFO_NAMETABLE PCRE_SPTR32 (32-bit library)
|
||||||
|
PCRE_INFO_NAMETABLE const unsigned char * (8-bit library)
|
||||||
|
PCRE_INFO_OPTIONS unsigned long int
|
||||||
|
PCRE_INFO_SIZE size_t
|
||||||
|
PCRE_INFO_FIRSTCHARACTER uint32_t
|
||||||
|
PCRE_INFO_REQUIREDCHAR uint32_t
|
||||||
</pre>
|
</pre>
|
||||||
The yield of the function is zero on success or:
|
The yield of the function is zero on success or:
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -24,6 +24,18 @@ SYNOPSIS
|
|||||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b>const char **<i>stringptr</i>);</b>
|
<b>const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
|
||||||
|
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||||
|
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -33,16 +45,17 @@ arguments are:
|
|||||||
<pre>
|
<pre>
|
||||||
<i>code</i> Compiled pattern
|
<i>code</i> Compiled pattern
|
||||||
<i>subject</i> Subject that has been successfully matched
|
<i>subject</i> Subject that has been successfully matched
|
||||||
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used
|
<i>ovector</i> Offset vector that <b>pcre[16|32]_exec()</b> used
|
||||||
<i>stringcount</i> Value returned by <b>pcre_exec()</b>
|
<i>stringcount</i> Value returned by <b>pcre[16|32]_exec()</b>
|
||||||
<i>stringname</i> Name of the required substring
|
<i>stringname</i> Name of the required substring
|
||||||
<i>stringptr</i> Where to put the string pointer
|
<i>stringptr</i> Where to put the string pointer
|
||||||
</pre>
|
</pre>
|
||||||
The memory in which the substring is placed is obtained by calling
|
The memory in which the substring is placed is obtained by calling
|
||||||
<b>pcre_malloc()</b>. The convenience function <b>pcre_free_substring()</b> can
|
<b>pcre[16|32]_malloc()</b>. The convenience function
|
||||||
be used to free it when it is no longer needed. The yield of the function is
|
<b>pcre[16|32]_free_substring()</b> can be used to free it when it is no longer
|
||||||
the length of the extracted substring, PCRE_ERROR_NOMEMORY if sufficient memory
|
needed. The yield of the function is the length of the extracted substring,
|
||||||
could not be obtained, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
|
PCRE_ERROR_NOMEMORY if sufficient memory could not be obtained, or
|
||||||
|
PCRE_ERROR_NOSUBSTRING if the string name is invalid.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
|
@ -22,6 +22,14 @@ SYNOPSIS
|
|||||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>name</i>);</b>
|
<b>const char *<i>name</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>name</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
|
||||||
|
<b>PCRE_SPTR32 <i>name</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -35,8 +43,8 @@ parenthesis in a compiled pattern. Its arguments are:
|
|||||||
The yield of the function is the number of the parenthesis if the name is
|
The yield of the function is the number of the parenthesis if the name is
|
||||||
found, or PCRE_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed
|
found, or PCRE_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed
|
||||||
(PCRE_DUPNAMES is set), it is not defined which of the numbers is returned by
|
(PCRE_DUPNAMES is set), it is not defined which of the numbers is returned by
|
||||||
<b>pcre_get_stringnumber()</b>. You can obtain the complete list by calling
|
<b>pcre[16|32]_get_stringnumber()</b>. You can obtain the complete list by calling
|
||||||
<b>pcre_get_stringtable_entries()</b>.
|
<b>pcre[16|32]_get_stringtable_entries()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
|
@ -22,6 +22,14 @@ SYNOPSIS
|
|||||||
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
|
||||||
|
<b>PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -29,7 +37,7 @@ DESCRIPTION
|
|||||||
This convenience function finds, for a compiled pattern, the first and last
|
This convenience function finds, for a compiled pattern, the first and last
|
||||||
entries for a given name in the table that translates capturing parenthesis
|
entries for a given name in the table that translates capturing parenthesis
|
||||||
names into numbers. When names are required to be unique (PCRE_DUPNAMES is
|
names into numbers. When names are required to be unique (PCRE_DUPNAMES is
|
||||||
<i>not</i> set), it is usually easier to use <b>pcre_get_stringnumber()</b>
|
<i>not</i> set), it is usually easier to use <b>pcre[16|32]_get_stringnumber()</b>
|
||||||
instead.
|
instead.
|
||||||
<pre>
|
<pre>
|
||||||
<i>code</i> Compiled regular expression
|
<i>code</i> Compiled regular expression
|
||||||
|
@ -23,6 +23,16 @@ SYNOPSIS
|
|||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b>const char **<i>stringptr</i>);</b>
|
<b>const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
|
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -31,16 +41,17 @@ This is a convenience function for extracting a captured substring. The
|
|||||||
arguments are:
|
arguments are:
|
||||||
<pre>
|
<pre>
|
||||||
<i>subject</i> Subject that has been successfully matched
|
<i>subject</i> Subject that has been successfully matched
|
||||||
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used
|
<i>ovector</i> Offset vector that <b>pcre[16|32]_exec()</b> used
|
||||||
<i>stringcount</i> Value returned by <b>pcre_exec()</b>
|
<i>stringcount</i> Value returned by <b>pcre[16|32]_exec()</b>
|
||||||
<i>stringnumber</i> Number of the required substring
|
<i>stringnumber</i> Number of the required substring
|
||||||
<i>stringptr</i> Where to put the string pointer
|
<i>stringptr</i> Where to put the string pointer
|
||||||
</pre>
|
</pre>
|
||||||
The memory in which the substring is placed is obtained by calling
|
The memory in which the substring is placed is obtained by calling
|
||||||
<b>pcre_malloc()</b>. The convenience function <b>pcre_free_substring()</b> can
|
<b>pcre[16|32]_malloc()</b>. The convenience function
|
||||||
be used to free it when it is no longer needed. The yield of the function is
|
<b>pcre[16|32]_free_substring()</b> can be used to free it when it is no longer
|
||||||
the length of the substring, PCRE_ERROR_NOMEMORY if sufficient memory could not
|
needed. The yield of the function is the length of the substring,
|
||||||
be obtained, or PCRE_ERROR_NOSUBSTRING if the string number is invalid.
|
PCRE_ERROR_NOMEMORY if sufficient memory could not be obtained, or
|
||||||
|
PCRE_ERROR_NOSUBSTRING if the string number is invalid.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
|
@ -22,6 +22,14 @@ SYNOPSIS
|
|||||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
||||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
||||||
|
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
|
||||||
|
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -30,17 +38,17 @@ This is a convenience function for extracting a list of all the captured
|
|||||||
substrings. The arguments are:
|
substrings. The arguments are:
|
||||||
<pre>
|
<pre>
|
||||||
<i>subject</i> Subject that has been successfully matched
|
<i>subject</i> Subject that has been successfully matched
|
||||||
<i>ovector</i> Offset vector that <b>pcre_exec</b> used
|
<i>ovector</i> Offset vector that <b>pcre[16|32]_exec</b> used
|
||||||
<i>stringcount</i> Value returned by <b>pcre_exec</b>
|
<i>stringcount</i> Value returned by <b>pcre[16|32]_exec</b>
|
||||||
<i>listptr</i> Where to put a pointer to the list
|
<i>listptr</i> Where to put a pointer to the list
|
||||||
</pre>
|
</pre>
|
||||||
The memory in which the substrings and the list are placed is obtained by
|
The memory in which the substrings and the list are placed is obtained by
|
||||||
calling <b>pcre_malloc()</b>. The convenience function
|
calling <b>pcre[16|32]_malloc()</b>. The convenience function
|
||||||
<b>pcre_free_substring_list()</b> can be used to free it when it is no longer
|
<b>pcre[16|32]_free_substring_list()</b> can be used to free it when it is no
|
||||||
needed. A pointer to a list of pointers is put in the variable whose address is
|
longer needed. A pointer to a list of pointers is put in the variable whose
|
||||||
in <i>listptr</i>. The list is terminated by a NULL pointer. The yield of the
|
address is in <i>listptr</i>. The list is terminated by a NULL pointer. The
|
||||||
function is zero on success or PCRE_ERROR_NOMEMORY if sufficient memory could
|
yield of the function is zero on success or PCRE_ERROR_NOMEMORY if sufficient
|
||||||
not be obtained.
|
memory could not be obtained.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
|
108
tools/pcre/doc/html/pcre_jit_exec.html
Normal file
108
tools/pcre/doc/html/pcre_jit_exec.html
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre_jit_exec specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre_jit_exec man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SYNOPSIS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>#include &#60;pcre.h&#62;</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre_jit_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
|
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
|
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
|
<b>pcre_jit_stack *<i>jstack</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_jit_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
|
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
|
<b>pcre16_jit_stack *<i>jstack</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_jit_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
|
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
|
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
|
<b>pcre32_jit_stack *<i>jstack</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
DESCRIPTION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
This function matches a compiled regular expression that has been successfully
|
||||||
|
studied with one of the JIT options against a given subject string, using a
|
||||||
|
matching algorithm that is similar to Perl's. It is a "fast path" interface to
|
||||||
|
JIT, and it bypasses some of the sanity checks that <b>pcre_exec()</b> applies.
|
||||||
|
It returns offsets to captured substrings. Its arguments are:
|
||||||
|
<pre>
|
||||||
|
<i>code</i> Points to the compiled pattern
|
||||||
|
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
|
||||||
|
or is NULL
|
||||||
|
<i>subject</i> Points to the subject string
|
||||||
|
<i>length</i> Length of the subject string, in bytes
|
||||||
|
<i>startoffset</i> Offset in bytes in the subject at which to
|
||||||
|
start matching
|
||||||
|
<i>options</i> Option bits
|
||||||
|
<i>ovector</i> Points to a vector of ints for result offsets
|
||||||
|
<i>ovecsize</i> Number of elements in the vector (a multiple of 3)
|
||||||
|
<i>jstack</i> Pointer to a JIT stack
|
||||||
|
</pre>
|
||||||
|
The allowed options are:
|
||||||
|
<pre>
|
||||||
|
PCRE_NOTBOL Subject string is not the beginning of a line
|
||||||
|
PCRE_NOTEOL Subject string is not the end of a line
|
||||||
|
PCRE_NOTEMPTY An empty string is not a valid match
|
||||||
|
PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject
|
||||||
|
is not a valid match
|
||||||
|
PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16
|
||||||
|
validity (only relevant if PCRE_UTF16
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32
|
||||||
|
validity (only relevant if PCRE_UTF32
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
||||||
|
validity (only relevant if PCRE_UTF8
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial
|
||||||
|
PCRE_PARTIAL_SOFT ) match if no full matches are found
|
||||||
|
PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match
|
||||||
|
if that is found before a full match
|
||||||
|
</pre>
|
||||||
|
However, the PCRE_NO_UTF[8|16|32]_CHECK options have no effect, as this check
|
||||||
|
is never applied. For details of partial matching, see the
|
||||||
|
<a href="pcrepartial.html"><b>pcrepartial</b></a>
|
||||||
|
page. A <b>pcre_extra</b> structure contains the following fields:
|
||||||
|
<pre>
|
||||||
|
<i>flags</i> Bits indicating which fields are set
|
||||||
|
<i>study_data</i> Opaque data from <b>pcre[16|32]_study()</b>
|
||||||
|
<i>match_limit</i> Limit on internal resource use
|
||||||
|
<i>match_limit_recursion</i> Limit on internal recursion depth
|
||||||
|
<i>callout_data</i> Opaque data passed back to callouts
|
||||||
|
<i>tables</i> Points to character tables or is NULL
|
||||||
|
<i>mark</i> For passing back a *MARK pointer
|
||||||
|
<i>executable_jit</i> Opaque data from JIT compilation
|
||||||
|
</pre>
|
||||||
|
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
||||||
|
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
|
||||||
|
PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a complete description of the PCRE native API in the
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
page and a description of the JIT API in the
|
||||||
|
<a href="pcrejit.html"><b>pcrejit</b></a>
|
||||||
|
page.
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
55
tools/pcre/doc/html/pcre_jit_stack_alloc.html
Normal file
55
tools/pcre/doc/html/pcre_jit_stack_alloc.html
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre_jit_stack_alloc specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre_jit_stack_alloc man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SYNOPSIS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>#include &#60;pcre.h&#62;</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||||
|
<b>int <i>maxsize</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||||
|
<b>int <i>maxsize</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||||
|
<b>int <i>maxsize</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
DESCRIPTION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
This function is used to create a stack for use by the code compiled by the JIT
|
||||||
|
optimization of <b>pcre[16|32]_study()</b>. The arguments are a starting size for
|
||||||
|
the stack, and a maximum size to which it is allowed to grow. The result can be
|
||||||
|
passed to the JIT run-time code by <b>pcre[16|32]_assign_jit_stack()</b>, or that
|
||||||
|
function can set up a callback for obtaining a stack. A maximum stack size of
|
||||||
|
512K to 1M should be more than enough for any pattern. For more details, see
|
||||||
|
the
|
||||||
|
<a href="pcrejit.html"><b>pcrejit</b></a>
|
||||||
|
page.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a complete description of the PCRE native API in the
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||||
|
page.
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
48
tools/pcre/doc/html/pcre_jit_stack_free.html
Normal file
48
tools/pcre/doc/html/pcre_jit_stack_free.html
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre_jit_stack_free specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre_jit_stack_free man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SYNOPSIS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>#include &#60;pcre.h&#62;</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre_jit_stack_free(pcre_jit_stack *<i>stack</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>void pcre32_jit_stack_free(pcre32_jit_stack *<i>stack</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
DESCRIPTION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
This function is used to free a JIT stack that was created by
|
||||||
|
<b>pcre[16|32]_jit_stack_alloc()</b> when it is no longer needed. For more details,
|
||||||
|
see the
|
||||||
|
<a href="pcrejit.html"><b>pcrejit</b></a>
|
||||||
|
page.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a complete description of the PCRE native API in the
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||||
|
page.
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
@ -21,15 +21,21 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>const unsigned char *pcre_maketables(void);</b>
|
<b>const unsigned char *pcre_maketables(void);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>const unsigned char *pcre16_maketables(void);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>const unsigned char *pcre32_maketables(void);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This function builds a set of character tables for character values less than
|
This function builds a set of character tables for character values less than
|
||||||
256. These can be passed to <b>pcre_compile()</b> to override PCRE's internal,
|
256. These can be passed to <b>pcre[16|32]_compile()</b> to override PCRE's
|
||||||
built-in tables (which were made by <b>pcre_maketables()</b> when PCRE was
|
internal, built-in tables (which were made by <b>pcre[16|32]_maketables()</b> when
|
||||||
compiled). You might want to do this if you are using a non-standard locale.
|
PCRE was compiled). You might want to do this if you are using a non-standard
|
||||||
The function yields a pointer to the tables.
|
locale. The function yields a pointer to the tables.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
|
58
tools/pcre/doc/html/pcre_pattern_to_host_byte_order.html
Normal file
58
tools/pcre/doc/html/pcre_pattern_to_host_byte_order.html
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre_pattern_to_host_byte_order specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre_pattern_to_host_byte_order man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SYNOPSIS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>#include &#60;pcre.h&#62;</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
|
||||||
|
<b>pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
||||||
|
<b>pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
|
||||||
|
<b>pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
DESCRIPTION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
This function ensures that the bytes in 2-byte and 4-byte values in a compiled
|
||||||
|
pattern are in the correct order for the current host. It is useful when a
|
||||||
|
pattern that has been compiled on one host is transferred to another that might
|
||||||
|
have different endianness. The arguments are:
|
||||||
|
<pre>
|
||||||
|
<i>code</i> A compiled regular expression
|
||||||
|
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
|
||||||
|
or is NULL
|
||||||
|
<i>tables</i> Pointer to character tables, or NULL to
|
||||||
|
set the built-in default
|
||||||
|
</pre>
|
||||||
|
The result is 0 for success, a negative PCRE_ERROR_xxx value otherwise.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a complete description of the PCRE native API in the
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||||
|
page.
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
@ -21,6 +21,12 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
|
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_refcount(pcre32 *<i>code</i>, int <i>adjust</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
|
@ -22,6 +22,14 @@ SYNOPSIS
|
|||||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>);</b>
|
<b>const char **<i>errptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
||||||
|
<b>const char **<i>errptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
|
||||||
|
<b>const char **<i>errptr</i>);</b>
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -30,11 +38,12 @@ This function studies a compiled pattern, to see if additional information can
|
|||||||
be extracted that might speed up matching. Its arguments are:
|
be extracted that might speed up matching. Its arguments are:
|
||||||
<pre>
|
<pre>
|
||||||
<i>code</i> A compiled regular expression
|
<i>code</i> A compiled regular expression
|
||||||
<i>options</i> Options for <b>pcre_study()</b>
|
<i>options</i> Options for <b>pcre[16|32]_study()</b>
|
||||||
<i>errptr</i> Where to put an error message
|
<i>errptr</i> Where to put an error message
|
||||||
</pre>
|
</pre>
|
||||||
If the function succeeds, it returns a value that can be passed to
|
If the function succeeds, it returns a value that can be passed to
|
||||||
<b>pcre_exec()</b> via its <i>extra</i> argument.
|
<b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> via their <i>extra</i>
|
||||||
|
arguments.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If the function returns NULL, either it could not find any additional
|
If the function returns NULL, either it could not find any additional
|
||||||
@ -42,8 +51,11 @@ information, or there was an error. You can tell the difference by looking at
|
|||||||
the error value. It is NULL in the first case.
|
the error value. It is NULL in the first case.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There are currently no options defined; the value of the second argument should
|
The only option is PCRE_STUDY_JIT_COMPILE. It requests just-in-time compilation
|
||||||
always be zero.
|
if possible. If PCRE has been compiled without JIT support, this option is
|
||||||
|
ignored. See the
|
||||||
|
<a href="pcrejit.html"><b>pcrejit</b></a>
|
||||||
|
page for further details.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
|
57
tools/pcre/doc/html/pcre_utf16_to_host_byte_order.html
Normal file
57
tools/pcre/doc/html/pcre_utf16_to_host_byte_order.html
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre_utf16_to_host_byte_order specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre_utf16_to_host_byte_order man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SYNOPSIS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>#include &#60;pcre.h&#62;</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
|
||||||
|
<b>PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
||||||
|
<b>int <i>keep_boms</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
DESCRIPTION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
This function, which exists only in the 16-bit library, converts a UTF-16
|
||||||
|
string to the correct order for the current host, taking account of any byte
|
||||||
|
order marks (BOMs) within the string. Its arguments are:
|
||||||
|
<pre>
|
||||||
|
<i>output</i> pointer to output buffer, may be the same as <i>input</i>
|
||||||
|
<i>input</i> pointer to input buffer
|
||||||
|
<i>length</i> number of 16-bit units in the input, or negative for
|
||||||
|
a zero-terminated string
|
||||||
|
<i>host_byte_order</i> a NULL value or a non-zero value pointed to means
|
||||||
|
start in host byte order
|
||||||
|
<i>keep_boms</i> if non-zero, BOMs are copied to the output string
|
||||||
|
</pre>
|
||||||
|
The result of the function is the number of 16-bit units placed into the output
|
||||||
|
buffer, including the zero terminator if the string was zero-terminated.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If <i>host_byte_order</i> is not NULL, it is set to indicate the byte order that
|
||||||
|
is current at the end of the string.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a complete description of the PCRE native API in the
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||||
|
page.
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
@ -19,13 +19,20 @@ SYNOPSIS
|
|||||||
<b>#include <pcre.h></b>
|
<b>#include <pcre.h></b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>char *pcre_version(void);</b>
|
<b>const char *pcre_version(void);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>const char *pcre16_version(void);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>const char *pcre32_version(void);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This function returns a character string that gives the version number of the
|
This function (even in the 16-bit and 32-bit libraries) returns a
|
||||||
|
zero-terminated, 8-bit character string that gives the version number of the
|
||||||
PCRE library and the date of its release.
|
PCRE library and the date of its release.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -14,23 +14,28 @@ man page, in case the conversion went wrong.
|
|||||||
<br>
|
<br>
|
||||||
<ul>
|
<ul>
|
||||||
<li><a name="TOC1" href="#SEC1">PCRE BUILD-TIME OPTIONS</a>
|
<li><a name="TOC1" href="#SEC1">PCRE BUILD-TIME OPTIONS</a>
|
||||||
<li><a name="TOC2" href="#SEC2">C++ SUPPORT</a>
|
<li><a name="TOC2" href="#SEC2">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
|
||||||
<li><a name="TOC3" href="#SEC3">UTF-8 SUPPORT</a>
|
<li><a name="TOC3" href="#SEC3">BUILDING SHARED AND STATIC LIBRARIES</a>
|
||||||
<li><a name="TOC4" href="#SEC4">UNICODE CHARACTER PROPERTY SUPPORT</a>
|
<li><a name="TOC4" href="#SEC4">C++ SUPPORT</a>
|
||||||
<li><a name="TOC5" href="#SEC5">CODE VALUE OF NEWLINE</a>
|
<li><a name="TOC5" href="#SEC5">UTF-8, UTF-16 AND UTF-32 SUPPORT</a>
|
||||||
<li><a name="TOC6" href="#SEC6">WHAT \R MATCHES</a>
|
<li><a name="TOC6" href="#SEC6">UNICODE CHARACTER PROPERTY SUPPORT</a>
|
||||||
<li><a name="TOC7" href="#SEC7">BUILDING SHARED AND STATIC LIBRARIES</a>
|
<li><a name="TOC7" href="#SEC7">JUST-IN-TIME COMPILER SUPPORT</a>
|
||||||
<li><a name="TOC8" href="#SEC8">POSIX MALLOC USAGE</a>
|
<li><a name="TOC8" href="#SEC8">CODE VALUE OF NEWLINE</a>
|
||||||
<li><a name="TOC9" href="#SEC9">HANDLING VERY LARGE PATTERNS</a>
|
<li><a name="TOC9" href="#SEC9">WHAT \R MATCHES</a>
|
||||||
<li><a name="TOC10" href="#SEC10">AVOIDING EXCESSIVE STACK USAGE</a>
|
<li><a name="TOC10" href="#SEC10">POSIX MALLOC USAGE</a>
|
||||||
<li><a name="TOC11" href="#SEC11">LIMITING PCRE RESOURCE USAGE</a>
|
<li><a name="TOC11" href="#SEC11">HANDLING VERY LARGE PATTERNS</a>
|
||||||
<li><a name="TOC12" href="#SEC12">CREATING CHARACTER TABLES AT BUILD TIME</a>
|
<li><a name="TOC12" href="#SEC12">AVOIDING EXCESSIVE STACK USAGE</a>
|
||||||
<li><a name="TOC13" href="#SEC13">USING EBCDIC CODE</a>
|
<li><a name="TOC13" href="#SEC13">LIMITING PCRE RESOURCE USAGE</a>
|
||||||
<li><a name="TOC14" href="#SEC14">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
|
<li><a name="TOC14" href="#SEC14">CREATING CHARACTER TABLES AT BUILD TIME</a>
|
||||||
<li><a name="TOC15" href="#SEC15">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a>
|
<li><a name="TOC15" href="#SEC15">USING EBCDIC CODE</a>
|
||||||
<li><a name="TOC16" href="#SEC16">SEE ALSO</a>
|
<li><a name="TOC16" href="#SEC16">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
|
||||||
<li><a name="TOC17" href="#SEC17">AUTHOR</a>
|
<li><a name="TOC17" href="#SEC17">PCREGREP BUFFER SIZE</a>
|
||||||
<li><a name="TOC18" href="#SEC18">REVISION</a>
|
<li><a name="TOC18" href="#SEC18">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a>
|
||||||
|
<li><a name="TOC19" href="#SEC19">DEBUGGING WITH VALGRIND SUPPORT</a>
|
||||||
|
<li><a name="TOC20" href="#SEC20">CODE COVERAGE REPORTING</a>
|
||||||
|
<li><a name="TOC21" href="#SEC21">SEE ALSO</a>
|
||||||
|
<li><a name="TOC22" href="#SEC22">AUTHOR</a>
|
||||||
|
<li><a name="TOC23" href="#SEC23">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br>
|
<br><a name="SEC1" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -39,10 +44,17 @@ the library is compiled. It assumes use of the <b>configure</b> script, where
|
|||||||
the optional features are selected or deselected by providing options to
|
the optional features are selected or deselected by providing options to
|
||||||
<b>configure</b> before running the <b>make</b> command. However, the same
|
<b>configure</b> before running the <b>make</b> command. However, the same
|
||||||
options can be selected in both Unix-like and non-Unix-like environments using
|
options can be selected in both Unix-like and non-Unix-like environments using
|
||||||
the GUI facility of <b>CMakeSetup</b> if you are using <b>CMake</b> instead of
|
the GUI facility of <b>cmake-gui</b> if you are using <b>CMake</b> instead of
|
||||||
<b>configure</b> to build PCRE.
|
<b>configure</b> to build PCRE.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
There is a lot more information about building PCRE without using
|
||||||
|
<b>configure</b> (including information about using <b>CMake</b> or building "by
|
||||||
|
hand") in the file called <i>NON-AUTOTOOLS-BUILD</i>, which is part of the PCRE
|
||||||
|
distribution. You should consult this file as well as the <i>README</i> file if
|
||||||
|
you are building in a non-Unix-like environment.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
The complete list of options for <b>configure</b> (which includes the standard
|
The complete list of options for <b>configure</b> (which includes the standard
|
||||||
ones such as the selection of the installation directory) can be obtained by
|
ones such as the selection of the installation directory) can be obtained by
|
||||||
running
|
running
|
||||||
@ -55,45 +67,93 @@ The following sections include descriptions of options whose names begin with
|
|||||||
--enable and --disable always come in pairs, so the complementary option always
|
--enable and --disable always come in pairs, so the complementary option always
|
||||||
exists as well, but as it specifies the default, it is not described.
|
exists as well, but as it specifies the default, it is not described.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">C++ SUPPORT</a><br>
|
<br><a name="SEC2" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, the <b>configure</b> script will search for a C++ compiler and C++
|
By default, a library called <b>libpcre</b> is built, containing functions that
|
||||||
header files. If it finds them, it automatically builds the C++ wrapper library
|
take string arguments contained in vectors of bytes, either as single-byte
|
||||||
for PCRE. You can disable this by adding
|
characters, or interpreted as UTF-8 strings. You can also build a separate
|
||||||
|
library, called <b>libpcre16</b>, in which strings are contained in vectors of
|
||||||
|
16-bit data units and interpreted either as single-unit characters or UTF-16
|
||||||
|
strings, by adding
|
||||||
|
<pre>
|
||||||
|
--enable-pcre16
|
||||||
|
</pre>
|
||||||
|
to the <b>configure</b> command. You can also build a separate
|
||||||
|
library, called <b>libpcre32</b>, in which strings are contained in vectors of
|
||||||
|
32-bit data units and interpreted either as single-unit characters or UTF-32
|
||||||
|
strings, by adding
|
||||||
|
<pre>
|
||||||
|
--enable-pcre32
|
||||||
|
</pre>
|
||||||
|
to the <b>configure</b> command. If you do not want the 8-bit library, add
|
||||||
|
<pre>
|
||||||
|
--disable-pcre8
|
||||||
|
</pre>
|
||||||
|
as well. At least one of the three libraries must be built. Note that the C++
|
||||||
|
and POSIX wrappers are for the 8-bit library only, and that <b>pcregrep</b> is
|
||||||
|
an 8-bit program. None of these are built if you select only the 16-bit or
|
||||||
|
32-bit libraries.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC3" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
|
||||||
|
<P>
|
||||||
|
The PCRE building process uses <b>libtool</b> to build both shared and static
|
||||||
|
Unix libraries by default. You can suppress one of these by adding one of
|
||||||
|
<pre>
|
||||||
|
--disable-shared
|
||||||
|
--disable-static
|
||||||
|
</pre>
|
||||||
|
to the <b>configure</b> command, as required.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC4" href="#TOC1">C++ SUPPORT</a><br>
|
||||||
|
<P>
|
||||||
|
By default, if the 8-bit library is being built, the <b>configure</b> script
|
||||||
|
will search for a C++ compiler and C++ header files. If it finds them, it
|
||||||
|
automatically builds the C++ wrapper library (which supports only 8-bit
|
||||||
|
strings). You can disable this by adding
|
||||||
<pre>
|
<pre>
|
||||||
--disable-cpp
|
--disable-cpp
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command.
|
to the <b>configure</b> command.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">UTF-8 SUPPORT</a><br>
|
<br><a name="SEC5" href="#TOC1">UTF-8, UTF-16 AND UTF-32 SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
To build PCRE with support for UTF-8 Unicode character strings, add
|
To build PCRE with support for UTF Unicode character strings, add
|
||||||
<pre>
|
<pre>
|
||||||
--enable-utf8
|
--enable-utf
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command. Of itself, this does not make PCRE treat
|
to the <b>configure</b> command. This setting applies to all three libraries,
|
||||||
strings as UTF-8. As well as compiling PCRE with this option, you also have
|
adding support for UTF-8 to the 8-bit library, support for UTF-16 to the 16-bit
|
||||||
have to set the PCRE_UTF8 option when you call the <b>pcre_compile()</b>
|
library, and support for UTF-32 to the 32-bit library. There are no
|
||||||
function.
|
separate options for enabling UTF-8, UTF-16 and UTF-32 independently because
|
||||||
|
that would allow ridiculous settings such as requesting UTF-16 support while
|
||||||
|
building only the 8-bit library. It is not possible to build one library with
|
||||||
|
UTF support and another without in the same configuration. (For backwards
|
||||||
|
compatibility, --enable-utf8 is a synonym of --enable-utf.)
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If you set --enable-utf8 when compiling in an EBCDIC environment, PCRE expects
|
Of itself, this setting does not make PCRE treat strings as UTF-8, UTF-16 or
|
||||||
its input to be either ASCII or UTF-8 (depending on the runtime option). It is
|
UTF-32. As well as compiling PCRE with this option, you also have to set
|
||||||
|
the PCRE_UTF8, PCRE_UTF16 or PCRE_UTF32 option (as appropriate) when you call
|
||||||
|
one of the pattern compiling functions.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If you set --enable-utf when compiling in an EBCDIC environment, PCRE expects
|
||||||
|
its input to be either ASCII or UTF-8 (depending on the run-time option). It is
|
||||||
not possible to support both EBCDIC and UTF-8 codes in the same version of the
|
not possible to support both EBCDIC and UTF-8 codes in the same version of the
|
||||||
library. Consequently, --enable-utf8 and --enable-ebcdic are mutually
|
library. Consequently, --enable-utf and --enable-ebcdic are mutually
|
||||||
exclusive.
|
exclusive.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br>
|
<br><a name="SEC6" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
UTF-8 support allows PCRE to process character values greater than 255 in the
|
UTF support allows the libraries to process character codepoints up to 0x10ffff
|
||||||
strings that it handles. On its own, however, it does not provide any
|
in the strings that they handle. On its own, however, it does not provide any
|
||||||
facilities for accessing the properties of such characters. If you want to be
|
facilities for accessing the properties of such characters. If you want to be
|
||||||
able to use the pattern escapes \P, \p, and \X, which refer to Unicode
|
able to use the pattern escapes \P, \p, and \X, which refer to Unicode
|
||||||
character properties, you must add
|
character properties, you must add
|
||||||
<pre>
|
<pre>
|
||||||
--enable-unicode-properties
|
--enable-unicode-properties
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command. This implies UTF-8 support, even if you have
|
to the <b>configure</b> command. This implies UTF support, even if you have
|
||||||
not explicitly requested it.
|
not explicitly requested it.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
@ -103,7 +163,24 @@ supported. Details are given in the
|
|||||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">CODE VALUE OF NEWLINE</a><br>
|
<br><a name="SEC7" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
|
||||||
|
<P>
|
||||||
|
Just-in-time compiler support is included in the build by specifying
|
||||||
|
<pre>
|
||||||
|
--enable-jit
|
||||||
|
</pre>
|
||||||
|
This support is available only for certain hardware architectures. If this
|
||||||
|
option is set for an unsupported architecture, a compile time error occurs.
|
||||||
|
See the
|
||||||
|
<a href="pcrejit.html"><b>pcrejit</b></a>
|
||||||
|
documentation for a discussion of JIT usage. When JIT support is enabled,
|
||||||
|
pcregrep automatically makes use of it, unless you add
|
||||||
|
<pre>
|
||||||
|
--disable-pcregrep-jit
|
||||||
|
</pre>
|
||||||
|
to the <b>configure</b> command.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC8" href="#TOC1">CODE VALUE OF NEWLINE</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, PCRE interprets the linefeed (LF) character as indicating the end
|
By default, PCRE interprets the linefeed (LF) character as indicating the end
|
||||||
of a line. This is the normal newline character on Unix-like systems. You can
|
of a line. This is the normal newline character on Unix-like systems. You can
|
||||||
@ -136,7 +213,7 @@ Whatever line ending convention is selected when PCRE is built can be
|
|||||||
overridden when the library functions are called. At build time it is
|
overridden when the library functions are called. At build time it is
|
||||||
conventional to use the standard for your operating system.
|
conventional to use the standard for your operating system.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">WHAT \R MATCHES</a><br>
|
<br><a name="SEC9" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, the sequence \R in a pattern matches any Unicode newline sequence,
|
By default, the sequence \R in a pattern matches any Unicode newline sequence,
|
||||||
whatever has been selected as the line ending sequence. If you specify
|
whatever has been selected as the line ending sequence. If you specify
|
||||||
@ -147,19 +224,9 @@ the default is changed so that \R matches only CR, LF, or CRLF. Whatever is
|
|||||||
selected when PCRE is built can be overridden when the library functions are
|
selected when PCRE is built can be overridden when the library functions are
|
||||||
called.
|
called.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
|
<br><a name="SEC10" href="#TOC1">POSIX MALLOC USAGE</a><br>
|
||||||
<P>
|
<P>
|
||||||
The PCRE building process uses <b>libtool</b> to build both shared and static
|
When the 8-bit library is called through the POSIX interface (see the
|
||||||
Unix libraries by default. You can suppress one of these by adding one of
|
|
||||||
<pre>
|
|
||||||
--disable-shared
|
|
||||||
--disable-static
|
|
||||||
</pre>
|
|
||||||
to the <b>configure</b> command, as required.
|
|
||||||
</P>
|
|
||||||
<br><a name="SEC8" href="#TOC1">POSIX MALLOC USAGE</a><br>
|
|
||||||
<P>
|
|
||||||
When PCRE is called through the POSIX interface (see the
|
|
||||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||||
documentation), additional working storage is required for holding the pointers
|
documentation), additional working storage is required for holding the pointers
|
||||||
to capturing substrings, because PCRE requires three integers per substring,
|
to capturing substrings, because PCRE requires three integers per substring,
|
||||||
@ -173,23 +240,26 @@ such as
|
|||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command.
|
to the <b>configure</b> command.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC9" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
|
<br><a name="SEC11" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
|
||||||
<P>
|
<P>
|
||||||
Within a compiled pattern, offset values are used to point from one part to
|
Within a compiled pattern, offset values are used to point from one part to
|
||||||
another (for example, from an opening parenthesis to an alternation
|
another (for example, from an opening parenthesis to an alternation
|
||||||
metacharacter). By default, two-byte values are used for these offsets, leading
|
metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values
|
||||||
to a maximum size for a compiled pattern of around 64K. This is sufficient to
|
are used for these offsets, leading to a maximum size for a compiled pattern of
|
||||||
handle all but the most gigantic patterns. Nevertheless, some people do want to
|
around 64K. This is sufficient to handle all but the most gigantic patterns.
|
||||||
process enormous patterns, so it is possible to compile PCRE to use three-byte
|
Nevertheless, some people do want to process truly enormous patterns, so it is
|
||||||
or four-byte offsets by adding a setting such as
|
possible to compile PCRE to use three-byte or four-byte offsets by adding a
|
||||||
|
setting such as
|
||||||
<pre>
|
<pre>
|
||||||
--with-link-size=3
|
--with-link-size=3
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command. The value given must be 2, 3, or 4. Using
|
to the <b>configure</b> command. The value given must be 2, 3, or 4. For the
|
||||||
|
16-bit library, a value of 3 is rounded up to 4. In these libraries, using
|
||||||
longer offsets slows down the operation of PCRE because it has to load
|
longer offsets slows down the operation of PCRE because it has to load
|
||||||
additional bytes when handling them.
|
additional data when handling them. For the 32-bit library the value is always
|
||||||
|
4 and cannot be overridden; the value of --with-link-size is ignored.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC10" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
|
<br><a name="SEC12" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
|
||||||
<P>
|
<P>
|
||||||
When matching with the <b>pcre_exec()</b> function, PCRE implements backtracking
|
When matching with the <b>pcre_exec()</b> function, PCRE implements backtracking
|
||||||
by making recursive calls to an internal function called <b>match()</b>. In
|
by making recursive calls to an internal function called <b>match()</b>. In
|
||||||
@ -209,7 +279,7 @@ to the <b>configure</b> command. With this configuration, PCRE will use the
|
|||||||
<b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables to call memory
|
<b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables to call memory
|
||||||
management functions. By default these point to <b>malloc()</b> and
|
management functions. By default these point to <b>malloc()</b> and
|
||||||
<b>free()</b>, but you can replace the pointers so that your own functions are
|
<b>free()</b>, but you can replace the pointers so that your own functions are
|
||||||
used.
|
used instead.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Separate functions are provided rather than using <b>pcre_malloc</b> and
|
Separate functions are provided rather than using <b>pcre_malloc</b> and
|
||||||
@ -218,9 +288,9 @@ requested are always the same, and the blocks are always freed in reverse
|
|||||||
order. A calling program might be able to implement optimized functions that
|
order. A calling program might be able to implement optimized functions that
|
||||||
perform better than <b>malloc()</b> and <b>free()</b>. PCRE runs noticeably more
|
perform better than <b>malloc()</b> and <b>free()</b>. PCRE runs noticeably more
|
||||||
slowly when built in this way. This option affects only the <b>pcre_exec()</b>
|
slowly when built in this way. This option affects only the <b>pcre_exec()</b>
|
||||||
function; it is not relevant for the the <b>pcre_dfa_exec()</b> function.
|
function; it is not relevant for <b>pcre_dfa_exec()</b>.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC11" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
|
<br><a name="SEC13" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
|
||||||
<P>
|
<P>
|
||||||
Internally, PCRE has a function called <b>match()</b>, which it calls repeatedly
|
Internally, PCRE has a function called <b>match()</b>, which it calls repeatedly
|
||||||
(sometimes recursively) when matching a pattern with the <b>pcre_exec()</b>
|
(sometimes recursively) when matching a pattern with the <b>pcre_exec()</b>
|
||||||
@ -249,7 +319,7 @@ constraints. However, you can set a lower limit by adding, for example,
|
|||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command. This value can also be overridden at run time.
|
to the <b>configure</b> command. This value can also be overridden at run time.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
<br><a name="SEC14" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE uses fixed tables for processing characters whose code values are less
|
PCRE uses fixed tables for processing characters whose code values are less
|
||||||
than 256. By default, PCRE is built with a set of tables that are distributed
|
than 256. By default, PCRE is built with a set of tables that are distributed
|
||||||
@ -260,13 +330,13 @@ only. If you add
|
|||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command, the distributed tables are no longer used.
|
to the <b>configure</b> command, the distributed tables are no longer used.
|
||||||
Instead, a program called <b>dftables</b> is compiled and run. This outputs the
|
Instead, a program called <b>dftables</b> is compiled and run. This outputs the
|
||||||
source for new set of tables, created in the default locale of your C runtime
|
source for a new set of tables, created in the default locale of your C run-time
|
||||||
system. (This method of replacing the tables does not work if you are cross
|
system. (This method of replacing the tables does not work if you are cross
|
||||||
compiling, because <b>dftables</b> is run on the local host. If you need to
|
compiling, because <b>dftables</b> is run on the local host. If you need to
|
||||||
create alternative tables when cross compiling, you will have to do so "by
|
create alternative tables when cross compiling, you will have to do so "by
|
||||||
hand".)
|
hand".)
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
|
<br><a name="SEC15" href="#TOC1">USING EBCDIC CODE</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE assumes by default that it will run in an environment where the character
|
PCRE assumes by default that it will run in an environment where the character
|
||||||
code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
|
code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
|
||||||
@ -278,9 +348,26 @@ EBCDIC environment by adding
|
|||||||
to the <b>configure</b> command. This setting implies
|
to the <b>configure</b> command. This setting implies
|
||||||
--enable-rebuild-chartables. You should only use it if you know that you are in
|
--enable-rebuild-chartables. You should only use it if you know that you are in
|
||||||
an EBCDIC environment (for example, an IBM mainframe operating system). The
|
an EBCDIC environment (for example, an IBM mainframe operating system). The
|
||||||
--enable-ebcdic option is incompatible with --enable-utf8.
|
--enable-ebcdic option is incompatible with --enable-utf.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC14" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
<P>
|
||||||
|
The EBCDIC character that corresponds to an ASCII LF is assumed to have the
|
||||||
|
value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In
|
||||||
|
such an environment you should use
|
||||||
|
<pre>
|
||||||
|
--enable-ebcdic-nl25
|
||||||
|
</pre>
|
||||||
|
as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR has the
|
||||||
|
same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is <i>not</i>
|
||||||
|
chosen as LF is made to correspond to the Unicode NEL character (which, in
|
||||||
|
Unicode, is 0x85).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The options that select newline behaviour, such as --enable-newline-is-cr,
|
||||||
|
and equivalent run-time options, refer to these character values in an EBCDIC
|
||||||
|
environment.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC16" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, <b>pcregrep</b> reads all files as plain text. You can build it so
|
By default, <b>pcregrep</b> reads all files as plain text. You can build it so
|
||||||
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
|
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
|
||||||
@ -293,7 +380,22 @@ to the <b>configure</b> command. These options naturally require that the
|
|||||||
relevant libraries are installed on your system. Configuration will fail if
|
relevant libraries are installed on your system. Configuration will fail if
|
||||||
they are not.
|
they are not.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
<br><a name="SEC17" href="#TOC1">PCREGREP BUFFER SIZE</a><br>
|
||||||
|
<P>
|
||||||
|
<b>pcregrep</b> uses an internal buffer to hold a "window" on the file it is
|
||||||
|
scanning, in order to be able to output "before" and "after" lines when it
|
||||||
|
finds a match. The size of the buffer is controlled by a parameter whose
|
||||||
|
default value is 20K. The buffer itself is three times this size, but because
|
||||||
|
of the way it is used for holding "before" lines, the longest line that is
|
||||||
|
guaranteed to be processable is the parameter size. You can change the default
|
||||||
|
parameter value by adding, for example,
|
||||||
|
<pre>
|
||||||
|
--with-pcregrep-bufsize=50K
|
||||||
|
</pre>
|
||||||
|
to the <b>configure</b> command. The caller of <b>pcregrep</b> can, however,
|
||||||
|
override this value by specifying a run-time option.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC18" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
If you add
|
If you add
|
||||||
<pre>
|
<pre>
|
||||||
@ -302,7 +404,7 @@ If you add
|
|||||||
to the <b>configure</b> command, <b>pcretest</b> is linked with the
|
to the <b>configure</b> command, <b>pcretest</b> is linked with the
|
||||||
<b>libreadline</b> library, and when its input is from a terminal, it reads it
|
<b>libreadline</b> library, and when its input is from a terminal, it reads it
|
||||||
using the <b>readline()</b> function. This provides line-editing and history
|
using the <b>readline()</b> function. This provides line-editing and history
|
||||||
facilities. Note that <b>libreadline</b> is GPL-licenced, so if you distribute a
|
facilities. Note that <b>libreadline</b> is GPL-licensed, so if you distribute a
|
||||||
binary of <b>pcretest</b> linked in this way, there may be licensing issues.
|
binary of <b>pcretest</b> linked in this way, there may be licensing issues.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
@ -324,11 +426,78 @@ automatically included, you may need to add something like
|
|||||||
</pre>
|
</pre>
|
||||||
immediately before the <b>configure</b> command.
|
immediately before the <b>configure</b> command.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC16" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC19" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcreapi</b>(3), <b>pcre_config</b>(3).
|
By adding the
|
||||||
|
<pre>
|
||||||
|
--enable-valgrind
|
||||||
|
</pre>
|
||||||
|
option to the <b>configure</b> command, PCRE will use valgrind annotations
|
||||||
|
to mark certain memory regions as unaddressable. This allows it to detect
|
||||||
|
invalid memory accesses, and is mostly useful for debugging PCRE itself.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC17" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC20" href="#TOC1">CODE COVERAGE REPORTING</a><br>
|
||||||
|
<P>
|
||||||
|
If your C compiler is gcc, you can build a version of PCRE that can generate a
|
||||||
|
code coverage report for its test suite. To enable this, you must install
|
||||||
|
<b>lcov</b> version 1.6 or above. Then specify
|
||||||
|
<pre>
|
||||||
|
--enable-coverage
|
||||||
|
</pre>
|
||||||
|
to the <b>configure</b> command and build PCRE in the usual way.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Note that using <b>ccache</b> (a caching C compiler) is incompatible with code
|
||||||
|
coverage reporting. If you have configured <b>ccache</b> to run automatically
|
||||||
|
on your system, you must set the environment variable
|
||||||
|
<pre>
|
||||||
|
CCACHE_DISABLE=1
|
||||||
|
</pre>
|
||||||
|
before running <b>make</b> to build PCRE, so that <b>ccache</b> is not used.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
When --enable-coverage is used, the following additional targets are added to the
|
||||||
|
<i>Makefile</i>:
|
||||||
|
<pre>
|
||||||
|
make coverage
|
||||||
|
</pre>
|
||||||
|
This creates a fresh coverage report for the PCRE test suite. It is equivalent
|
||||||
|
to running "make coverage-reset", "make coverage-baseline", "make check", and
|
||||||
|
then "make coverage-report".
|
||||||
|
<pre>
|
||||||
|
make coverage-reset
|
||||||
|
</pre>
|
||||||
|
This zeroes the coverage counters, but does nothing else.
|
||||||
|
<pre>
|
||||||
|
make coverage-baseline
|
||||||
|
</pre>
|
||||||
|
This captures baseline coverage information.
|
||||||
|
<pre>
|
||||||
|
make coverage-report
|
||||||
|
</pre>
|
||||||
|
This creates the coverage report.
|
||||||
|
<pre>
|
||||||
|
make coverage-clean-report
|
||||||
|
</pre>
|
||||||
|
This removes the generated coverage report without cleaning the coverage data
|
||||||
|
itself.
|
||||||
|
<pre>
|
||||||
|
make coverage-clean-data
|
||||||
|
</pre>
|
||||||
|
This removes the captured coverage data without removing the coverage files
|
||||||
|
created at compile time (*.gcno).
|
||||||
|
<pre>
|
||||||
|
make coverage-clean
|
||||||
|
</pre>
|
||||||
|
This cleans all coverage data including the generated coverage report. For more
|
||||||
|
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
|
||||||
|
documentation.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC21" href="#TOC1">SEE ALSO</a><br>
|
||||||
|
<P>
|
||||||
|
<b>pcreapi</b>(3), <b>pcre16</b>(3), <b>pcre32</b>(3), <b>pcre_config</b>(3).
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC22" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
@ -337,11 +506,11 @@ University Computing Service
|
|||||||
Cambridge CB2 3QH, England.
|
Cambridge CB2 3QH, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC18" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC23" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 17 March 2009
|
Last updated: 30 October 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2009 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -13,23 +13,35 @@ from the original man page. If there is any nonsense in it, please consult the
|
|||||||
man page, in case the conversion went wrong.
|
man page, in case the conversion went wrong.
|
||||||
<br>
|
<br>
|
||||||
<ul>
|
<ul>
|
||||||
<li><a name="TOC1" href="#SEC1">PCRE CALLOUTS</a>
|
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||||
<li><a name="TOC2" href="#SEC2">MISSING CALLOUTS</a>
|
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||||
<li><a name="TOC3" href="#SEC3">THE CALLOUT INTERFACE</a>
|
<li><a name="TOC3" href="#SEC3">MISSING CALLOUTS</a>
|
||||||
<li><a name="TOC4" href="#SEC4">RETURN VALUES</a>
|
<li><a name="TOC4" href="#SEC4">THE CALLOUT INTERFACE</a>
|
||||||
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
|
<li><a name="TOC5" href="#SEC5">RETURN VALUES</a>
|
||||||
<li><a name="TOC6" href="#SEC6">REVISION</a>
|
<li><a name="TOC6" href="#SEC6">AUTHOR</a>
|
||||||
|
<li><a name="TOC7" href="#SEC7">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">PCRE CALLOUTS</a><br>
|
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||||
|
<P>
|
||||||
|
<b>#include <pcre.h></b>
|
||||||
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int (*pcre_callout)(pcre_callout_block *);</b>
|
<b>int (*pcre_callout)(pcre_callout_block *);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int (*pcre32_callout)(pcre32_callout_block *);</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||||
|
<P>
|
||||||
PCRE provides a feature called "callout", which is a means of temporarily
|
PCRE provides a feature called "callout", which is a means of temporarily
|
||||||
passing control to the caller of PCRE in the middle of pattern matching. The
|
passing control to the caller of PCRE in the middle of pattern matching. The
|
||||||
caller of PCRE provides an external function by putting its entry point in the
|
caller of PCRE provides an external function by putting its entry point in the
|
||||||
global variable <i>pcre_callout</i>. By default, this variable contains NULL,
|
global variable <i>pcre_callout</i> (<i>pcre16_callout</i> for the 16-bit
|
||||||
which disables all calling out.
|
library, <i>pcre32_callout</i> for the 32-bit library). By default, this
|
||||||
|
variable contains NULL, which disables all calling out.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Within a regular expression, (?C) indicates the points at which the external
|
Within a regular expression, (?C) indicates the points at which the external
|
||||||
@ -39,9 +51,9 @@ For example, this pattern has two callout points:
|
|||||||
<pre>
|
<pre>
|
||||||
(?C1)abc(?C2)def
|
(?C1)abc(?C2)def
|
||||||
</pre>
|
</pre>
|
||||||
If the PCRE_AUTO_CALLOUT option bit is set when <b>pcre_compile()</b> is called,
|
If the PCRE_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE
|
||||||
PCRE automatically inserts callouts, all with number 255, before each item in
|
automatically inserts callouts, all with number 255, before each item in the
|
||||||
the pattern. For example, if PCRE_AUTO_CALLOUT is used with the pattern
|
pattern. For example, if PCRE_AUTO_CALLOUT is used with the pattern
|
||||||
<pre>
|
<pre>
|
||||||
A(\d{2}|--)
|
A(\d{2}|--)
|
||||||
</pre>
|
</pre>
|
||||||
@ -59,7 +71,12 @@ command has an option that sets automatic callouts; when it is used, the output
|
|||||||
indicates how the pattern is matched. This is useful information when you are
|
indicates how the pattern is matched. This is useful information when you are
|
||||||
trying to optimize the performance of a particular pattern.
|
trying to optimize the performance of a particular pattern.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">MISSING CALLOUTS</a><br>
|
<P>
|
||||||
|
The use of callouts in a pattern makes it ineligible for optimization by the
|
||||||
|
just-in-time compiler. Studying such a pattern with the PCRE_STUDY_JIT_COMPILE
|
||||||
|
option always fails.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
You should be aware that, because of optimizations in the way PCRE matches
|
You should be aware that, because of optimizations in the way PCRE matches
|
||||||
patterns by default, callouts sometimes do not happen. For example, if the
|
patterns by default, callouts sometimes do not happen. For example, if the
|
||||||
@ -73,34 +90,46 @@ the callout is never reached. However, with "abyd", though the result is still
|
|||||||
no match, the callout is obeyed.
|
no match, the callout is obeyed.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
You can disable these optimizations by passing the PCRE_NO_START_OPTIMIZE
|
If the pattern is studied, PCRE knows the minimum length of a matching string,
|
||||||
option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. This slows down the
|
and will immediately give a "no match" return without actually running a match
|
||||||
matching process, but does ensure that callouts such as the example above are
|
if the subject is not long enough, or, for unanchored patterns, if it has
|
||||||
obeyed.
|
been scanned far enough.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">THE CALLOUT INTERFACE</a><br>
|
<P>
|
||||||
|
You can disable these optimizations by passing the PCRE_NO_START_OPTIMIZE
|
||||||
|
option to the matching function, or by starting the pattern with
|
||||||
|
(*NO_START_OPT). This slows down the matching process, but does ensure that
|
||||||
|
callouts such as the example above are obeyed.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
|
||||||
<P>
|
<P>
|
||||||
During matching, when PCRE reaches a callout point, the external function
|
During matching, when PCRE reaches a callout point, the external function
|
||||||
defined by <i>pcre_callout</i> is called (if it is set). This applies to both
|
defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called
|
||||||
the <b>pcre_exec()</b> and the <b>pcre_dfa_exec()</b> matching functions. The
|
(if it is set). This applies to both normal and DFA matching. The only
|
||||||
only argument to the callout function is a pointer to a <b>pcre_callout</b>
|
argument to the callout function is a pointer to a <b>pcre_callout</b>
|
||||||
block. This structure contains the following fields:
|
or <b>pcre[16|32]_callout</b> block.
|
||||||
|
These structures contain the following fields:
|
||||||
<pre>
|
<pre>
|
||||||
int <i>version</i>;
|
int <i>version</i>;
|
||||||
int <i>callout_number</i>;
|
int <i>callout_number</i>;
|
||||||
int *<i>offset_vector</i>;
|
int *<i>offset_vector</i>;
|
||||||
const char *<i>subject</i>;
|
const char *<i>subject</i>; (8-bit version)
|
||||||
int <i>subject_length</i>;
|
PCRE_SPTR16 <i>subject</i>; (16-bit version)
|
||||||
int <i>start_match</i>;
|
PCRE_SPTR32 <i>subject</i>; (32-bit version)
|
||||||
int <i>current_position</i>;
|
int <i>subject_length</i>;
|
||||||
int <i>capture_top</i>;
|
int <i>start_match</i>;
|
||||||
int <i>capture_last</i>;
|
int <i>current_position</i>;
|
||||||
void *<i>callout_data</i>;
|
int <i>capture_top</i>;
|
||||||
int <i>pattern_position</i>;
|
int <i>capture_last</i>;
|
||||||
int <i>next_item_length</i>;
|
void *<i>callout_data</i>;
|
||||||
|
int <i>pattern_position</i>;
|
||||||
|
int <i>next_item_length</i>;
|
||||||
|
const unsigned char *<i>mark</i>; (8-bit version)
|
||||||
|
const PCRE_UCHAR16 *<i>mark</i>; (16-bit version)
|
||||||
|
const PCRE_UCHAR32 *<i>mark</i>; (32-bit version)
|
||||||
</pre>
|
</pre>
|
||||||
The <i>version</i> field is an integer containing the version number of the
|
The <i>version</i> field is an integer containing the version number of the
|
||||||
block format. The initial version was 0; the current version is 1. The version
|
block format. The initial version was 0; the current version is 2. The version
|
||||||
number will change again in future if additional fields are added, but the
|
number will change again in future if additional fields are added, but the
|
||||||
intention is never to remove any of the existing fields.
|
intention is never to remove any of the existing fields.
|
||||||
</P>
|
</P>
|
||||||
@ -111,15 +140,15 @@ automatically generated callouts).
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>offset_vector</i> field is a pointer to the vector of offsets that was
|
The <i>offset_vector</i> field is a pointer to the vector of offsets that was
|
||||||
passed by the caller to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. When
|
passed by the caller to the matching function. When <b>pcre_exec()</b> or
|
||||||
<b>pcre_exec()</b> is used, the contents can be inspected in order to extract
|
<b>pcre[16|32]_exec()</b> is used, the contents can be inspected in order to extract
|
||||||
substrings that have been matched so far, in the same way as for extracting
|
substrings that have been matched so far, in the same way as for extracting
|
||||||
substrings after a match has completed. For <b>pcre_dfa_exec()</b> this field is
|
substrings after a match has completed. For the DFA matching functions, this
|
||||||
not useful.
|
field is not useful.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
|
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
|
||||||
that were passed to <b>pcre_exec()</b>.
|
that were passed to the matching function.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>start_match</i> field normally contains the offset within the subject at
|
The <i>start_match</i> field normally contains the offset within the subject at
|
||||||
@ -134,53 +163,59 @@ The <i>current_position</i> field contains the offset within the subject of the
|
|||||||
current match pointer.
|
current match pointer.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When the <b>pcre_exec()</b> function is used, the <i>capture_top</i> field
|
When <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> is used, the
|
||||||
contains one more than the number of the highest numbered captured substring so
|
<i>capture_top</i> field contains one more than the number of the highest
|
||||||
far. If no substrings have been captured, the value of <i>capture_top</i> is
|
numbered captured substring so far. If no substrings have been captured, the
|
||||||
one. This is always the case when <b>pcre_dfa_exec()</b> is used, because it
|
value of <i>capture_top</i> is one. This is always the case when the DFA
|
||||||
does not support captured substrings.
|
functions are used, because they do not support captured substrings.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>capture_last</i> field contains the number of the most recently captured
|
The <i>capture_last</i> field contains the number of the most recently captured
|
||||||
substring. If no substrings have been captured, its value is -1. This is always
|
substring. If no substrings have been captured, its value is -1. This is always
|
||||||
the case when <b>pcre_dfa_exec()</b> is used.
|
the case for the DFA matching functions.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>callout_data</i> field contains a value that is passed to
|
The <i>callout_data</i> field contains a value that is passed to a matching
|
||||||
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> specifically so that it can be
|
function specifically so that it can be passed back in callouts. It is passed
|
||||||
passed back in callouts. It is passed in the <i>pcre_callout</i> field of the
|
in the <i>callout_data</i> field of a <b>pcre_extra</b> or <b>pcre[16|32]_extra</b>
|
||||||
<b>pcre_extra</b> data structure. If no such data was passed, the value of
|
data structure. If no such data was passed, the value of <i>callout_data</i> in
|
||||||
<i>callout_data</i> in a <b>pcre_callout</b> block is NULL. There is a
|
a callout block is NULL. There is a description of the <b>pcre_extra</b>
|
||||||
description of the <b>pcre_extra</b> structure in the
|
structure in the
|
||||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>pattern_position</i> field is present from version 1 of the
|
The <i>pattern_position</i> field is present from version 1 of the callout
|
||||||
<i>pcre_callout</i> structure. It contains the offset to the next item to be
|
structure. It contains the offset to the next item to be matched in the pattern
|
||||||
matched in the pattern string.
|
string.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>next_item_length</i> field is present from version 1 of the
|
The <i>next_item_length</i> field is present from version 1 of the callout
|
||||||
<i>pcre_callout</i> structure. It contains the length of the next item to be
|
structure. It contains the length of the next item to be matched in the pattern
|
||||||
matched in the pattern string. When the callout immediately precedes an
|
string. When the callout immediately precedes an alternation bar, a closing
|
||||||
alternation bar, a closing parenthesis, or the end of the pattern, the length
|
parenthesis, or the end of the pattern, the length is zero. When the callout
|
||||||
is zero. When the callout precedes an opening parenthesis, the length is that
|
precedes an opening parenthesis, the length is that of the entire subpattern.
|
||||||
of the entire subpattern.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
|
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
|
||||||
help in distinguishing between different automatic callouts, which all have the
|
help in distinguishing between different automatic callouts, which all have the
|
||||||
same callout number. However, they are set for all callouts.
|
same callout number. However, they are set for all callouts.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">RETURN VALUES</a><br>
|
<P>
|
||||||
|
The <i>mark</i> field is present from version 2 of the callout structure. In
|
||||||
|
callouts from <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> it contains a pointer to
|
||||||
|
the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
|
||||||
|
(*THEN) item in the match, or NULL if no such items have been passed. Instances
|
||||||
|
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
|
||||||
|
callouts from the DFA matching functions this field always contains NULL.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC5" href="#TOC1">RETURN VALUES</a><br>
|
||||||
<P>
|
<P>
|
||||||
The external callout function returns an integer to PCRE. If the value is zero,
|
The external callout function returns an integer to PCRE. If the value is zero,
|
||||||
matching proceeds as normal. If the value is greater than zero, matching fails
|
matching proceeds as normal. If the value is greater than zero, matching fails
|
||||||
at the current point, but the testing of other matching possibilities goes
|
at the current point, but the testing of other matching possibilities goes
|
||||||
ahead, just as if a lookahead assertion had failed. If the value is less than
|
ahead, just as if a lookahead assertion had failed. If the value is less than
|
||||||
zero, the match is abandoned, and <b>pcre_exec()</b> (or <b>pcre_dfa_exec()</b>)
|
zero, the match is abandoned, and the matching function returns the negative value.
|
||||||
returns the negative value.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Negative values should normally be chosen from the set of PCRE_ERROR_xxx
|
Negative values should normally be chosen from the set of PCRE_ERROR_xxx
|
||||||
@ -188,7 +223,7 @@ values. In particular, PCRE_ERROR_NOMATCH forces a standard "no match" failure.
|
|||||||
The error number PCRE_ERROR_CALLOUT is reserved for use by callout functions;
|
The error number PCRE_ERROR_CALLOUT is reserved for use by callout functions;
|
||||||
it will never be used by PCRE itself.
|
it will never be used by PCRE itself.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC6" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
@ -197,11 +232,11 @@ University Computing Service
|
|||||||
Cambridge CB2 3QH, England.
|
Cambridge CB2 3QH, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 15 March 2009
|
Last updated: 24 June 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2009 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -17,23 +17,22 @@ DIFFERENCES BETWEEN PCRE AND PERL
|
|||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This document describes the differences in the ways that PCRE and Perl handle
|
This document describes the differences in the ways that PCRE and Perl handle
|
||||||
regular expressions. The differences described here are mainly with respect to
|
regular expressions. The differences described here are with respect to Perl
|
||||||
Perl 5.8, though PCRE versions 7.0 and later contain some features that are
|
versions 5.10 and above.
|
||||||
expected to be in the forthcoming Perl 5.10.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
1. PCRE has only a subset of Perl's UTF-8 and Unicode support. Details of what
|
1. PCRE has only a subset of Perl's Unicode support. Details of what it does
|
||||||
it does have are given in the
|
have are given in the
|
||||||
<a href="pcre.html#utf8support">section on UTF-8 support</a>
|
<a href="pcreunicode.html"><b>pcreunicode</b></a>
|
||||||
in the main
|
|
||||||
<a href="pcre.html"><b>pcre</b></a>
|
|
||||||
page.
|
page.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
2. PCRE does not allow repeat quantifiers on lookahead assertions. Perl permits
|
2. PCRE allows repeat quantifiers only on parenthesized assertions, but they do
|
||||||
them, but they do not mean what you might think. For example, (?!a){3} does
|
not mean what you might think. For example, (?!a){3} does not assert that the
|
||||||
not assert that the next three characters are not "a". It just asserts that the
|
next three characters are not "a". It just asserts that the next character is
|
||||||
next character is not "a" three times.
|
not "a" three times (in principle: PCRE optimizes this to run the assertion
|
||||||
|
just once). Perl allows repeat quantifiers on other assertions such as \b, but
|
||||||
|
these do not seem to have any use.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
3. Capturing subpatterns that occur inside negative lookahead assertions are
|
3. Capturing subpatterns that occur inside negative lookahead assertions are
|
||||||
@ -50,16 +49,22 @@ represent a binary zero.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
5. The following Perl escape sequences are not supported: \l, \u, \L,
|
5. The following Perl escape sequences are not supported: \l, \u, \L,
|
||||||
\U, and \N. In fact these are implemented by Perl's general string-handling
|
\U, and \N when followed by a character name or Unicode value. (\N on its
|
||||||
and are not part of its pattern matching engine. If any of these are
|
own, matching a non-newline character, is supported.) In fact these are
|
||||||
encountered by PCRE, an error is generated.
|
implemented by Perl's general string-handling and are not part of its pattern
|
||||||
|
matching engine. If any of these are encountered by PCRE, an error is
|
||||||
|
generated by default. However, if the PCRE_JAVASCRIPT_COMPAT option is set,
|
||||||
|
\U and \u are interpreted as JavaScript interprets them.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
6. The Perl escape sequences \p, \P, and \X are supported only if PCRE is
|
6. The Perl escape sequences \p, \P, and \X are supported only if PCRE is
|
||||||
built with Unicode character property support. The properties that can be
|
built with Unicode character property support. The properties that can be
|
||||||
tested with \p and \P are limited to the general category properties such as
|
tested with \p and \P are limited to the general category properties such as
|
||||||
Lu and Nd, script names such as Greek or Han, and the derived properties Any
|
Lu and Nd, script names such as Greek or Han, and the derived properties Any
|
||||||
and L&.
|
and L&. PCRE does support the Cs (surrogate) property, which Perl does not; the
|
||||||
|
Perl documentation says "Because Perl hides the need for the user to understand
|
||||||
|
the internal representation of Unicode characters, there is no need to
|
||||||
|
implement the somewhat messy concept of surrogates."
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
7. PCRE does support the \Q...\E escape for quoting substrings. Characters in
|
7. PCRE does support the \Q...\E escape for quoting substrings. Characters in
|
||||||
@ -79,37 +84,67 @@ The \Q...\E sequence is recognized both inside and outside character classes.
|
|||||||
<P>
|
<P>
|
||||||
8. Fairly obviously, PCRE does not support the (?{code}) and (??{code})
|
8. Fairly obviously, PCRE does not support the (?{code}) and (??{code})
|
||||||
constructions. However, there is support for recursive patterns. This is not
|
constructions. However, there is support for recursive patterns. This is not
|
||||||
available in Perl 5.8, but will be in Perl 5.10. Also, the PCRE "callout"
|
available in Perl 5.8, but it is in Perl 5.10. Also, the PCRE "callout"
|
||||||
feature allows an external function to be called during pattern matching. See
|
feature allows an external function to be called during pattern matching. See
|
||||||
the
|
the
|
||||||
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
||||||
documentation for details.
|
documentation for details.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
9. Subpatterns that are called recursively or as "subroutines" are always
|
9. Subpatterns that are called as subroutines (whether or not recursively) are
|
||||||
treated as atomic groups in PCRE. This is like Python, but unlike Perl.
|
always treated as atomic groups in PCRE. This is like Python, but unlike Perl.
|
||||||
|
Captured values that are set outside a subroutine call can be referenced from
|
||||||
|
inside in PCRE, but not in Perl. There is a discussion that explains these
|
||||||
|
differences in more detail in the
|
||||||
|
<a href="pcrepattern.html#recursiondifference">section on recursion differences from Perl</a>
|
||||||
|
in the
|
||||||
|
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||||
|
page.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
10. There are some differences that are concerned with the settings of captured
|
10. If any of the backtracking control verbs are used in an assertion or in a
|
||||||
|
subpattern that is called as a subroutine (whether or not recursively), their
|
||||||
|
effect is confined to that subpattern; it does not extend to the surrounding
|
||||||
|
pattern. This is not always the case in Perl. In particular, if (*THEN) is
|
||||||
|
present in a group that is called as a subroutine, its action is limited to
|
||||||
|
that group, even if the group does not contain any | characters. There is one
|
||||||
|
exception to this: the name from a (*MARK), (*PRUNE), or (*THEN) that is
|
||||||
|
encountered in a successful positive assertion <i>is</i> passed back when a
|
||||||
|
match succeeds (compare capturing parentheses in assertions). Note that such
|
||||||
|
subpatterns are processed as anchored at the point where they are tested.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
11. There are some differences that are concerned with the settings of captured
|
||||||
strings when part of a pattern is repeated. For example, matching "aba" against
|
strings when part of a pattern is repeated. For example, matching "aba" against
|
||||||
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
|
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
11. PCRE does support Perl 5.10's backtracking verbs (*ACCEPT), (*FAIL), (*F),
|
12. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
|
||||||
(*COMMIT), (*PRUNE), (*SKIP), and (*THEN), but only in the forms without an
|
names is not as general as Perl's. This is a consequence of the fact that PCRE
|
||||||
argument. PCRE does not support (*MARK). If (*ACCEPT) is within capturing
|
works internally just with numbers, using an external table to translate
|
||||||
parentheses, PCRE does not set that capture group; this is different to Perl.
|
between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)),
|
||||||
|
where the two capturing parentheses have the same number but different names,
|
||||||
|
is not supported, and causes an error at compile time. If it were allowed, it
|
||||||
|
would not be possible to distinguish which parentheses matched, because both
|
||||||
|
names map to capturing subpattern number 1. To avoid this confusing situation,
|
||||||
|
an error is given at compile time.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
12. PCRE provides some extensions to the Perl regular expression facilities.
|
13. Perl recognizes comments in some places that PCRE does not, for example,
|
||||||
Perl 5.10 will include new features that are not in earlier versions, some of
|
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
||||||
which (such as named parentheses) have been in PCRE for some time. This list is
|
Perl allows white space between ( and ? but PCRE never does, even if the
|
||||||
with respect to Perl 5.10:
|
PCRE_EXTENDED option is set.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
14. PCRE provides some extensions to the Perl regular expression facilities.
|
||||||
|
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
||||||
|
of which (such as named parentheses) have been in PCRE for some time. This list
|
||||||
|
is with respect to Perl 5.10:
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(a) Although lookbehind assertions must match fixed length strings, each
|
(a) Although lookbehind assertions in PCRE must match fixed length strings,
|
||||||
alternative branch of a lookbehind assertion can match a different length of
|
each alternative branch of a lookbehind assertion can match a different length
|
||||||
string. Perl requires them all to have the same length.
|
of string. Perl requires them all to have the same length.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the $
|
(b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the $
|
||||||
@ -130,8 +165,8 @@ question mark they are.
|
|||||||
only at the first matching position in the subject string.
|
only at the first matching position in the subject string.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(f) The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, and PCRE_NO_AUTO_CAPTURE
|
(f) The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, and
|
||||||
options for <b>pcre_exec()</b> have no Perl equivalents.
|
PCRE_NO_AUTO_CAPTURE options for <b>pcre_exec()</b> have no Perl equivalents.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(g) The \R escape sequence can be restricted to match only CR, LF, or CRLF
|
(g) The \R escape sequence can be restricted to match only CR, LF, or CRLF
|
||||||
@ -145,11 +180,13 @@ by the PCRE_BSR_ANYCRLF option.
|
|||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(j) Patterns compiled by PCRE can be saved and re-used at a later time, even on
|
(j) Patterns compiled by PCRE can be saved and re-used at a later time, even on
|
||||||
different hosts that have the other endianness.
|
different hosts that have the other endianness. However, this does not apply to
|
||||||
|
optimized data created by the just-in-time compiler.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(k) The alternative matching function (<b>pcre_dfa_exec()</b>) matches in a
|
(k) The alternative matching functions (<b>pcre_dfa_exec()</b>,
|
||||||
different way and is not Perl-compatible.
|
<b>pcre16_dfa_exec()</b> and <b>pcre32_dfa_exec()</b>) match in a different way
|
||||||
|
and are not Perl-compatible.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(l) PCRE recognizes some special sequences such as (*CR) at the start of
|
(l) PCRE recognizes some special sequences such as (*CR) at the start of
|
||||||
@ -170,9 +207,9 @@ Cambridge CB2 3QH, England.
|
|||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 11 September 2007
|
Last updated: 25 August 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2007 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -35,7 +35,8 @@ man page, in case the conversion went wrong.
|
|||||||
The C++ wrapper for PCRE was provided by Google Inc. Some additional
|
The C++ wrapper for PCRE was provided by Google Inc. Some additional
|
||||||
functionality was added by Giuseppe Maxia. This brief man page was constructed
|
functionality was added by Giuseppe Maxia. This brief man page was constructed
|
||||||
from the notes in the <i>pcrecpp.h</i> file, which should be consulted for
|
from the notes in the <i>pcrecpp.h</i> file, which should be consulted for
|
||||||
further details.
|
further details. Note that the C++ wrapper supports only the original 8-bit
|
||||||
|
PCRE library. There is no 16-bit or 32-bit support at present.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">MATCHING INTERFACE</a><br>
|
<br><a name="SEC3" href="#TOC1">MATCHING INTERFACE</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -191,7 +192,7 @@ supported:
|
|||||||
PCRE_DOTALL dot matches newlines /s
|
PCRE_DOTALL dot matches newlines /s
|
||||||
PCRE_DOLLAR_ENDONLY $ matches only at end N/A
|
PCRE_DOLLAR_ENDONLY $ matches only at end N/A
|
||||||
PCRE_EXTRA strict escape parsing N/A
|
PCRE_EXTRA strict escape parsing N/A
|
||||||
PCRE_EXTENDED ignore whitespaces /x
|
PCRE_EXTENDED ignore white spaces /x
|
||||||
PCRE_UTF8 handles UTF8 chars built-in
|
PCRE_UTF8 handles UTF8 chars built-in
|
||||||
PCRE_UNGREEDY reverses * and *? N/A
|
PCRE_UNGREEDY reverses * and *? N/A
|
||||||
PCRE_NO_AUTO_CAPTURE disables capturing parens N/A (*)
|
PCRE_NO_AUTO_CAPTURE disables capturing parens N/A (*)
|
||||||
@ -232,7 +233,7 @@ Normally, to pass one or more modifiers to a RE class, you declare
|
|||||||
a <i>RE_Options</i> object, set the appropriate options, and pass this
|
a <i>RE_Options</i> object, set the appropriate options, and pass this
|
||||||
object to a RE constructor. Example:
|
object to a RE constructor. Example:
|
||||||
<pre>
|
<pre>
|
||||||
RE_options opt;
|
RE_Options opt;
|
||||||
opt.set_caseless(true);
|
opt.set_caseless(true);
|
||||||
if (RE("HELLO", opt).PartialMatch("hello world")) ...
|
if (RE("HELLO", opt).PartialMatch("hello world")) ...
|
||||||
</pre>
|
</pre>
|
||||||
@ -282,10 +283,7 @@ is defined in the pcrecpp namespace.
|
|||||||
Example: read lines of the form "var = value" from a string.
|
Example: read lines of the form "var = value" from a string.
|
||||||
string contents = ...; // Fill string somehow
|
string contents = ...; // Fill string somehow
|
||||||
pcrecpp::StringPiece input(contents); // Wrap in a StringPiece
|
pcrecpp::StringPiece input(contents); // Wrap in a StringPiece
|
||||||
</PRE>
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<pre>
|
|
||||||
string var;
|
string var;
|
||||||
int value;
|
int value;
|
||||||
pcrecpp::RE re("(\\w+) = (\\d+)\n");
|
pcrecpp::RE re("(\\w+) = (\\d+)\n");
|
||||||
@ -363,7 +361,7 @@ Copyright © 2007 Google Inc.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC12" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 17 March 2009
|
Last updated: 08 January 2012
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
426
tools/pcre/doc/html/pcredemo.html
Normal file
426
tools/pcre/doc/html/pcredemo.html
Normal file
@ -0,0 +1,426 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcredemo specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcredemo man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<ul>
|
||||||
|
</ul>
|
||||||
|
<PRE>
|
||||||
|
/*************************************************
|
||||||
|
* PCRE DEMONSTRATION PROGRAM *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This is a demonstration program to illustrate the most straightforward ways
|
||||||
|
of calling the PCRE regular expression library from a C program. See the
|
||||||
|
pcresample documentation for a short discussion ("man pcresample" if you have
|
||||||
|
the PCRE man pages installed).
|
||||||
|
|
||||||
|
In Unix-like environments, if PCRE is installed in your standard system
|
||||||
|
libraries, you should be able to compile this program using this command:
|
||||||
|
|
||||||
|
gcc -Wall pcredemo.c -lpcre -o pcredemo
|
||||||
|
|
||||||
|
If PCRE is not installed in a standard place, it is likely to be installed with
|
||||||
|
support for the pkg-config mechanism. If you have pkg-config, you can compile
|
||||||
|
this program using this command:
|
||||||
|
|
||||||
|
gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
|
||||||
|
|
||||||
|
If you do not have pkg-config, you may have to use this:
|
||||||
|
|
||||||
|
gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
|
||||||
|
-R/usr/local/lib -lpcre -o pcredemo
|
||||||
|
|
||||||
|
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
|
||||||
|
library files for PCRE are installed on your system. Only some operating
|
||||||
|
systems (e.g. Solaris) use the -R option.
|
||||||
|
|
||||||
|
Building under Windows:
|
||||||
|
|
||||||
|
If you want to statically link this program against a non-dll .a file, you must
|
||||||
|
define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
|
||||||
|
pcre_free() exported functions will be declared __declspec(dllimport), with
|
||||||
|
unwanted results. So in this environment, uncomment the following line. */
|
||||||
|
|
||||||
|
/* #define PCRE_STATIC */
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <pcre.h>
|
||||||
|
|
||||||
|
#define OVECCOUNT 30 /* should be a multiple of 3 */
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
pcre *re;
|
||||||
|
const char *error;
|
||||||
|
char *pattern;
|
||||||
|
char *subject;
|
||||||
|
unsigned char *name_table;
|
||||||
|
unsigned int option_bits;
|
||||||
|
int erroffset;
|
||||||
|
int find_all;
|
||||||
|
int crlf_is_newline;
|
||||||
|
int namecount;
|
||||||
|
int name_entry_size;
|
||||||
|
int ovector[OVECCOUNT];
|
||||||
|
int subject_length;
|
||||||
|
int rc, i;
|
||||||
|
int utf8;
|
||||||
|
|
||||||
|
|
||||||
|
/**************************************************************************
|
||||||
|
* First, sort out the command line. There is only one possible option at *
|
||||||
|
* the moment, "-g" to request repeated matching to find all occurrences, *
|
||||||
|
* like Perl's /g option. We set the variable find_all to a non-zero value *
|
||||||
|
* if the -g option is present. Apart from that, there must be exactly two *
|
||||||
|
* arguments. *
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
find_all = 0;
|
||||||
|
for (i = 1; i < argc; i++)
|
||||||
|
{
|
||||||
|
if (strcmp(argv[i], "-g") == 0) find_all = 1;
|
||||||
|
else break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* After the options, we require exactly two arguments, which are the pattern,
|
||||||
|
and the subject string. */
|
||||||
|
|
||||||
|
if (argc - i != 2)
|
||||||
|
{
|
||||||
|
printf("Two arguments required: a regex and a subject string\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
pattern = argv[i];
|
||||||
|
subject = argv[i+1];
|
||||||
|
subject_length = (int)strlen(subject);
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************
|
||||||
|
* Now we are going to compile the regular expression pattern, and handle *
|
||||||
|
* any errors that are detected. *
|
||||||
|
*************************************************************************/
|
||||||
|
|
||||||
|
re = pcre_compile(
|
||||||
|
pattern, /* the pattern */
|
||||||
|
0, /* default options */
|
||||||
|
&error, /* for error message */
|
||||||
|
&erroffset, /* for error offset */
|
||||||
|
NULL); /* use default character tables */
|
||||||
|
|
||||||
|
/* Compilation failed: print the error message and exit */
|
||||||
|
|
||||||
|
if (re == NULL)
|
||||||
|
{
|
||||||
|
printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************
|
||||||
|
* If the compilation succeeded, we call PCRE again, in order to do a *
|
||||||
|
* pattern match against the subject string. This does just ONE match. If *
|
||||||
|
* further matching is needed, it will be done below. *
|
||||||
|
*************************************************************************/
|
||||||
|
|
||||||
|
rc = pcre_exec(
|
||||||
|
re, /* the compiled pattern */
|
||||||
|
NULL, /* no extra data - we didn't study the pattern */
|
||||||
|
subject, /* the subject string */
|
||||||
|
subject_length, /* the length of the subject */
|
||||||
|
0, /* start at offset 0 in the subject */
|
||||||
|
0, /* default options */
|
||||||
|
ovector, /* output vector for substring information */
|
||||||
|
OVECCOUNT); /* number of elements in the output vector */
|
||||||
|
|
||||||
|
/* Matching failed: handle error cases */
|
||||||
|
|
||||||
|
if (rc < 0)
|
||||||
|
{
|
||||||
|
switch(rc)
|
||||||
|
{
|
||||||
|
case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
|
||||||
|
/*
|
||||||
|
Handle other special cases if you like
|
||||||
|
*/
|
||||||
|
default: printf("Matching error %d\n", rc); break;
|
||||||
|
}
|
||||||
|
pcre_free(re); /* Release memory used for the compiled pattern */
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Match succeeded */
|
||||||
|
|
||||||
|
printf("\nMatch succeeded at offset %d\n", ovector[0]);
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************
|
||||||
|
* We have found the first match within the subject string. If the output *
|
||||||
|
* vector wasn't big enough, say so. Then output any substrings that were *
|
||||||
|
* captured. *
|
||||||
|
*************************************************************************/
|
||||||
|
|
||||||
|
/* The output vector wasn't big enough */
|
||||||
|
|
||||||
|
if (rc == 0)
|
||||||
|
{
|
||||||
|
rc = OVECCOUNT/3;
|
||||||
|
printf("ovector only has room for %d captured substrings\n", rc - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Show substrings stored in the output vector by number. Obviously, in a real
|
||||||
|
application you might want to do things other than print them. */
|
||||||
|
|
||||||
|
for (i = 0; i < rc; i++)
|
||||||
|
{
|
||||||
|
char *substring_start = subject + ovector[2*i];
|
||||||
|
int substring_length = ovector[2*i+1] - ovector[2*i];
|
||||||
|
printf("%2d: %.*s\n", i, substring_length, substring_start);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**************************************************************************
|
||||||
|
* That concludes the basic part of this demonstration program. We have *
|
||||||
|
* compiled a pattern, and performed a single match. The code that follows *
|
||||||
|
* shows first how to access named substrings, and then how to code for *
|
||||||
|
* repeated matches on the same subject. *
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
/* See if there are any named substrings, and if so, show them by name. First
|
||||||
|
we have to extract the count of named parentheses from the pattern. */
|
||||||
|
|
||||||
|
(void)pcre_fullinfo(
|
||||||
|
re, /* the compiled pattern */
|
||||||
|
NULL, /* no extra data - we didn't study the pattern */
|
||||||
|
PCRE_INFO_NAMECOUNT, /* number of named substrings */
|
||||||
|
&namecount); /* where to put the answer */
|
||||||
|
|
||||||
|
if (namecount <= 0) printf("No named substrings\n"); else
|
||||||
|
{
|
||||||
|
unsigned char *tabptr;
|
||||||
|
printf("Named substrings\n");
|
||||||
|
|
||||||
|
/* Before we can access the substrings, we must extract the table for
|
||||||
|
translating names to numbers, and the size of each entry in the table. */
|
||||||
|
|
||||||
|
(void)pcre_fullinfo(
|
||||||
|
re, /* the compiled pattern */
|
||||||
|
NULL, /* no extra data - we didn't study the pattern */
|
||||||
|
PCRE_INFO_NAMETABLE, /* address of the table */
|
||||||
|
&name_table); /* where to put the answer */
|
||||||
|
|
||||||
|
(void)pcre_fullinfo(
|
||||||
|
re, /* the compiled pattern */
|
||||||
|
NULL, /* no extra data - we didn't study the pattern */
|
||||||
|
PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
|
||||||
|
&name_entry_size); /* where to put the answer */
|
||||||
|
|
||||||
|
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||||
|
and the substring itself. */
|
||||||
|
|
||||||
|
tabptr = name_table;
|
||||||
|
for (i = 0; i < namecount; i++)
|
||||||
|
{
|
||||||
|
int n = (tabptr[0] << 8) | tabptr[1];
|
||||||
|
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
|
||||||
|
ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
|
||||||
|
tabptr += name_entry_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************
|
||||||
|
* If the "-g" option was given on the command line, we want to continue *
|
||||||
|
* to search for additional matches in the subject string, in a similar *
|
||||||
|
* way to the /g option in Perl. This turns out to be trickier than you *
|
||||||
|
* might think because of the possibility of matching an empty string. *
|
||||||
|
* What happens is as follows: *
|
||||||
|
* *
|
||||||
|
* If the previous match was NOT for an empty string, we can just start *
|
||||||
|
* the next match at the end of the previous one. *
|
||||||
|
* *
|
||||||
|
* If the previous match WAS for an empty string, we can't do that, as it *
|
||||||
|
* would lead to an infinite loop. Instead, a special call of pcre_exec() *
|
||||||
|
* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. *
|
||||||
|
* The first of these tells PCRE that an empty string at the start of the *
|
||||||
|
* subject is not a valid match; other possibilities must be tried. The *
|
||||||
|
* second flag restricts PCRE to one match attempt at the initial string *
|
||||||
|
* position. If this match succeeds, an alternative to the empty string *
|
||||||
|
* match has been found, and we can print it and proceed round the loop, *
|
||||||
|
* advancing by the length of whatever was found. If this match does not *
|
||||||
|
* succeed, we still stay in the loop, advancing by just one character. *
|
||||||
|
* In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be *
|
||||||
|
* more than one byte. *
|
||||||
|
* *
|
||||||
|
* However, there is a complication concerned with newlines. When the *
|
||||||
|
* newline convention is such that CRLF is a valid newline, we must *
|
||||||
|
* advance by two characters rather than one. The newline convention can *
|
||||||
|
* be set in the regex by (*CR), etc.; if not, we must find the default. *
|
||||||
|
*************************************************************************/
|
||||||
|
|
||||||
|
if (!find_all) /* Check for -g */
|
||||||
|
{
|
||||||
|
pcre_free(re); /* Release the memory used for the compiled pattern */
|
||||||
|
return 0; /* Finish unless -g was given */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
|
||||||
|
sequence. First, find the options with which the regex was compiled; extract
|
||||||
|
the UTF-8 state, and mask off all but the newline options. */
|
||||||
|
|
||||||
|
(void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &option_bits);
|
||||||
|
utf8 = option_bits & PCRE_UTF8;
|
||||||
|
option_bits &= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF|
|
||||||
|
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF;
|
||||||
|
|
||||||
|
/* If no newline options were set, find the default newline convention from the
|
||||||
|
build configuration. */
|
||||||
|
|
||||||
|
if (option_bits == 0)
|
||||||
|
{
|
||||||
|
int d;
|
||||||
|
(void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
|
||||||
|
/* Note that these values are always the ASCII ones, even in
|
||||||
|
EBCDIC environments. CR = 13, NL = 10. */
|
||||||
|
option_bits = (d == 13)? PCRE_NEWLINE_CR :
|
||||||
|
(d == 10)? PCRE_NEWLINE_LF :
|
||||||
|
(d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
|
||||||
|
(d == -2)? PCRE_NEWLINE_ANYCRLF :
|
||||||
|
(d == -1)? PCRE_NEWLINE_ANY : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* See if CRLF is a valid newline sequence. */
|
||||||
|
|
||||||
|
crlf_is_newline =
|
||||||
|
option_bits == PCRE_NEWLINE_ANY ||
|
||||||
|
option_bits == PCRE_NEWLINE_CRLF ||
|
||||||
|
option_bits == PCRE_NEWLINE_ANYCRLF;
|
||||||
|
|
||||||
|
/* Loop for second and subsequent matches */
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
int options = 0; /* Normally no options */
|
||||||
|
int start_offset = ovector[1]; /* Start at end of previous match */
|
||||||
|
|
||||||
|
/* If the previous match was for an empty string, we are finished if we are
|
||||||
|
at the end of the subject. Otherwise, arrange to run another match at the
|
||||||
|
same point to see if a non-empty match can be found. */
|
||||||
|
|
||||||
|
if (ovector[0] == ovector[1])
|
||||||
|
{
|
||||||
|
if (ovector[0] == subject_length) break;
|
||||||
|
options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Run the next matching operation */
|
||||||
|
|
||||||
|
rc = pcre_exec(
|
||||||
|
re, /* the compiled pattern */
|
||||||
|
NULL, /* no extra data - we didn't study the pattern */
|
||||||
|
subject, /* the subject string */
|
||||||
|
subject_length, /* the length of the subject */
|
||||||
|
start_offset, /* starting offset in the subject */
|
||||||
|
options, /* options */
|
||||||
|
ovector, /* output vector for substring information */
|
||||||
|
OVECCOUNT); /* number of elements in the output vector */
|
||||||
|
|
||||||
|
/* This time, a result of NOMATCH isn't an error. If the value in "options"
|
||||||
|
is zero, it just means we have found all possible matches, so the loop ends.
|
||||||
|
Otherwise, it means we have failed to find a non-empty-string match at a
|
||||||
|
point where there was a previous empty-string match. In this case, we do what
|
||||||
|
Perl does: advance the matching position by one character, and continue. We
|
||||||
|
do this by setting the "end of previous match" offset, because that is picked
|
||||||
|
up at the top of the loop as the point at which to start again.
|
||||||
|
|
||||||
|
There are two complications: (a) When CRLF is a valid newline sequence, and
|
||||||
|
the current position is just before it, advance by an extra byte. (b)
|
||||||
|
Otherwise we must ensure that we skip an entire UTF-8 character if we are in
|
||||||
|
UTF-8 mode. */
|
||||||
|
|
||||||
|
if (rc == PCRE_ERROR_NOMATCH)
|
||||||
|
{
|
||||||
|
if (options == 0) break; /* All matches found */
|
||||||
|
ovector[1] = start_offset + 1; /* Advance one byte */
|
||||||
|
if (crlf_is_newline && /* If CRLF is newline & */
|
||||||
|
start_offset < subject_length - 1 && /* we are at CRLF, */
|
||||||
|
subject[start_offset] == '\r' &&
|
||||||
|
subject[start_offset + 1] == '\n')
|
||||||
|
ovector[1] += 1; /* Advance by one more. */
|
||||||
|
else if (utf8) /* Otherwise, ensure we */
|
||||||
|
{ /* advance a whole UTF-8 */
|
||||||
|
while (ovector[1] < subject_length) /* character. */
|
||||||
|
{
|
||||||
|
if ((subject[ovector[1]] & 0xc0) != 0x80) break;
|
||||||
|
ovector[1] += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue; /* Go round the loop again */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Other matching errors are not recoverable. */
|
||||||
|
|
||||||
|
if (rc < 0)
|
||||||
|
{
|
||||||
|
printf("Matching error %d\n", rc);
|
||||||
|
pcre_free(re); /* Release memory used for the compiled pattern */
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Match succeeded */
|
||||||
|
|
||||||
|
printf("\nMatch succeeded again at offset %d\n", ovector[0]);
|
||||||
|
|
||||||
|
/* The match succeeded, but the output vector wasn't big enough. */
|
||||||
|
|
||||||
|
if (rc == 0)
|
||||||
|
{
|
||||||
|
rc = OVECCOUNT/3;
|
||||||
|
printf("ovector only has room for %d captured substrings\n", rc - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* As before, show substrings stored in the output vector by number, and then
|
||||||
|
also any named substrings. */
|
||||||
|
|
||||||
|
for (i = 0; i < rc; i++)
|
||||||
|
{
|
||||||
|
char *substring_start = subject + ovector[2*i];
|
||||||
|
int substring_length = ovector[2*i+1] - ovector[2*i];
|
||||||
|
printf("%2d: %.*s\n", i, substring_length, substring_start);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (namecount <= 0) printf("No named substrings\n"); else
|
||||||
|
{
|
||||||
|
unsigned char *tabptr = name_table;
|
||||||
|
printf("Named substrings\n");
|
||||||
|
for (i = 0; i < namecount; i++)
|
||||||
|
{
|
||||||
|
int n = (tabptr[0] << 8) | tabptr[1];
|
||||||
|
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
|
||||||
|
ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
|
||||||
|
tabptr += name_entry_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} /* End of loop to find second and subsequent matches */
|
||||||
|
|
||||||
|
printf("\n");
|
||||||
|
pcre_free(re); /* Release memory used for the compiled pattern */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcredemo.c */
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
@ -16,16 +16,17 @@ man page, in case the conversion went wrong.
|
|||||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||||
<li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a>
|
<li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a>
|
||||||
<li><a name="TOC4" href="#SEC4">OPTIONS</a>
|
<li><a name="TOC4" href="#SEC4">BINARY FILES</a>
|
||||||
<li><a name="TOC5" href="#SEC5">ENVIRONMENT VARIABLES</a>
|
<li><a name="TOC5" href="#SEC5">OPTIONS</a>
|
||||||
<li><a name="TOC6" href="#SEC6">NEWLINES</a>
|
<li><a name="TOC6" href="#SEC6">ENVIRONMENT VARIABLES</a>
|
||||||
<li><a name="TOC7" href="#SEC7">OPTIONS COMPATIBILITY</a>
|
<li><a name="TOC7" href="#SEC7">NEWLINES</a>
|
||||||
<li><a name="TOC8" href="#SEC8">OPTIONS WITH DATA</a>
|
<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
|
||||||
<li><a name="TOC9" href="#SEC9">MATCHING ERRORS</a>
|
<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
|
||||||
<li><a name="TOC10" href="#SEC10">DIAGNOSTICS</a>
|
<li><a name="TOC10" href="#SEC10">MATCHING ERRORS</a>
|
||||||
<li><a name="TOC11" href="#SEC11">SEE ALSO</a>
|
<li><a name="TOC11" href="#SEC11">DIAGNOSTICS</a>
|
||||||
<li><a name="TOC12" href="#SEC12">AUTHOR</a>
|
<li><a name="TOC12" href="#SEC12">SEE ALSO</a>
|
||||||
<li><a name="TOC13" href="#SEC13">REVISION</a>
|
<li><a name="TOC13" href="#SEC13">AUTHOR</a>
|
||||||
|
<li><a name="TOC14" href="#SEC14">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -49,7 +50,7 @@ without delimiters. For example:
|
|||||||
If you attempt to use delimiters (for example, by surrounding a pattern with
|
If you attempt to use delimiters (for example, by surrounding a pattern with
|
||||||
slashes, as is common in Perl scripts), they are interpreted as part of the
|
slashes, as is common in Perl scripts), they are interpreted as part of the
|
||||||
pattern. Quotes can of course be used to delimit patterns on the command line
|
pattern. Quotes can of course be used to delimit patterns on the command line
|
||||||
because they are interpreted by the shell, and indeed they are required if a
|
because they are interpreted by the shell, and indeed quotes are required if a
|
||||||
pattern contains white space or shell metacharacters.
|
pattern contains white space or shell metacharacters.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
@ -74,31 +75,40 @@ possible to search for patterns that span line boundaries. What defines a line
|
|||||||
boundary is controlled by the <b>-N</b> (<b>--newline</b>) option.
|
boundary is controlled by the <b>-N</b> (<b>--newline</b>) option.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Patterns are limited to 8K or BUFSIZ characters, whichever is the greater.
|
The amount of memory used for buffering files that are being scanned is
|
||||||
|
controlled by a parameter that can be set by the <b>--buffer-size</b> option.
|
||||||
|
The default value for this parameter is specified when <b>pcregrep</b> is built,
|
||||||
|
with the default default being 20K. A block of memory three times this size is
|
||||||
|
used (to allow for buffering "before" and "after" lines). An error occurs if a
|
||||||
|
line overflows the buffer.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
|
||||||
BUFSIZ is defined in <b><stdio.h></b>. When there is more than one pattern
|
BUFSIZ is defined in <b><stdio.h></b>. When there is more than one pattern
|
||||||
(specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
|
(specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
|
||||||
each line in the order in which they are defined, except that all the <b>-e</b>
|
each line in the order in which they are defined, except that all the <b>-e</b>
|
||||||
patterns are tried before the <b>-f</b> patterns.
|
patterns are tried before the <b>-f</b> patterns.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
By default, as soon as one pattern matches (or fails to match when <b>-v</b> is
|
By default, as soon as one pattern matches a line, no further patterns are
|
||||||
used), no further patterns are considered. However, if <b>--colour</b> (or
|
considered. However, if <b>--colour</b> (or <b>--color</b>) is used to colour the
|
||||||
<b>--color</b>) is used to colour the matching substrings, or if
|
matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
|
||||||
<b>--only-matching</b>, <b>--file-offsets</b>, or <b>--line-offsets</b> is used to
|
<b>--line-offsets</b> is used to output only the part of the line that matched
|
||||||
output only the part of the line that matched (either shown literally, or as an
|
(either shown literally, or as an offset), scanning resumes immediately
|
||||||
offset), scanning resumes immediately following the match, so that further
|
following the match, so that further matches on the same line can be found. If
|
||||||
matches on the same line can be found. If there are multiple patterns, they are
|
there are multiple patterns, they are all tried on the remainder of the line,
|
||||||
all tried on the remainder of the line, but patterns that follow the one that
|
but patterns that follow the one that matched are not tried on the earlier part
|
||||||
matched are not tried on the earlier part of the line.
|
of the line.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
This is the same behaviour as GNU grep, but it does mean that the order in
|
This behaviour means that the order in which multiple patterns are specified
|
||||||
which multiple patterns are specified can affect the output when one of the
|
can affect the output when one of the above options is used. This is no longer
|
||||||
above options is used.
|
the same behaviour as GNU grep, which now manages to display earlier matches
|
||||||
|
for later patterns (as long as there is no overlap).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Patterns that can match an empty string are accepted, but empty string
|
Patterns that can match an empty string are accepted, but empty string
|
||||||
matches are not recognized. An example is the pattern "(super)?(man)?", in
|
matches are never recognized. An example is the pattern "(super)?(man)?", in
|
||||||
which all components are optional. This pattern finds all occurrences of both
|
which all components are optional. This pattern finds all occurrences of both
|
||||||
"super" and "man"; the output differs from matching with "super|man" when only
|
"super" and "man"; the output differs from matching with "super|man" when only
|
||||||
the matching substrings are being shown.
|
the matching substrings are being shown.
|
||||||
@ -117,10 +127,25 @@ of these file types by running it with the <b>--help</b> option. If the
|
|||||||
appropriate support is not present, files are treated as plain text. The
|
appropriate support is not present, files are treated as plain text. The
|
||||||
standard input is always so treated.
|
standard input is always so treated.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">OPTIONS</a><br>
|
<br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
|
||||||
|
<P>
|
||||||
|
By default, a file that contains a binary zero byte within the first 1024 bytes
|
||||||
|
is identified as a binary file, and is processed specially. (GNU grep also
|
||||||
|
identifies binary files in this manner.) See the <b>--binary-files</b> option
|
||||||
|
for a means of changing the way binary files are handled.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC5" href="#TOC1">OPTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
The order in which some of the options appear can affect the output. For
|
||||||
|
example, both the <b>-h</b> and <b>-l</b> options affect the printing of file
|
||||||
|
names. Whichever comes later in the command line will be the one that takes
|
||||||
|
effect. Similarly, except where noted below, if an option is given twice, the
|
||||||
|
later setting is used. Numerical values for options may be followed by K or M,
|
||||||
|
to signify multiplication by 1024 or 1024*1024 respectively.
|
||||||
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--</b>
|
<b>--</b>
|
||||||
This terminate the list of options. It is useful if the next item on the
|
This terminates the list of options. It is useful if the next item on the
|
||||||
command line starts with a hyphen but is not an option. This allows for the
|
command line starts with a hyphen but is not an option. This allows for the
|
||||||
processing of patterns and filenames that start with hyphens.
|
processing of patterns and filenames that start with hyphens.
|
||||||
</P>
|
</P>
|
||||||
@ -134,6 +159,11 @@ of <i>number</i> is expected to be relatively small. However, <b>pcregrep</b>
|
|||||||
guarantees to have up to 8K of following text available for context output.
|
guarantees to have up to 8K of following text available for context output.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>-a</b>, <b>--text</b>
|
||||||
|
Treat binary files as text. This is equivalent to
|
||||||
|
<b>--binary-files</b>=<i>text</i>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
|
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
|
||||||
Output <i>number</i> lines of context before each matching line. If filenames
|
Output <i>number</i> lines of context before each matching line. If filenames
|
||||||
and/or line numbers are being output, a hyphen separator is used instead of a
|
and/or line numbers are being output, a hyphen separator is used instead of a
|
||||||
@ -143,16 +173,36 @@ of <i>number</i> is expected to be relatively small. However, <b>pcregrep</b>
|
|||||||
guarantees to have up to 8K of preceding text available for context output.
|
guarantees to have up to 8K of preceding text available for context output.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>--binary-files=</b><i>word</i>
|
||||||
|
Specify how binary files are to be processed. If the word is "binary" (the
|
||||||
|
default), pattern matching is performed on binary files, but the only output is
|
||||||
|
"Binary file <name> matches" when a match succeeds. If the word is "text",
|
||||||
|
which is equivalent to the <b>-a</b> or <b>--text</b> option, binary files are
|
||||||
|
processed in the same way as any other file. In this case, when a match
|
||||||
|
succeeds, the output may be binary garbage, which can have nasty effects if
|
||||||
|
sent to a terminal. If the word is "without-match", which is equivalent to the
|
||||||
|
<b>-I</b> option, binary files are not processed at all; they are assumed not to
|
||||||
|
be of interest.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>--buffer-size=</b><i>number</i>
|
||||||
|
Set the parameter that controls how much memory is used for buffering files
|
||||||
|
that are being scanned.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
|
<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
|
||||||
Output <i>number</i> lines of context both before and after each matching line.
|
Output <i>number</i> lines of context both before and after each matching line.
|
||||||
This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value.
|
This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-c</b>, <b>--count</b>
|
<b>-c</b>, <b>--count</b>
|
||||||
Do not output individual lines; instead just output a count of the number of
|
Do not output individual lines from the files that are being scanned; instead
|
||||||
lines that would otherwise have been output. If several files are given, a
|
output the number of lines that would otherwise have been shown. If no lines
|
||||||
count is output for each of them. In this mode, the <b>-A</b>, <b>-B</b>, and
|
are selected, the number zero is output. If several files are being
|
||||||
<b>-C</b> options are ignored.
|
scanned, a count is output for each of them. However, if the
|
||||||
|
<b>--files-with-matches</b> option is also used, only those files whose counts
|
||||||
|
are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
|
||||||
|
<b>-B</b>, and <b>-C</b> options are ignored.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--colour</b>, <b>--color</b>
|
<b>--colour</b>, <b>--color</b>
|
||||||
@ -169,8 +219,8 @@ coloured. The value (which is optional, see above) may be "never", "always", or
|
|||||||
connected to a terminal. More resources are used when colouring is enabled,
|
connected to a terminal. More resources are used when colouring is enabled,
|
||||||
because <b>pcregrep</b> has to search for all possible matches in a line, not
|
because <b>pcregrep</b> has to search for all possible matches in a line, not
|
||||||
just one, in order to colour them all.
|
just one, in order to colour them all.
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
The colour that is used can be specified by setting the environment variable
|
The colour that is used can be specified by setting the environment variable
|
||||||
PCREGREP_COLOUR or PCREGREP_COLOR. The value of this variable should be a
|
PCREGREP_COLOUR or PCREGREP_COLOR. The value of this variable should be a
|
||||||
string of two numbers, separated by a semicolon. They are copied directly into
|
string of two numbers, separated by a semicolon. They are copied directly into
|
||||||
@ -187,10 +237,12 @@ it is to be processed. Valid values are "read" (the default) or "skip"
|
|||||||
<P>
|
<P>
|
||||||
<b>-d</b> <i>action</i>, <b>--directories=</b><i>action</i>
|
<b>-d</b> <i>action</i>, <b>--directories=</b><i>action</i>
|
||||||
If an input path is a directory, "action" specifies how it is to be processed.
|
If an input path is a directory, "action" specifies how it is to be processed.
|
||||||
Valid values are "read" (the default), "recurse" (equivalent to the <b>-r</b>
|
Valid values are "read" (the default in non-Windows environments, for
|
||||||
option), or "skip" (silently skip the path). In the default case, directories
|
compatibility with GNU grep), "recurse" (equivalent to the <b>-r</b> option), or
|
||||||
are read as if they were ordinary files. In some operating systems the effect
|
"skip" (silently skip the path, the default in Windows environments). In the
|
||||||
of reading a directory like this is an immediate end-of-file.
|
"read" case, directories are read as if they were ordinary files. In some
|
||||||
|
operating systems the effect of reading a directory like this is an immediate
|
||||||
|
end-of-file; in others it may provoke an error.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-e</b> <i>pattern</i>, <b>--regex=</b><i>pattern</i>, <b>--regexp=</b><i>pattern</i>
|
<b>-e</b> <i>pattern</i>, <b>--regex=</b><i>pattern</i>, <b>--regexp=</b><i>pattern</i>
|
||||||
@ -198,59 +250,94 @@ Specify a pattern to be matched. This option can be used multiple times in
|
|||||||
order to specify several patterns. It can also be used as a way of specifying a
|
order to specify several patterns. It can also be used as a way of specifying a
|
||||||
single pattern that starts with a hyphen. When <b>-e</b> is used, no argument
|
single pattern that starts with a hyphen. When <b>-e</b> is used, no argument
|
||||||
pattern is taken from the command line; all arguments are treated as file
|
pattern is taken from the command line; all arguments are treated as file
|
||||||
names. There is an overall maximum of 100 patterns. They are applied to each
|
names. There is no limit to the number of patterns. They are applied to each
|
||||||
line in the order in which they are defined until one matches (or fails to
|
line in the order in which they are defined until one matches.
|
||||||
match if <b>-v</b> is used). If <b>-f</b> is used with <b>-e</b>, the command line
|
<br>
|
||||||
patterns are matched first, followed by the patterns from the file, independent
|
<br>
|
||||||
of the order in which these options are specified. Note that multiple use of
|
If <b>-f</b> is used with <b>-e</b>, the command line patterns are matched first,
|
||||||
<b>-e</b> is not the same as a single pattern with alternatives. For example,
|
followed by the patterns from the file(s), independent of the order in which
|
||||||
X|Y finds the first character in a line that is X or Y, whereas if the two
|
these options are specified. Note that multiple use of <b>-e</b> is not the same
|
||||||
patterns are given separately, <b>pcregrep</b> finds X if it is present, even if
|
as a single pattern with alternatives. For example, X|Y finds the first
|
||||||
it follows Y in the line. It finds Y only if there is no X in the line. This
|
character in a line that is X or Y, whereas if the two patterns are given
|
||||||
really matters only if you are using <b>-o</b> to show the part(s) of the line
|
separately, with X first, <b>pcregrep</b> finds X if it is present, even if it
|
||||||
that matched.
|
follows Y in the line. It finds Y only if there is no X in the line. This
|
||||||
|
matters only if you are using <b>-o</b> or <b>--colo(u)r</b> to show the part(s)
|
||||||
|
of the line that matched.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--exclude</b>=<i>pattern</i>
|
<b>--exclude</b>=<i>pattern</i>
|
||||||
When <b>pcregrep</b> is searching the files in a directory as a consequence of
|
Files (but not directories) whose names match the pattern are skipped without
|
||||||
the <b>-r</b> (recursive search) option, any regular files whose names match the
|
being processed. This applies to all files, whether listed on the command line,
|
||||||
pattern are excluded. Subdirectories are not excluded by this option; they are
|
obtained from <b>--file-list</b>, or by scanning a directory. The pattern is a
|
||||||
searched recursively, subject to the <b>--exclude_dir</b> and
|
PCRE regular expression, and is matched against the final component of the file
|
||||||
<b>--include_dir</b> options. The pattern is a PCRE regular expression, and is
|
name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not
|
||||||
matched against the final component of the file name (not the entire path). If
|
apply to this pattern. The option may be given any number of times in order to
|
||||||
a file name matches both <b>--include</b> and <b>--exclude</b>, it is excluded.
|
specify multiple patterns. If a file name matches both an <b>--include</b>
|
||||||
There is no short form for this option.
|
and an <b>--exclude</b> pattern, it is excluded. There is no short form for this
|
||||||
|
option.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--exclude_dir</b>=<i>pattern</i>
|
<b>--exclude-from=</b><i>filename</i>
|
||||||
When <b>pcregrep</b> is searching the contents of a directory as a consequence
|
Treat each non-empty line of the file as the data for an <b>--exclude</b>
|
||||||
of the <b>-r</b> (recursive search) option, any subdirectories whose names match
|
option. What constitutes a newline when reading the file is the operating
|
||||||
the pattern are excluded. (Note that the <b>--exclude</b> option does not affect
|
system's default. The <b>--newline</b> option has no effect on this option. This
|
||||||
subdirectories.) The pattern is a PCRE regular expression, and is matched
|
option may be given more than once in order to specify a number of files to
|
||||||
against the final component of the name (not the entire path). If a
|
read.
|
||||||
subdirectory name matches both <b>--include_dir</b> and <b>--exclude_dir</b>, it
|
</P>
|
||||||
is excluded. There is no short form for this option.
|
<P>
|
||||||
|
<b>--exclude-dir</b>=<i>pattern</i>
|
||||||
|
Directories whose names match the pattern are skipped without being processed,
|
||||||
|
whatever the setting of the <b>--recursive</b> option. This applies to all
|
||||||
|
directories, whether listed on the command line, obtained from
|
||||||
|
<b>--file-list</b>, or by scanning a parent directory. The pattern is a PCRE
|
||||||
|
regular expression, and is matched against the final component of the directory
|
||||||
|
name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not
|
||||||
|
apply to this pattern. The option may be given any number of times in order to
|
||||||
|
specify more than one pattern. If a directory matches both <b>--include-dir</b>
|
||||||
|
and <b>--exclude-dir</b>, it is excluded. There is no short form for this
|
||||||
|
option.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-F</b>, <b>--fixed-strings</b>
|
<b>-F</b>, <b>--fixed-strings</b>
|
||||||
Interpret each pattern as a list of fixed strings, separated by newlines,
|
Interpret each data-matching pattern as a list of fixed strings, separated by
|
||||||
instead of as a regular expression. The <b>-w</b> (match as a word) and <b>-x</b>
|
newlines, instead of as a regular expression. What constitutes a newline for
|
||||||
(match whole line) options can be used with <b>-F</b>. They apply to each of the
|
this purpose is controlled by the <b>--newline</b> option. The <b>-w</b> (match
|
||||||
fixed strings. A line is selected if any of the fixed strings are found in it
|
as a word) and <b>-x</b> (match whole line) options can be used with <b>-F</b>.
|
||||||
(subject to <b>-w</b> or <b>-x</b>, if present).
|
They apply to each of the fixed strings. A line is selected if any of the fixed
|
||||||
|
strings are found in it (subject to <b>-w</b> or <b>-x</b>, if present). This
|
||||||
|
option applies only to the patterns that are matched against the contents of
|
||||||
|
files; it does not apply to patterns specified by any of the <b>--include</b> or
|
||||||
|
<b>--exclude</b> options.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
|
<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
|
||||||
Read a number of patterns from the file, one per line, and match them against
|
Read patterns from the file, one per line, and match them against
|
||||||
each line of input. A data line is output if any of the patterns match it. The
|
each line of input. What constitutes a newline when reading the file is the
|
||||||
filename can be given as "-" to refer to the standard input. When <b>-f</b> is
|
operating system's default. The <b>--newline</b> option has no effect on this
|
||||||
used, patterns specified on the command line using <b>-e</b> may also be
|
option. Trailing white space is removed from each line, and blank lines are
|
||||||
present; they are tested before the file's patterns. However, no other pattern
|
ignored. An empty file contains no patterns and therefore matches nothing. See
|
||||||
is taken from the command line; all arguments are treated as file names. There
|
also the comments about multiple patterns versus a single pattern with
|
||||||
is an overall maximum of 100 patterns. Trailing white space is removed from
|
alternatives in the description of <b>-e</b> above.
|
||||||
each line, and blank lines are ignored. An empty file contains no patterns and
|
<br>
|
||||||
therefore matches nothing. See also the comments about multiple patterns versus
|
<br>
|
||||||
a single pattern with alternatives in the description of <b>-e</b> above.
|
If this option is given more than once, all the specified files are
|
||||||
|
read. A data line is output if any of the patterns match it. A filename can
|
||||||
|
be given as "-" to refer to the standard input. When <b>-f</b> is used, patterns
|
||||||
|
specified on the command line using <b>-e</b> may also be present; they are
|
||||||
|
tested before the file's patterns. However, no other pattern is taken from the
|
||||||
|
command line; all arguments are treated as the names of paths to be searched.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>--file-list</b>=<i>filename</i>
|
||||||
|
Read a list of files and/or directories that are to be scanned from the given
|
||||||
|
file, one per line. Trailing white space is removed from each line, and blank
|
||||||
|
lines are ignored. These paths are processed before any that are listed on the
|
||||||
|
command line. The filename can be given as "-" to refer to the standard input.
|
||||||
|
If <b>--file</b> and <b>--file-list</b> are both specified as "-", patterns are
|
||||||
|
read first. This is useful only when the standard input is a terminal, from
|
||||||
|
which further lines (the list of files) can be read after an end-of-file
|
||||||
|
indication. If this option is given more than once, all the specified files are
|
||||||
|
read.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--file-offsets</b>
|
<b>--file-offsets</b>
|
||||||
@ -279,7 +366,13 @@ If a line number is also being output, it follows the file name.
|
|||||||
<P>
|
<P>
|
||||||
<b>--help</b>
|
<b>--help</b>
|
||||||
Output a help message, giving brief details of the command options and file
|
Output a help message, giving brief details of the command options and file
|
||||||
type support, and then exit.
|
type support, and then exit. Anything else on the command line is
|
||||||
|
ignored.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>-I</b>
|
||||||
|
Treat binary files as never matching. This is equivalent to
|
||||||
|
<b>--binary-files</b>=<i>without-match</i>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-i</b>, <b>--ignore-case</b>
|
<b>-i</b>, <b>--ignore-case</b>
|
||||||
@ -287,24 +380,35 @@ Ignore upper/lower case distinctions during comparisons.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--include</b>=<i>pattern</i>
|
<b>--include</b>=<i>pattern</i>
|
||||||
When <b>pcregrep</b> is searching the files in a directory as a consequence of
|
If any <b>--include</b> patterns are specified, the only files that are
|
||||||
the <b>-r</b> (recursive search) option, only those regular files whose names
|
processed are those that match one of the patterns (and do not match an
|
||||||
match the pattern are included. Subdirectories are always included and searched
|
<b>--exclude</b> pattern). This option does not affect directories, but it
|
||||||
recursively, subject to the <b>--include_dir</b> and <b>--exclude_dir</b>
|
applies to all files, whether listed on the command line, obtained from
|
||||||
options. The pattern is a PCRE regular expression, and is matched against the
|
<b>--file-list</b>, or by scanning a directory. The pattern is a PCRE regular
|
||||||
final component of the file name (not the entire path). If a file name matches
|
expression, and is matched against the final component of the file name, not
|
||||||
both <b>--include</b> and <b>--exclude</b>, it is excluded. There is no short
|
the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not apply to
|
||||||
form for this option.
|
this pattern. The option may be given any number of times. If a file name
|
||||||
|
matches both an <b>--include</b> and an <b>--exclude</b> pattern, it is excluded.
|
||||||
|
There is no short form for this option.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--include_dir</b>=<i>pattern</i>
|
<b>--include-from=</b><i>filename</i>
|
||||||
When <b>pcregrep</b> is searching the contents of a directory as a consequence
|
Treat each non-empty line of the file as the data for an <b>--include</b>
|
||||||
of the <b>-r</b> (recursive search) option, only those subdirectories whose
|
option. What constitutes a newline for this purpose is the operating system's
|
||||||
names match the pattern are included. (Note that the <b>--include</b> option
|
default. The <b>--newline</b> option has no effect on this option. This option
|
||||||
does not affect subdirectories.) The pattern is a PCRE regular expression, and
|
may be given any number of times; all the files are read.
|
||||||
is matched against the final component of the name (not the entire path). If a
|
</P>
|
||||||
subdirectory name matches both <b>--include_dir</b> and <b>--exclude_dir</b>, it
|
<P>
|
||||||
is excluded. There is no short form for this option.
|
<b>--include-dir</b>=<i>pattern</i>
|
||||||
|
If any <b>--include-dir</b> patterns are specified, the only directories that
|
||||||
|
are processed are those that match one of the patterns (and do not match an
|
||||||
|
<b>--exclude-dir</b> pattern). This applies to all directories, whether listed
|
||||||
|
on the command line, obtained from <b>--file-list</b>, or by scanning a parent
|
||||||
|
directory. The pattern is a PCRE regular expression, and is matched against the
|
||||||
|
final component of the directory name, not the entire path. The <b>-F</b>,
|
||||||
|
<b>-w</b>, and <b>-x</b> options do not apply to this pattern. The option may be
|
||||||
|
given any number of times. If a directory matches both <b>--include-dir</b> and
|
||||||
|
<b>--exclude-dir</b>, it is excluded. There is no short form for this option.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-L</b>, <b>--files-without-match</b>
|
<b>-L</b>, <b>--files-without-match</b>
|
||||||
@ -316,8 +420,11 @@ output once, on a separate line.
|
|||||||
<b>-l</b>, <b>--files-with-matches</b>
|
<b>-l</b>, <b>--files-with-matches</b>
|
||||||
Instead of outputting lines from the files, just output the names of the files
|
Instead of outputting lines from the files, just output the names of the files
|
||||||
containing lines that would have been output. Each file name is output
|
containing lines that would have been output. Each file name is output
|
||||||
once, on a separate line. Searching stops as soon as a matching line is found
|
once, on a separate line. Searching normally stops as soon as a matching line
|
||||||
in a file.
|
is found in a file. However, if the <b>-c</b> (count) option is also used,
|
||||||
|
matching continues in order to obtain the correct count, and those files that
|
||||||
|
have at least one match are listed along with their counts. Using this option
|
||||||
|
with <b>-c</b> is a way of suppressing the listing of files with no matches.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--label</b>=<i>name</i>
|
<b>--label</b>=<i>name</i>
|
||||||
@ -326,6 +433,17 @@ are being output. If not supplied, "(standard input)" is used. There is no
|
|||||||
short form for this option.
|
short form for this option.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>--line-buffered</b>
|
||||||
|
When this option is given, input is read and processed line by line, and the
|
||||||
|
output is flushed after each write. By default, input is read in large chunks,
|
||||||
|
unless <b>pcregrep</b> can determine that it is reading from a terminal (which
|
||||||
|
is currently possible only in Unix-like environments). Output to a terminal is
|
||||||
|
normally automatically flushed by the operating system. This option can be
|
||||||
|
useful when the input or output is attached to a pipe and you do not want
|
||||||
|
<b>pcregrep</b> to buffer up large amounts of data. However, its use will affect
|
||||||
|
performance, and the <b>-M</b> (multiline) option ceases to work.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>--line-offsets</b>
|
<b>--line-offsets</b>
|
||||||
Instead of showing lines or parts of lines that match, show each match as a
|
Instead of showing lines or parts of lines that match, show each match as a
|
||||||
line number, the offset from the start of the line, and a length. The line
|
line number, the offset from the start of the line, and a length. The line
|
||||||
@ -343,27 +461,62 @@ locale is specified, the PCRE library's default (usually the "C" locale) is
|
|||||||
used. There is no short form for this option.
|
used. There is no short form for this option.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>--match-limit</b>=<i>number</i>
|
||||||
|
Processing some regular expression patterns can require a very large amount of
|
||||||
|
memory, leading in some cases to a program crash if not enough is available.
|
||||||
|
Other patterns may take a very long time to search for all possible matching
|
||||||
|
strings. The <b>pcre_exec()</b> function that is called by <b>pcregrep</b> to do
|
||||||
|
the matching has two parameters that can limit the resources that it uses.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
The <b>--match-limit</b> option provides a means of limiting resource usage
|
||||||
|
when processing patterns that are not going to match, but which have a very
|
||||||
|
large number of possibilities in their search trees. The classic example is a
|
||||||
|
pattern that uses nested unlimited repeats. Internally, PCRE uses a function
|
||||||
|
called <b>match()</b> which it calls repeatedly (sometimes recursively). The
|
||||||
|
limit set by <b>--match-limit</b> is imposed on the number of times this
|
||||||
|
function is called during a match, which has the effect of limiting the amount
|
||||||
|
of backtracking that can take place.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
The <b>--recursion-limit</b> option is similar to <b>--match-limit</b>, but
|
||||||
|
instead of limiting the total number of times that <b>match()</b> is called, it
|
||||||
|
limits the depth of recursive calls, which in turn limits the amount of memory
|
||||||
|
that can be used. The recursion depth is a smaller number than the total number
|
||||||
|
of calls, because not all calls to <b>match()</b> are recursive. This limit is
|
||||||
|
of use only if it is set smaller than <b>--match-limit</b>.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
There are no short forms for these options. The default settings are specified
|
||||||
|
when the PCRE library is compiled, with the default default being 10 million.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-M</b>, <b>--multiline</b>
|
<b>-M</b>, <b>--multiline</b>
|
||||||
Allow patterns to match more than one line. When this option is given, patterns
|
Allow patterns to match more than one line. When this option is given, patterns
|
||||||
may usefully contain literal newline characters and internal occurrences of ^
|
may usefully contain literal newline characters and internal occurrences of ^
|
||||||
and $ characters. The output for any one match may consist of more than one
|
and $ characters. The output for a successful match may consist of more than
|
||||||
line. When this option is set, the PCRE library is called in "multiline" mode.
|
one line, the last of which is the one in which the match ended. If the matched
|
||||||
|
string ends with a newline sequence the output ends at the end of that line.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
When this option is set, the PCRE library is called in "multiline" mode.
|
||||||
There is a limit to the number of lines that can be matched, imposed by the way
|
There is a limit to the number of lines that can be matched, imposed by the way
|
||||||
that <b>pcregrep</b> buffers the input file as it scans it. However,
|
that <b>pcregrep</b> buffers the input file as it scans it. However,
|
||||||
<b>pcregrep</b> ensures that at least 8K characters or the rest of the document
|
<b>pcregrep</b> ensures that at least 8K characters or the rest of the document
|
||||||
(whichever is the shorter) are available for forward matching, and similarly
|
(whichever is the shorter) are available for forward matching, and similarly
|
||||||
the previous 8K characters (or all the previous characters, if fewer than 8K)
|
the previous 8K characters (or all the previous characters, if fewer than 8K)
|
||||||
are guaranteed to be available for lookbehind assertions.
|
are guaranteed to be available for lookbehind assertions. This option does not
|
||||||
|
work when input is read line by line (see <b>--line-buffered</b>.)
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-N</b> <i>newline-type</i>, <b>--newline=</b><i>newline-type</i>
|
<b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
|
||||||
The PCRE library supports five different conventions for indicating
|
The PCRE library supports five different conventions for indicating
|
||||||
the ends of lines. They are the single-character sequences CR (carriage return)
|
the ends of lines. They are the single-character sequences CR (carriage return)
|
||||||
and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
|
and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
|
||||||
which recognizes any of the preceding three types, and an "any" convention, in
|
which recognizes any of the preceding three types, and an "any" convention, in
|
||||||
which any Unicode line ending sequence is assumed to end a line. The Unicode
|
which any Unicode line ending sequence is assumed to end a line. The Unicode
|
||||||
sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
|
sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
|
||||||
(formfeed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
|
(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
|
||||||
PS (paragraph separator, U+2029).
|
PS (paragraph separator, U+2029).
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
@ -371,10 +524,13 @@ When the PCRE library is built, a default line-ending sequence is specified.
|
|||||||
This is normally the standard sequence for the operating system. Unless
|
This is normally the standard sequence for the operating system. Unless
|
||||||
otherwise specified by this option, <b>pcregrep</b> uses the library's default.
|
otherwise specified by this option, <b>pcregrep</b> uses the library's default.
|
||||||
The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
|
The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
|
||||||
makes it possible to use <b>pcregrep</b> on files that have come from other
|
makes it possible to use <b>pcregrep</b> to scan files that have come from other
|
||||||
environments without having to modify their line endings. If the data that is
|
environments without having to modify their line endings. If the data that is
|
||||||
being scanned does not agree with the convention set by this option,
|
being scanned does not agree with the convention set by this option,
|
||||||
<b>pcregrep</b> may behave in strange ways.
|
<b>pcregrep</b> may behave in strange ways. Note that this option does not
|
||||||
|
apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
|
||||||
|
<b>--include-from</b> options, which are expected to use the operating system's
|
||||||
|
standard newline sequence.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-n</b>, <b>--line-number</b>
|
<b>-n</b>, <b>--line-number</b>
|
||||||
@ -384,14 +540,46 @@ output, it precedes the line number. This option is forced if
|
|||||||
<b>--line-offsets</b> is used.
|
<b>--line-offsets</b> is used.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>--no-jit</b>
|
||||||
|
If the PCRE library is built with support for just-in-time compiling (which
|
||||||
|
speeds up matching), <b>pcregrep</b> automatically makes use of this, unless it
|
||||||
|
was explicitly disabled at build time. This option can be used to disable the
|
||||||
|
use of JIT at run time. It is provided for testing and working round problems.
|
||||||
|
It should never be needed in normal use.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-o</b>, <b>--only-matching</b>
|
<b>-o</b>, <b>--only-matching</b>
|
||||||
Show only the part of the line that matched a pattern. In this mode, no
|
Show only the part of the line that matched a pattern instead of the whole
|
||||||
context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are
|
line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
|
||||||
ignored. If there is more than one match in a line, each of them is shown
|
<b>-C</b> options are ignored. If there is more than one match in a line, each
|
||||||
separately. If <b>-o</b> is combined with <b>-v</b> (invert the sense of the
|
of them is shown separately. If <b>-o</b> is combined with <b>-v</b> (invert the
|
||||||
match to find non-matching lines), no output is generated, but the return code
|
sense of the match to find non-matching lines), no output is generated, but the
|
||||||
is set appropriately. This option is mutually exclusive with
|
return code is set appropriately. If the matched portion of the line is empty,
|
||||||
<b>--file-offsets</b> and <b>--line-offsets</b>.
|
nothing is output unless the file name or line number are being printed, in
|
||||||
|
which case they are shown on an otherwise empty line. This option is mutually
|
||||||
|
exclusive with <b>--file-offsets</b> and <b>--line-offsets</b>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
|
||||||
|
Show only the part of the line that matched the capturing parentheses of the
|
||||||
|
given number. Up to 32 capturing parentheses are supported, and -o0 is
|
||||||
|
equivalent to <b>-o</b> without a number. Because these options can be given
|
||||||
|
without an argument (see above), if an argument is present, it must be given in
|
||||||
|
the same shell item, for example, -o3 or --only-matching=2. The comments given
|
||||||
|
for the non-argument case above also apply to this case. If the specified
|
||||||
|
capturing parentheses do not exist in the pattern, or were not set in the
|
||||||
|
match, nothing is output unless the file name or line number are being printed.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
If this option is given multiple times, multiple substrings are output, in the
|
||||||
|
order the options are given. For example, -o3 -o1 -o3 causes the substrings
|
||||||
|
matched by capturing parentheses 3 and 1 and then 3 again to be output. By
|
||||||
|
default, there is no separator (but see the next option).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>--om-separator</b>=<i>text</i>
|
||||||
|
Specify a separating string for multiple occurrences of <b>-o</b>. The default
|
||||||
|
is an empty string. Separating strings are never coloured.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-q</b>, <b>--quiet</b>
|
<b>-q</b>, <b>--quiet</b>
|
||||||
@ -407,6 +595,10 @@ immediate end-of-file. This option is a shorthand for setting the <b>-d</b>
|
|||||||
option to "recurse".
|
option to "recurse".
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>--recursion-limit</b>=<i>number</i>
|
||||||
|
See <b>--match-limit</b> above.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-s</b>, <b>--no-messages</b>
|
<b>-s</b>, <b>--no-messages</b>
|
||||||
Suppress error messages about non-existent or unreadable files. Such files are
|
Suppress error messages about non-existent or unreadable files. Such files are
|
||||||
quietly skipped. However, the return code is still 2, even if matches were
|
quietly skipped. However, the return code is still 2, even if matches were
|
||||||
@ -415,13 +607,15 @@ found in other files.
|
|||||||
<P>
|
<P>
|
||||||
<b>-u</b>, <b>--utf-8</b>
|
<b>-u</b>, <b>--utf-8</b>
|
||||||
Operate in UTF-8 mode. This option is available only if PCRE has been compiled
|
Operate in UTF-8 mode. This option is available only if PCRE has been compiled
|
||||||
with UTF-8 support. Both patterns and subject lines must be valid strings of
|
with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
|
||||||
UTF-8 characters.
|
<b>--include</b> options) and all subject lines that are scanned must be valid
|
||||||
|
strings of UTF-8 characters.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-V</b>, <b>--version</b>
|
<b>-V</b>, <b>--version</b>
|
||||||
Write the version numbers of <b>pcregrep</b> and the PCRE library that is being
|
Write the version numbers of <b>pcregrep</b> and the PCRE library to the
|
||||||
used to the standard error stream.
|
standard output and then exit. Anything else on the command line is
|
||||||
|
ignored.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-v</b>, <b>--invert-match</b>
|
<b>-v</b>, <b>--invert-match</b>
|
||||||
@ -431,50 +625,74 @@ the patterns are the ones that are found.
|
|||||||
<P>
|
<P>
|
||||||
<b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
|
<b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
|
||||||
Force the patterns to match only whole words. This is equivalent to having \b
|
Force the patterns to match only whole words. This is equivalent to having \b
|
||||||
at the start and end of the pattern.
|
at the start and end of the pattern. This option applies only to the patterns
|
||||||
|
that are matched against the contents of files; it does not apply to patterns
|
||||||
|
specified by any of the <b>--include</b> or <b>--exclude</b> options.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
|
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
|
||||||
Force the patterns to be anchored (each must start matching at the beginning of
|
Force the patterns to be anchored (each must start matching at the beginning of
|
||||||
a line) and in addition, require them to match entire lines. This is
|
a line) and in addition, require them to match entire lines. This is equivalent
|
||||||
equivalent to having ^ and $ characters at the start and end of each
|
to having ^ and $ characters at the start and end of each alternative branch in
|
||||||
alternative branch in every pattern.
|
every pattern. This option applies only to the patterns that are matched
|
||||||
|
against the contents of files; it does not apply to patterns specified by any
|
||||||
|
of the <b>--include</b> or <b>--exclude</b> options.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
|
<br><a name="SEC6" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
|
||||||
<P>
|
<P>
|
||||||
The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
|
The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
|
||||||
order, for a locale. The first one that is set is used. This can be overridden
|
order, for a locale. The first one that is set is used. This can be overridden
|
||||||
by the <b>--locale</b> option. If no locale is set, the PCRE library's default
|
by the <b>--locale</b> option. If no locale is set, the PCRE library's default
|
||||||
(usually the "C" locale) is used.
|
(usually the "C" locale) is used.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">NEWLINES</a><br>
|
<br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
|
||||||
<P>
|
<P>
|
||||||
The <b>-N</b> (<b>--newline</b>) option allows <b>pcregrep</b> to scan files with
|
The <b>-N</b> (<b>--newline</b>) option allows <b>pcregrep</b> to scan files with
|
||||||
different newline conventions from the default. However, the setting of this
|
different newline conventions from the default. Any parts of the input files
|
||||||
option does not affect the way in which <b>pcregrep</b> writes information to
|
that are written to the standard output are copied identically, with whatever
|
||||||
the standard error and output streams. It uses the string "\n" in C
|
newline sequences they have in the input. However, the setting of this option
|
||||||
<b>printf()</b> calls to indicate newlines, relying on the C I/O library to
|
does not affect the interpretation of files specified by the <b>-f</b>,
|
||||||
convert this to an appropriate sequence if the output is sent to a file.
|
<b>--exclude-from</b>, or <b>--include-from</b> options, which are assumed to use
|
||||||
|
the operating system's standard newline sequence, nor does it affect the way in
|
||||||
|
which <b>pcregrep</b> writes informational messages to the standard error and
|
||||||
|
output streams. For these it uses the string "\n" to indicate newlines,
|
||||||
|
relying on the C I/O library to convert this to an appropriate sequence.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
|
<br><a name="SEC8" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
|
||||||
<P>
|
<P>
|
||||||
The majority of short and long forms of <b>pcregrep</b>'s options are the same
|
Many of the short and long forms of <b>pcregrep</b>'s options are the same
|
||||||
as in the GNU <b>grep</b> program. Any long option of the form
|
as in the GNU <b>grep</b> program. Any long option of the form
|
||||||
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
|
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
|
||||||
(PCRE terminology). However, the <b>--locale</b>, <b>-M</b>, <b>--multiline</b>,
|
(PCRE terminology). However, the <b>--file-list</b>, <b>--file-offsets</b>,
|
||||||
<b>-u</b>, and <b>--utf-8</b> options are specific to <b>pcregrep</b>.
|
<b>--include-dir</b>, <b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>,
|
||||||
|
<b>-M</b>, <b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
|
||||||
|
<b>--recursion-limit</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
|
||||||
|
<b>pcregrep</b>, as is the use of the <b>--only-matching</b> option with a
|
||||||
|
capturing parentheses number.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">OPTIONS WITH DATA</a><br>
|
<P>
|
||||||
|
Although most of the common options work the same way, a few are different in
|
||||||
|
<b>pcregrep</b>. For example, the <b>--include</b> option's argument is a glob
|
||||||
|
for GNU <b>grep</b>, but a regular expression for <b>pcregrep</b>. If both the
|
||||||
|
<b>-c</b> and <b>-l</b> options are given, GNU grep lists only file names,
|
||||||
|
without counts, but <b>pcregrep</b> gives the counts.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC9" href="#TOC1">OPTIONS WITH DATA</a><br>
|
||||||
<P>
|
<P>
|
||||||
There are four different ways in which an option with data can be specified.
|
There are four different ways in which an option with data can be specified.
|
||||||
If a short form option is used, the data may follow immediately, or in the next
|
If a short form option is used, the data may follow immediately, or (with one
|
||||||
command line item. For example:
|
exception) in the next command line item. For example:
|
||||||
<pre>
|
<pre>
|
||||||
-f/some/file
|
-f/some/file
|
||||||
-f /some/file
|
-f /some/file
|
||||||
</pre>
|
</pre>
|
||||||
|
The exception is the <b>-o</b> option, which may appear with or without data.
|
||||||
|
Because of this, if data is present, it must follow immediately in the same
|
||||||
|
item, for example -o3.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
If a long form option is used, the data may appear in the same command line
|
If a long form option is used, the data may appear in the same command line
|
||||||
item, separated by an equals character, or (with one exception) it may appear
|
item, separated by an equals character, or (with two exceptions) it may appear
|
||||||
in the next command line item. For example:
|
in the next command line item. For example:
|
||||||
<pre>
|
<pre>
|
||||||
--file=/some/file
|
--file=/some/file
|
||||||
@ -486,12 +704,12 @@ separate the file name from the option, because the shell does not treat ~
|
|||||||
specially unless it is at the start of an item.
|
specially unless it is at the start of an item.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The exception to the above is the <b>--colour</b> (or <b>--color</b>) option,
|
The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
|
||||||
for which the data is optional. If this option does have data, it must be given
|
<b>--only-matching</b> options, for which the data is optional. If one of these
|
||||||
in the first form, using an equals character. Otherwise it will be assumed that
|
options does have data, it must be given in the first form, using an equals
|
||||||
it has no data.
|
character. Otherwise <b>pcregrep</b> will assume that it has no data.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC9" href="#TOC1">MATCHING ERRORS</a><br>
|
<br><a name="SEC10" href="#TOC1">MATCHING ERRORS</a><br>
|
||||||
<P>
|
<P>
|
||||||
It is possible to supply a regular expression that takes a very long time to
|
It is possible to supply a regular expression that takes a very long time to
|
||||||
fail to match certain lines. Such patterns normally involve nested indefinite
|
fail to match certain lines. Such patterns normally involve nested indefinite
|
||||||
@ -501,19 +719,25 @@ in these circumstances. If this happens, <b>pcregrep</b> outputs an error
|
|||||||
message and the line that caused the problem to the standard error stream. If
|
message and the line that caused the problem to the standard error stream. If
|
||||||
there are more than 20 such errors, <b>pcregrep</b> gives up.
|
there are more than 20 such errors, <b>pcregrep</b> gives up.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC10" href="#TOC1">DIAGNOSTICS</a><br>
|
<P>
|
||||||
|
The <b>--match-limit</b> option of <b>pcregrep</b> can be used to set the overall
|
||||||
|
resource limit; there is a second option called <b>--recursion-limit</b> that
|
||||||
|
sets a limit on the amount of memory (usually stack) that is used (see the
|
||||||
|
discussion of these options above).
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC11" href="#TOC1">DIAGNOSTICS</a><br>
|
||||||
<P>
|
<P>
|
||||||
Exit status is 0 if any matches were found, 1 if no matches were found, and 2
|
Exit status is 0 if any matches were found, 1 if no matches were found, and 2
|
||||||
for syntax errors and non-existent or inacessible files (even if matches were
|
for syntax errors, overlong lines, non-existent or inaccessible files (even if
|
||||||
found in other files) or too many matching errors. Using the <b>-s</b> option to
|
matches were found in other files) or too many matching errors. Using the
|
||||||
suppress error messages about inaccessble files does not affect the return
|
<b>-s</b> option to suppress error messages about inaccessible files does not
|
||||||
code.
|
affect the return code.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC11" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcrepattern</b>(3), <b>pcretest</b>(1).
|
<b>pcrepattern</b>(3), <b>pcresyntax</b>(3), <b>pcretest</b>(1).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
@ -522,11 +746,11 @@ University Computing Service
|
|||||||
Cambridge CB2 3QH, England.
|
Cambridge CB2 3QH, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 01 March 2009
|
Last updated: 13 September 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2009 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
458
tools/pcre/doc/html/pcrejit.html
Normal file
458
tools/pcre/doc/html/pcrejit.html
Normal file
@ -0,0 +1,458 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcrejit specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcrejit man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<ul>
|
||||||
|
<li><a name="TOC1" href="#SEC1">PCRE JUST-IN-TIME COMPILER SUPPORT</a>
|
||||||
|
<li><a name="TOC2" href="#SEC2">8-BIT, 16-BIT AND 32-BIT SUPPORT</a>
|
||||||
|
<li><a name="TOC3" href="#SEC3">AVAILABILITY OF JIT SUPPORT</a>
|
||||||
|
<li><a name="TOC4" href="#SEC4">SIMPLE USE OF JIT</a>
|
||||||
|
<li><a name="TOC5" href="#SEC5">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a>
|
||||||
|
<li><a name="TOC6" href="#SEC6">RETURN VALUES FROM JIT EXECUTION</a>
|
||||||
|
<li><a name="TOC7" href="#SEC7">SAVING AND RESTORING COMPILED PATTERNS</a>
|
||||||
|
<li><a name="TOC8" href="#SEC8">CONTROLLING THE JIT STACK</a>
|
||||||
|
<li><a name="TOC9" href="#SEC9">JIT STACK FAQ</a>
|
||||||
|
<li><a name="TOC10" href="#SEC10">EXAMPLE CODE</a>
|
||||||
|
<li><a name="TOC11" href="#SEC11">JIT FAST PATH API</a>
|
||||||
|
<li><a name="TOC12" href="#SEC12">SEE ALSO</a>
|
||||||
|
<li><a name="TOC13" href="#SEC13">AUTHOR</a>
|
||||||
|
<li><a name="TOC14" href="#SEC14">REVISION</a>
|
||||||
|
</ul>
|
||||||
|
<br><a name="SEC1" href="#TOC1">PCRE JUST-IN-TIME COMPILER SUPPORT</a><br>
|
||||||
|
<P>
|
||||||
|
Just-in-time compiling is a heavyweight optimization that can greatly speed up
|
||||||
|
pattern matching. However, it comes at the cost of extra processing before the
|
||||||
|
match is performed. Therefore, it is of most benefit when the same pattern is
|
||||||
|
going to be matched many times. This does not necessarily mean many calls of a
|
||||||
|
matching function; if the pattern is not anchored, matching attempts may take
|
||||||
|
place many times at various positions in the subject, even for a single call.
|
||||||
|
Therefore, if the subject string is very long, it may still pay to use JIT for
|
||||||
|
one-off matches.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
JIT support applies only to the traditional Perl-compatible matching function.
|
||||||
|
It does not apply when the DFA matching function is being used. The code for
|
||||||
|
this support was written by Zoltan Herczeg.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC2" href="#TOC1">8-BIT, 16-BIT AND 32-BIT SUPPORT</a><br>
|
||||||
|
<P>
|
||||||
|
JIT support is available for all of the 8-bit, 16-bit and 32-bit PCRE
|
||||||
|
libraries. To keep this documentation simple, only the 8-bit interface is
|
||||||
|
described in what follows. If you are using the 16-bit library, substitute the
|
||||||
|
16-bit functions and 16-bit structures (for example, <i>pcre16_jit_stack</i>
|
||||||
|
instead of <i>pcre_jit_stack</i>). If you are using the 32-bit library,
|
||||||
|
substitute the 32-bit functions and 32-bit structures (for example,
|
||||||
|
<i>pcre32_jit_stack</i> instead of <i>pcre_jit_stack</i>).
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC3" href="#TOC1">AVAILABILITY OF JIT SUPPORT</a><br>
|
||||||
|
<P>
|
||||||
|
JIT support is an optional feature of PCRE. The "configure" option --enable-jit
|
||||||
|
(or equivalent CMake option) must be set when PCRE is built if you want to use
|
||||||
|
JIT. The support is limited to the following hardware platforms:
|
||||||
|
<pre>
|
||||||
|
ARM v5, v7, and Thumb2
|
||||||
|
Intel x86 32-bit and 64-bit
|
||||||
|
MIPS 32-bit
|
||||||
|
Power PC 32-bit and 64-bit
|
||||||
|
SPARC 32-bit (experimental)
|
||||||
|
</pre>
|
||||||
|
If --enable-jit is set on an unsupported platform, compilation fails.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
A program that is linked with PCRE 8.20 or later can tell if JIT support is
|
||||||
|
available by calling <b>pcre_config()</b> with the PCRE_CONFIG_JIT option. The
|
||||||
|
result is 1 when JIT is available, and 0 otherwise. However, a simple program
|
||||||
|
does not need to check this in order to use JIT. The normal API is implemented
|
||||||
|
in a way that falls back to the interpretive code if JIT is not available. For
|
||||||
|
programs that need the best possible performance, there is also a "fast path"
|
||||||
|
API that is JIT-specific.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If your program may sometimes be linked with versions of PCRE that are older
|
||||||
|
than 8.20, but you want to use JIT when it is available, you can test
|
||||||
|
the values of PCRE_MAJOR and PCRE_MINOR, or the existence of a JIT macro such
|
||||||
|
as PCRE_CONFIG_JIT, for compile-time control of your code.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC4" href="#TOC1">SIMPLE USE OF JIT</a><br>
|
||||||
|
<P>
|
||||||
|
You have to do two things to make use of the JIT support in the simplest way:
|
||||||
|
<pre>
|
||||||
|
(1) Call <b>pcre_study()</b> with the PCRE_STUDY_JIT_COMPILE option for
|
||||||
|
each compiled pattern, and pass the resulting <b>pcre_extra</b> block to
|
||||||
|
<b>pcre_exec()</b>.
|
||||||
|
|
||||||
|
(2) Use <b>pcre_free_study()</b> to free the <b>pcre_extra</b> block when it is
|
||||||
|
no longer needed, instead of just freeing it yourself. This ensures that
|
||||||
|
any JIT data is also freed.
|
||||||
|
</pre>
|
||||||
|
For a program that may be linked with pre-8.20 versions of PCRE, you can insert
|
||||||
|
<pre>
|
||||||
|
#ifndef PCRE_STUDY_JIT_COMPILE
|
||||||
|
#define PCRE_STUDY_JIT_COMPILE 0
|
||||||
|
#endif
|
||||||
|
</pre>
|
||||||
|
so that no option is passed to <b>pcre_study()</b>, and then use something like
|
||||||
|
this to free the study data:
|
||||||
|
<pre>
|
||||||
|
#ifdef PCRE_CONFIG_JIT
|
||||||
|
pcre_free_study(study_ptr);
|
||||||
|
#else
|
||||||
|
pcre_free(study_ptr);
|
||||||
|
#endif
|
||||||
|
</pre>
|
||||||
|
PCRE_STUDY_JIT_COMPILE requests the JIT compiler to generate code for complete
|
||||||
|
matches. If you want to run partial matches using the PCRE_PARTIAL_HARD or
|
||||||
|
PCRE_PARTIAL_SOFT options of <b>pcre_exec()</b>, you should set one or both of
|
||||||
|
the following options in addition to, or instead of, PCRE_STUDY_JIT_COMPILE
|
||||||
|
when you call <b>pcre_study()</b>:
|
||||||
|
<pre>
|
||||||
|
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
|
||||||
|
PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
|
||||||
|
</pre>
|
||||||
|
The JIT compiler generates different optimized code for each of the three
|
||||||
|
modes (normal, soft partial, hard partial). When <b>pcre_exec()</b> is called,
|
||||||
|
the appropriate code is run if it is available. Otherwise, the pattern is
|
||||||
|
matched using interpretive code.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
In some circumstances you may need to call additional functions. These are
|
||||||
|
described in the section entitled
|
||||||
|
<a href="#stackcontrol">"Controlling the JIT stack"</a>
|
||||||
|
below.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If JIT support is not available, PCRE_STUDY_JIT_COMPILE etc. are ignored, and
|
||||||
|
no JIT data is created. Otherwise, the compiled pattern is passed to the JIT
|
||||||
|
compiler, which turns it into machine code that executes much faster than the
|
||||||
|
normal interpretive code. When <b>pcre_exec()</b> is passed a <b>pcre_extra</b>
|
||||||
|
block containing a pointer to JIT code of the appropriate mode (normal or
|
||||||
|
hard/soft partial), it obeys that code instead of running the interpreter. The
|
||||||
|
result is identical, but the compiled JIT code runs much faster.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There are some <b>pcre_exec()</b> options that are not supported for JIT
|
||||||
|
execution. There are also some pattern items that JIT cannot handle. Details
|
||||||
|
are given below. In both cases, execution automatically falls back to the
|
||||||
|
interpretive code. If you want to know whether JIT was actually used for a
|
||||||
|
particular match, you should arrange for a JIT callback function to be set up
|
||||||
|
as described in the section entitled
|
||||||
|
<a href="#stackcontrol">"Controlling the JIT stack"</a>
|
||||||
|
below, even if you do not need to supply a non-default JIT stack. Such a
|
||||||
|
callback function is called whenever JIT code is about to be obeyed. If the
|
||||||
|
execution options are not right for JIT execution, the callback function is not
|
||||||
|
obeyed.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If the JIT compiler finds an unsupported item, no JIT data is generated. You
|
||||||
|
can find out if JIT execution is available after studying a pattern by calling
|
||||||
|
<b>pcre_fullinfo()</b> with the PCRE_INFO_JIT option. A result of 1 means that
|
||||||
|
JIT compilation was successful. A result of 0 means that JIT support is not
|
||||||
|
available, or the pattern was not studied with PCRE_STUDY_JIT_COMPILE etc., or
|
||||||
|
the JIT compiler was not able to handle the pattern.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Once a pattern has been studied, with or without JIT, it can be used as many
|
||||||
|
times as you like for matching different subject strings.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC5" href="#TOC1">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a><br>
|
||||||
|
<P>
|
||||||
|
The only <b>pcre_exec()</b> options that are supported for JIT execution are
|
||||||
|
PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK, PCRE_NO_UTF32_CHECK, PCRE_NOTBOL,
|
||||||
|
PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and
|
||||||
|
PCRE_PARTIAL_SOFT.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The unsupported pattern items are:
|
||||||
|
<pre>
|
||||||
|
\C match a single byte; not supported in UTF-8 mode
|
||||||
|
(?Cn) callouts
|
||||||
|
(*PRUNE) )
|
||||||
|
(*SKIP) ) backtracking control verbs
|
||||||
|
(*THEN) )
|
||||||
|
</pre>
|
||||||
|
Support for some of these may be added in future.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC6" href="#TOC1">RETURN VALUES FROM JIT EXECUTION</a><br>
|
||||||
|
<P>
|
||||||
|
When a pattern is matched using JIT execution, the return values are the same
|
||||||
|
as those given by the interpretive <b>pcre_exec()</b> code, with the addition of
|
||||||
|
one new error code: PCRE_ERROR_JIT_STACKLIMIT. This means that the memory used
|
||||||
|
for the JIT stack was insufficient. See
|
||||||
|
<a href="#stackcontrol">"Controlling the JIT stack"</a>
|
||||||
|
below for a discussion of JIT stack usage. For compatibility with the
|
||||||
|
interpretive <b>pcre_exec()</b> code, no more than two-thirds of the
|
||||||
|
<i>ovector</i> argument is used for passing back captured substrings.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The error code PCRE_ERROR_MATCHLIMIT is returned by the JIT code if searching a
|
||||||
|
very large pattern tree goes on for too long, as it is in the same circumstance
|
||||||
|
when JIT is not used, but the details of exactly what is counted are not the
|
||||||
|
same. The PCRE_ERROR_RECURSIONLIMIT error code is never returned by JIT
|
||||||
|
execution.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC7" href="#TOC1">SAVING AND RESTORING COMPILED PATTERNS</a><br>
|
||||||
|
<P>
|
||||||
|
The code that is generated by the JIT compiler is architecture-specific, and is
|
||||||
|
also position dependent. For those reasons it cannot be saved (in a file or
|
||||||
|
database) and restored later like the bytecode and other data of a compiled
|
||||||
|
pattern. Saving and restoring compiled patterns is not something many people
|
||||||
|
do. More detail about this facility is given in the
|
||||||
|
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
|
||||||
|
documentation. It should be possible to run <b>pcre_study()</b> on a saved and
|
||||||
|
restored pattern, and thereby recreate the JIT data, but because JIT
|
||||||
|
compilation uses significant resources, it is probably not worth doing this;
|
||||||
|
you might as well recompile the original pattern.
|
||||||
|
<a name="stackcontrol"></a></P>
|
||||||
|
<br><a name="SEC8" href="#TOC1">CONTROLLING THE JIT STACK</a><br>
|
||||||
|
<P>
|
||||||
|
When the compiled JIT code runs, it needs a block of memory to use as a stack.
|
||||||
|
By default, it uses 32K on the machine stack. However, some large or
|
||||||
|
complicated patterns need more than this. The error PCRE_ERROR_JIT_STACKLIMIT
|
||||||
|
is given when there is not enough stack. Three functions are provided for
|
||||||
|
managing blocks of memory for use as JIT stacks. There is further discussion
|
||||||
|
about the use of JIT stacks in the section entitled
|
||||||
|
<a href="#stackfaq">"JIT stack FAQ"</a>
|
||||||
|
below.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The <b>pcre_jit_stack_alloc()</b> function creates a JIT stack. Its arguments
|
||||||
|
are a starting size and a maximum size, and it returns a pointer to an opaque
|
||||||
|
structure of type <b>pcre_jit_stack</b>, or NULL if there is an error. The
|
||||||
|
<b>pcre_jit_stack_free()</b> function can be used to free a stack that is no
|
||||||
|
longer needed. (For the technically minded: the address space is allocated by
|
||||||
|
mmap or VirtualAlloc.)
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
JIT uses far less memory for recursion than the interpretive code,
|
||||||
|
and a maximum stack size of 512K to 1M should be more than enough for any
|
||||||
|
pattern.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The <b>pcre_assign_jit_stack()</b> function specifies which stack JIT code
|
||||||
|
should use. Its arguments are as follows:
|
||||||
|
<pre>
|
||||||
|
pcre_extra *extra
|
||||||
|
pcre_jit_callback callback
|
||||||
|
void *data
|
||||||
|
</pre>
|
||||||
|
The <i>extra</i> argument must be the result of studying a pattern with
|
||||||
|
PCRE_STUDY_JIT_COMPILE etc. There are three cases for the values of the other
|
||||||
|
two options:
|
||||||
|
<pre>
|
||||||
|
(1) If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32K block
|
||||||
|
on the machine stack is used.
|
||||||
|
|
||||||
|
(2) If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must be
|
||||||
|
a valid JIT stack, the result of calling <b>pcre_jit_stack_alloc()</b>.
|
||||||
|
|
||||||
|
(3) If <i>callback</i> is not NULL, it must point to a function that is
|
||||||
|
called with <i>data</i> as an argument at the start of matching, in
|
||||||
|
order to set up a JIT stack. If the return from the callback
|
||||||
|
function is NULL, the internal 32K stack is used; otherwise the
|
||||||
|
return value must be a valid JIT stack, the result of calling
|
||||||
|
<b>pcre_jit_stack_alloc()</b>.
|
||||||
|
</pre>
|
||||||
|
A callback function is obeyed whenever JIT code is about to be run; it is not
|
||||||
|
obeyed when <b>pcre_exec()</b> is called with options that are incompatible with
|
||||||
|
JIT execution. A callback function can therefore be used to determine whether a
|
||||||
|
match operation was executed by JIT or by the interpreter.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
You may safely use the same JIT stack for more than one pattern (either by
|
||||||
|
assigning directly or by callback), as long as the patterns are all matched
|
||||||
|
sequentially in the same thread. In a multithread application, if you do not
|
||||||
|
specify a JIT stack, or if you assign or pass back NULL from a callback, that
|
||||||
|
is thread-safe, because each thread has its own machine stack. However, if you
|
||||||
|
assign or pass back a non-NULL JIT stack, this must be a different stack for
|
||||||
|
each thread so that the application is thread-safe.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Strictly speaking, even more is allowed. You can assign the same non-NULL stack
|
||||||
|
to any number of patterns as long as they are not used for matching by multiple
|
||||||
|
threads at the same time. For example, you can assign the same stack to all
|
||||||
|
compiled patterns, and use a global mutex in the callback to wait until the
|
||||||
|
stack is available for use. However, this is an inefficient solution, and not
|
||||||
|
recommended.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
This is a suggestion for how a multithreaded program that needs to set up
|
||||||
|
non-default JIT stacks might operate:
|
||||||
|
<pre>
|
||||||
|
During thread initialization
|
||||||
|
thread_local_var = pcre_jit_stack_alloc(...)
|
||||||
|
|
||||||
|
During thread exit
|
||||||
|
pcre_jit_stack_free(thread_local_var)
|
||||||
|
|
||||||
|
Use a one-line callback function
|
||||||
|
return thread_local_var
|
||||||
|
</pre>
|
||||||
|
All the functions described in this section do nothing if JIT is not available,
|
||||||
|
and <b>pcre_assign_jit_stack()</b> does nothing unless the <b>extra</b> argument
|
||||||
|
is non-NULL and points to a <b>pcre_extra</b> block that is the result of a
|
||||||
|
successful study with PCRE_STUDY_JIT_COMPILE etc.
|
||||||
|
<a name="stackfaq"></a></P>
|
||||||
|
<br><a name="SEC9" href="#TOC1">JIT STACK FAQ</a><br>
|
||||||
|
<P>
|
||||||
|
(1) Why do we need JIT stacks?
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
PCRE (and JIT) is a recursive, depth-first engine, so it needs a stack where
|
||||||
|
the local data of the current node is pushed before checking its child nodes.
|
||||||
|
Allocating real machine stack on some platforms is difficult. For example, the
|
||||||
|
stack chain needs to be updated every time if we extend the stack on PowerPC.
|
||||||
|
Although it is possible, its updating time overhead decreases performance. So
|
||||||
|
we do the recursion in memory.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
(2) Why don't we simply allocate blocks of memory with <b>malloc()</b>?
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
Modern operating systems have a nice feature: they can reserve an address space
|
||||||
|
instead of allocating memory. We can safely allocate memory pages inside this
|
||||||
|
address space, so the stack could grow without moving memory data (this is
|
||||||
|
important because of pointers). Thus we can allocate 1M address space, and use
|
||||||
|
only a single memory page (usually 4K) if that is enough. However, we can still
|
||||||
|
grow up to 1M anytime if needed.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
(3) Who "owns" a JIT stack?
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
The owner of the stack is the user program, not the JIT studied pattern or
|
||||||
|
anything else. The user program must ensure that if a stack is used by
|
||||||
|
<b>pcre_exec()</b>, (that is, it is assigned to the pattern currently running),
|
||||||
|
that stack must not be used by any other threads (to avoid overwriting the same
|
||||||
|
memory area). The best practice for multithreaded programs is to allocate a
|
||||||
|
stack for each thread, and return this stack through the JIT callback function.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
(4) When should a JIT stack be freed?
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
You can free a JIT stack at any time, as long as it will not be used by
|
||||||
|
<b>pcre_exec()</b> again. When you assign the stack to a pattern, only a pointer
|
||||||
|
is set. There is no reference counting or any other magic. You can free the
|
||||||
|
patterns and stacks in any order, anytime. Just <i>do not</i> call
|
||||||
|
<b>pcre_exec()</b> with a pattern pointing to an already freed stack, as that
|
||||||
|
will cause SEGFAULT. (Also, do not free a stack currently used by
|
||||||
|
<b>pcre_exec()</b> in another thread). You can also replace the stack for a
|
||||||
|
pattern at any time. You can even free the previous stack before assigning a
|
||||||
|
replacement.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
(5) Should I allocate/free a stack every time before/after calling
|
||||||
|
<b>pcre_exec()</b>?
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
No, because this is too costly in terms of resources. However, you could
|
||||||
|
implement some clever idea which releases the stack if it is not used in let's
|
||||||
|
say two minutes. The JIT callback can help to achieve this without keeping a
|
||||||
|
list of the currently JIT studied patterns.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
(6) OK, the stack is for long term memory allocation. But what happens if a
|
||||||
|
pattern causes stack overflow with a stack of 1M? Is that 1M kept until the
|
||||||
|
stack is freed?
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
Especially on embedded systems, it might be a good idea to release memory
|
||||||
|
sometimes without freeing the stack. There is no API for this at the moment.
|
||||||
|
Probably a function call which returns with the currently allocated memory for
|
||||||
|
any stack and another which allows releasing memory (shrinking the stack) would
|
||||||
|
be a good idea if someone needs this.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
(7) This is too much of a headache. Isn't there any better solution for JIT
|
||||||
|
stack handling?
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
No, thanks to Windows. If POSIX threads were used everywhere, we could throw
|
||||||
|
out this complicated API.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC10" href="#TOC1">EXAMPLE CODE</a><br>
|
||||||
|
<P>
|
||||||
|
This is a single-threaded example that specifies a JIT stack without using a
|
||||||
|
callback.
|
||||||
|
<pre>
|
||||||
|
int rc;
|
||||||
|
int ovector[30];
|
||||||
|
pcre *re;
|
||||||
|
pcre_extra *extra;
|
||||||
|
pcre_jit_stack *jit_stack;
|
||||||
|
|
||||||
|
re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
|
||||||
|
/* Check for errors */
|
||||||
|
extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
|
||||||
|
jit_stack = pcre_jit_stack_alloc(32*1024, 512*1024);
|
||||||
|
/* Check for error (NULL) */
|
||||||
|
pcre_assign_jit_stack(extra, NULL, jit_stack);
|
||||||
|
rc = pcre_exec(re, extra, subject, length, 0, 0, ovector, 30);
|
||||||
|
/* Check results */
|
||||||
|
pcre_free(re);
|
||||||
|
pcre_free_study(extra);
|
||||||
|
pcre_jit_stack_free(jit_stack);
|
||||||
|
|
||||||
|
</PRE>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC11" href="#TOC1">JIT FAST PATH API</a><br>
|
||||||
|
<P>
|
||||||
|
Because the API described above falls back to interpreted execution when JIT is
|
||||||
|
not available, it is convenient for programs that are written for general use
|
||||||
|
in many environments. However, calling JIT via <b>pcre_exec()</b> does have a
|
||||||
|
performance impact. Programs that are written for use where JIT is known to be
|
||||||
|
available, and which need the best possible performance, can instead use a
|
||||||
|
"fast path" API to call JIT execution directly instead of calling
|
||||||
|
<b>pcre_exec()</b> (obviously only for patterns that have been successfully
|
||||||
|
studied by JIT).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The fast path function is called <b>pcre_jit_exec()</b>, and it takes exactly
|
||||||
|
the same arguments as <b>pcre_exec()</b>, plus one additional argument that
|
||||||
|
must point to a JIT stack. The JIT stack arrangements described above do not
|
||||||
|
apply. The return values are the same as for <b>pcre_exec()</b>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
When you call <b>pcre_exec()</b>, as well as testing for invalid options, a
|
||||||
|
number of other sanity checks are performed on the arguments. For example, if
|
||||||
|
the subject pointer is NULL, or its length is negative, an immediate error is
|
||||||
|
given. Also, unless PCRE_NO_UTF[8|16|32]_CHECK is set, a UTF subject string is tested
|
||||||
|
for validity. In the interests of speed, these checks do not happen on the JIT
|
||||||
|
fast path, and if invalid data is passed, the result is undefined.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Bypassing the sanity checks and the <b>pcre_exec()</b> wrapping can give
|
||||||
|
speedups of more than 10%.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
|
||||||
|
<P>
|
||||||
|
<b>pcreapi</b>(3)
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
|
||||||
|
<P>
|
||||||
|
Philip Hazel (FAQ by Zoltan Herczeg)
|
||||||
|
<br>
|
||||||
|
University Computing Service
|
||||||
|
<br>
|
||||||
|
Cambridge CB2 3QH, England.
|
||||||
|
<br>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
|
||||||
|
<P>
|
||||||
|
Last updated: 31 October 2012
|
||||||
|
<br>
|
||||||
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
|
<br>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
86
tools/pcre/doc/html/pcrelimits.html
Normal file
86
tools/pcre/doc/html/pcrelimits.html
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcrelimits specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcrelimits man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SIZE AND OTHER LIMITATIONS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
There are some size limitations in PCRE but it is hoped that they will never in
|
||||||
|
practice be relevant.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The maximum length of a compiled pattern is approximately 64K data units (bytes
|
||||||
|
for the 8-bit library, 16-bit units for the 16-bit library, and 32-bit units for
|
||||||
|
the 32-bit library) if PCRE is compiled with the default internal linkage size
|
||||||
|
of 2 bytes. If you want to process regular expressions that are truly enormous,
|
||||||
|
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
|
||||||
|
16-bit or 32-bit library, 3 is rounded up to 4). See the <b>README</b> file in
|
||||||
|
the source distribution and the
|
||||||
|
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
||||||
|
documentation for details. In these cases the limit is substantially larger.
|
||||||
|
However, the speed of execution is slower.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
All values in repeating quantifiers must be less than 65536.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||||
|
no more than 65535 capturing subpatterns.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a limit to the number of forward references to subsequent subpatterns
|
||||||
|
of around 200,000. Repeated forward references with fixed upper limits, for
|
||||||
|
example, (?2){0,100} when subpattern number 2 is to the right, are included in
|
||||||
|
the count. There is no limit to the number of backward references.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The maximum length of name for a named subpattern is 32 characters, and the
|
||||||
|
maximum number of named subpatterns is 10000.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||||
|
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The maximum length of a subject string is the largest positive number that an
|
||||||
|
integer variable can hold. However, when using the traditional matching
|
||||||
|
function, PCRE uses recursion to handle subpatterns and indefinite repetition.
|
||||||
|
This means that the available stack space may limit the size of a subject
|
||||||
|
string that can be processed by certain patterns. For a discussion of stack
|
||||||
|
issues, see the
|
||||||
|
<a href="pcrestack.html"><b>pcrestack</b></a>
|
||||||
|
documentation.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
AUTHOR
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
Philip Hazel
|
||||||
|
<br>
|
||||||
|
University Computing Service
|
||||||
|
<br>
|
||||||
|
Cambridge CB2 3QH, England.
|
||||||
|
<br>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
REVISION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
Last updated: 04 May 2012
|
||||||
|
<br>
|
||||||
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
|
<br>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
@ -26,13 +26,17 @@ man page, in case the conversion went wrong.
|
|||||||
<P>
|
<P>
|
||||||
This document describes the two different algorithms that are available in PCRE
|
This document describes the two different algorithms that are available in PCRE
|
||||||
for matching a compiled regular expression against a given subject string. The
|
for matching a compiled regular expression against a given subject string. The
|
||||||
"standard" algorithm is the one provided by the <b>pcre_exec()</b> function.
|
"standard" algorithm is the one provided by the <b>pcre_exec()</b>,
|
||||||
This works in the same was as Perl's matching function, and provides a
|
<b>pcre16_exec()</b> and <b>pcre32_exec()</b> functions. These work in the same
|
||||||
Perl-compatible matching operation.
|
way as Perl's matching function, and provide a Perl-compatible matching operation.
|
||||||
|
The just-in-time (JIT) optimization that is described in the
|
||||||
|
<a href="pcrejit.html"><b>pcrejit</b></a>
|
||||||
|
documentation is compatible with these functions.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
An alternative algorithm is provided by the <b>pcre_dfa_exec()</b> function;
|
An alternative algorithm is provided by the <b>pcre_dfa_exec()</b>,
|
||||||
this operates in a different way, and is not Perl-compatible. It has advantages
|
<b>pcre16_dfa_exec()</b> and <b>pcre32_dfa_exec()</b> functions; they operate in
|
||||||
|
a different way, and are not Perl-compatible. This alternative has advantages
|
||||||
and disadvantages compared with the standard algorithm, and these are described
|
and disadvantages compared with the standard algorithm, and these are described
|
||||||
below.
|
below.
|
||||||
</P>
|
</P>
|
||||||
@ -96,22 +100,28 @@ traditional finite state machine (it keeps multiple states active
|
|||||||
simultaneously).
|
simultaneously).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
Although the general principle of this matching algorithm is that it scans the
|
||||||
|
subject string only once, without backtracking, there is one exception: when a
|
||||||
|
lookaround assertion is encountered, the characters following or preceding the
|
||||||
|
current point have to be independently inspected.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
The scan continues until either the end of the subject is reached, or there are
|
The scan continues until either the end of the subject is reached, or there are
|
||||||
no more unterminated paths. At this point, terminated paths represent the
|
no more unterminated paths. At this point, terminated paths represent the
|
||||||
different matching possibilities (if there are none, the match has failed).
|
different matching possibilities (if there are none, the match has failed).
|
||||||
Thus, if there is more than one possible match, this algorithm finds all of
|
Thus, if there is more than one possible match, this algorithm finds all of
|
||||||
them, and in particular, it finds the longest. In PCRE, there is an option to
|
them, and in particular, it finds the longest. The matches are returned in
|
||||||
stop the algorithm after the first match (which is necessarily the shortest)
|
decreasing order of length. There is an option to stop the algorithm after the
|
||||||
has been found.
|
first match (which is necessarily the shortest) is found.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Note that all the matches that are found start at the same point in the
|
Note that all the matches that are found start at the same point in the
|
||||||
subject. If the pattern
|
subject. If the pattern
|
||||||
<pre>
|
<pre>
|
||||||
cat(er(pillar)?)
|
cat(er(pillar)?)?
|
||||||
</pre>
|
</pre>
|
||||||
is matched against the string "the caterpillar catchment", the result will be
|
is matched against the string "the caterpillar catchment", the result will be
|
||||||
the three strings "cat", "cater", and "caterpillar" that start at the fourth
|
the three strings "caterpillar", "cater", and "cat" that start at the fifth
|
||||||
character of the subject. The algorithm does not automatically move on to find
|
character of the subject. The algorithm does not automatically move on to find
|
||||||
matches that start at later positions.
|
matches that start at later positions.
|
||||||
</P>
|
</P>
|
||||||
@ -157,10 +167,10 @@ and not on others), is not supported. It causes an error if encountered.
|
|||||||
always 1, and the value of the <i>capture_last</i> field is always -1.
|
always 1, and the value of the <i>capture_last</i> field is always -1.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
7. The \C escape sequence, which (in the standard algorithm) matches a single
|
7. The \C escape sequence, which (in the standard algorithm) always matches a
|
||||||
byte, even in UTF-8 mode, is not supported because the alternative algorithm
|
single data unit, even in UTF-8, UTF-16 or UTF-32 modes, is not supported in
|
||||||
moves through the subject string one character at a time, for all active paths
|
these modes, because the alternative algorithm moves through the subject string
|
||||||
through the tree.
|
one character (not data unit) at a time, for all active paths through the tree.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
|
8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
|
||||||
@ -177,16 +187,15 @@ match using the standard algorithm, you have to do kludgy things with
|
|||||||
callouts.
|
callouts.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
2. There is much better support for partial matching. The restrictions on the
|
2. Because the alternative algorithm scans the subject string just once, and
|
||||||
content of the pattern that apply when using the standard algorithm for partial
|
never needs to backtrack (except for lookbehinds), it is possible to pass very
|
||||||
matching do not apply to the alternative algorithm. For non-anchored patterns,
|
long subject strings to the matching function in several pieces, checking for
|
||||||
the starting position of a partial match is available.
|
partial matching each time. Although it is possible to do multi-segment
|
||||||
</P>
|
matching using the standard algorithm by retaining partially matched
|
||||||
<P>
|
substrings, it is more complicated. The
|
||||||
3. Because the alternative algorithm scans the subject string just once, and
|
<a href="pcrepartial.html"><b>pcrepartial</b></a>
|
||||||
never needs to backtrack, it is possible to pass very long subject strings to
|
documentation gives details of partial matching and discusses multi-segment
|
||||||
the matching function in several pieces, checking for partial matching each
|
matching.
|
||||||
time.
|
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">DISADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
|
<br><a name="SEC6" href="#TOC1">DISADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -215,9 +224,9 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 19 April 2008
|
Last updated: 08 January 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2008 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -14,19 +14,24 @@ man page, in case the conversion went wrong.
|
|||||||
<br>
|
<br>
|
||||||
<ul>
|
<ul>
|
||||||
<li><a name="TOC1" href="#SEC1">PARTIAL MATCHING IN PCRE</a>
|
<li><a name="TOC1" href="#SEC1">PARTIAL MATCHING IN PCRE</a>
|
||||||
<li><a name="TOC2" href="#SEC2">RESTRICTED PATTERNS FOR PCRE_PARTIAL</a>
|
<li><a name="TOC2" href="#SEC2">PARTIAL MATCHING USING pcre_exec() OR pcre[16|32]_exec()</a>
|
||||||
<li><a name="TOC3" href="#SEC3">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a>
|
<li><a name="TOC3" href="#SEC3">PARTIAL MATCHING USING pcre_dfa_exec() OR pcre[16|32]_dfa_exec()</a>
|
||||||
<li><a name="TOC4" href="#SEC4">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec()</a>
|
<li><a name="TOC4" href="#SEC4">PARTIAL MATCHING AND WORD BOUNDARIES</a>
|
||||||
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
|
<li><a name="TOC5" href="#SEC5">FORMERLY RESTRICTED PATTERNS</a>
|
||||||
<li><a name="TOC6" href="#SEC6">REVISION</a>
|
<li><a name="TOC6" href="#SEC6">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a>
|
||||||
|
<li><a name="TOC7" href="#SEC7">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec() OR pcre[16|32]_dfa_exec()</a>
|
||||||
|
<li><a name="TOC8" href="#SEC8">MULTI-SEGMENT MATCHING WITH pcre_exec() OR pcre[16|32]_exec()</a>
|
||||||
|
<li><a name="TOC9" href="#SEC9">ISSUES WITH MULTI-SEGMENT MATCHING</a>
|
||||||
|
<li><a name="TOC10" href="#SEC10">AUTHOR</a>
|
||||||
|
<li><a name="TOC11" href="#SEC11">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">PARTIAL MATCHING IN PCRE</a><br>
|
<br><a name="SEC1" href="#TOC1">PARTIAL MATCHING IN PCRE</a><br>
|
||||||
<P>
|
<P>
|
||||||
In normal use of PCRE, if the subject string that is passed to
|
In normal use of PCRE, if the subject string that is passed to a matching
|
||||||
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> matches as far as it goes, but is
|
function matches as far as it goes, but is too short to match the entire
|
||||||
too short to match the entire pattern, PCRE_ERROR_NOMATCH is returned. There
|
pattern, PCRE_ERROR_NOMATCH is returned. There are circumstances where it might
|
||||||
are circumstances where it might be helpful to distinguish this case from other
|
be helpful to distinguish this case from other cases in which there is no
|
||||||
cases in which there is no match.
|
match.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Consider, for example, an application where a human is required to type in data
|
Consider, for example, an application where a human is required to type in data
|
||||||
@ -37,78 +42,230 @@ in the form <i>ddmmmyy</i>, defined by this pattern:
|
|||||||
</pre>
|
</pre>
|
||||||
If the application sees the user's keystrokes one by one, and can check that
|
If the application sees the user's keystrokes one by one, and can check that
|
||||||
what has been typed so far is potentially valid, it is able to raise an error
|
what has been typed so far is potentially valid, it is able to raise an error
|
||||||
as soon as a mistake is made, possibly beeping and not reflecting the
|
as soon as a mistake is made, by beeping and not reflecting the character that
|
||||||
character that has been typed. This immediate feedback is likely to be a better
|
has been typed, for example. This immediate feedback is likely to be a better
|
||||||
user interface than a check that is delayed until the entire string has been
|
user interface than a check that is delayed until the entire string has been
|
||||||
entered.
|
entered. Partial matching can also be useful when the subject string is very
|
||||||
|
long and is not all available at once.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
PCRE supports the concept of partial matching by means of the PCRE_PARTIAL
|
PCRE supports partial matching by means of the PCRE_PARTIAL_SOFT and
|
||||||
option, which can be set when calling <b>pcre_exec()</b> or
|
PCRE_PARTIAL_HARD options, which can be set when calling any of the matching
|
||||||
<b>pcre_dfa_exec()</b>. When this flag is set for <b>pcre_exec()</b>, the return
|
functions. For backwards compatibility, PCRE_PARTIAL is a synonym for
|
||||||
code PCRE_ERROR_NOMATCH is converted into PCRE_ERROR_PARTIAL if at any time
|
PCRE_PARTIAL_SOFT. The essential difference between the two options is whether
|
||||||
during the matching process the last part of the subject string matched part of
|
or not a partial match is preferred to an alternative complete match, though
|
||||||
the pattern. Unfortunately, for non-anchored matching, it is not possible to
|
the details differ between the two types of matching function. If both options
|
||||||
obtain the position of the start of the partial match. No captured data is set
|
are set, PCRE_PARTIAL_HARD takes precedence.
|
||||||
when PCRE_ERROR_PARTIAL is returned.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When PCRE_PARTIAL is set for <b>pcre_dfa_exec()</b>, the return code
|
If you want to use partial matching with just-in-time optimized code, you must
|
||||||
PCRE_ERROR_NOMATCH is converted into PCRE_ERROR_PARTIAL if the end of the
|
call <b>pcre_study()</b>, <b>pcre16_study()</b> or <b>pcre32_study()</b> with one
|
||||||
subject is reached, there have been no complete matches, but there is still at
|
or both of these options:
|
||||||
least one matching possibility. The portion of the string that provided the
|
|
||||||
partial match is set as the first matching string.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Using PCRE_PARTIAL disables one of PCRE's optimizations. PCRE remembers the
|
|
||||||
last literal byte in a pattern, and abandons matching immediately if such a
|
|
||||||
byte is not present in the subject string. This optimization cannot be used
|
|
||||||
for a subject string that might match only partially.
|
|
||||||
</P>
|
|
||||||
<br><a name="SEC2" href="#TOC1">RESTRICTED PATTERNS FOR PCRE_PARTIAL</a><br>
|
|
||||||
<P>
|
|
||||||
Because of the way certain internal optimizations are implemented in the
|
|
||||||
<b>pcre_exec()</b> function, the PCRE_PARTIAL option cannot be used with all
|
|
||||||
patterns. These restrictions do not apply when <b>pcre_dfa_exec()</b> is used.
|
|
||||||
For <b>pcre_exec()</b>, repeated single characters such as
|
|
||||||
<pre>
|
<pre>
|
||||||
a{2,4}
|
PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
|
||||||
|
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
|
||||||
</pre>
|
</pre>
|
||||||
and repeated single metasequences such as
|
PCRE_STUDY_JIT_COMPILE should also be set if you are going to run non-partial
|
||||||
<pre>
|
matches on the same pattern. If the appropriate JIT study mode has not been set
|
||||||
\d+
|
for a match, the interpretive matching code is used.
|
||||||
</pre>
|
|
||||||
are not permitted if the maximum number of occurrences is greater than one.
|
|
||||||
Optional items such as \d? (where the maximum is one) are permitted.
|
|
||||||
Quantifiers with any values are permitted after parentheses, so the invalid
|
|
||||||
examples above can be coded thus:
|
|
||||||
<pre>
|
|
||||||
(a){2,4}
|
|
||||||
(\d)+
|
|
||||||
</pre>
|
|
||||||
These constructions run more slowly, but for the kinds of application that are
|
|
||||||
envisaged for this facility, this is not felt to be a major restriction.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If PCRE_PARTIAL is set for a pattern that does not conform to the restrictions,
|
Setting a partial matching option disables two of PCRE's standard
|
||||||
<b>pcre_exec()</b> returns the error code PCRE_ERROR_BADPARTIAL (-13).
|
optimizations. PCRE remembers the last literal data unit in a pattern, and
|
||||||
You can use the PCRE_INFO_OKPARTIAL call to <b>pcre_fullinfo()</b> to find out
|
abandons matching immediately if it is not present in the subject string. This
|
||||||
if a compiled pattern can be used for partial matching.
|
optimization cannot be used for a subject string that might match only
|
||||||
|
partially. If the pattern was studied, PCRE knows the minimum length of a
|
||||||
|
matching string, and does not bother to run the matching function on shorter
|
||||||
|
strings. This optimization is also disabled for partial matching.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a><br>
|
<br><a name="SEC2" href="#TOC1">PARTIAL MATCHING USING pcre_exec() OR pcre[16|32]_exec()</a><br>
|
||||||
|
<P>
|
||||||
|
A partial match occurs during a call to <b>pcre_exec()</b> or
|
||||||
|
<b>pcre[16|32]_exec()</b> when the end of the subject string is reached successfully,
|
||||||
|
but matching cannot continue because more characters are needed. However, at
|
||||||
|
least one character in the subject must have been inspected. This character
|
||||||
|
need not form part of the final matched string; lookbehind assertions and the
|
||||||
|
\K escape sequence provide ways of inspecting characters before the start of a
|
||||||
|
matched substring. The requirement for inspecting at least one character exists
|
||||||
|
because an empty string can always be matched; without such a restriction there
|
||||||
|
would always be a partial match of an empty string at the end of the subject.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If there are at least two slots in the offsets vector when a partial match is
|
||||||
|
returned, the first slot is set to the offset of the earliest character that
|
||||||
|
was inspected. For convenience, the second offset points to the end of the
|
||||||
|
subject so that a substring can easily be identified.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
For the majority of patterns, the first offset identifies the start of the
|
||||||
|
partially matched string. However, for patterns that contain lookbehind
|
||||||
|
assertions, or \K, or begin with \b or \B, earlier characters have been
|
||||||
|
inspected while carrying out the match. For example:
|
||||||
|
<pre>
|
||||||
|
/(?<=abc)123/
|
||||||
|
</pre>
|
||||||
|
This pattern matches "123", but only if it is preceded by "abc". If the subject
|
||||||
|
string is "xyzabc12", the offsets after a partial match are for the substring
|
||||||
|
"abc12", because all these characters are needed if another match is tried
|
||||||
|
with extra characters added to the subject.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
What happens when a partial match is identified depends on which of the two
|
||||||
|
partial matching options is set.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
PCRE_PARTIAL_SOFT WITH pcre_exec() OR pcre[16|32]_exec()
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
If PCRE_PARTIAL_SOFT is set when <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b>
|
||||||
|
identifies a partial match, the partial match is remembered, but matching
|
||||||
|
continues as normal, and other alternatives in the pattern are tried. If no
|
||||||
|
complete match can be found, PCRE_ERROR_PARTIAL is returned instead of
|
||||||
|
PCRE_ERROR_NOMATCH.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
This option is "soft" because it prefers a complete match over a partial match.
|
||||||
|
All the various matching items in a pattern behave as if the subject string is
|
||||||
|
potentially complete. For example, \z, \Z, and $ match at the end of the
|
||||||
|
subject, as normal, and for \b and \B the end of the subject is treated as a
|
||||||
|
non-alphanumeric.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If there is more than one partial match, the first one that was found provides
|
||||||
|
the data that is returned. Consider this pattern:
|
||||||
|
<pre>
|
||||||
|
/123\w+X|dogY/
|
||||||
|
</pre>
|
||||||
|
If this is matched against the subject string "abc123dog", both
|
||||||
|
alternatives fail to match, but the end of the subject is reached during
|
||||||
|
matching, so PCRE_ERROR_PARTIAL is returned. The offsets are set to 3 and 9,
|
||||||
|
identifying "123dog" as the first partial match that was found. (In this
|
||||||
|
example, there are two partial matches, because "dog" on its own partially
|
||||||
|
matches the second alternative.)
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
PCRE_PARTIAL_HARD WITH pcre_exec() OR pcre[16|32]_exec()
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
If PCRE_PARTIAL_HARD is set for <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b>,
|
||||||
|
PCRE_ERROR_PARTIAL is returned as soon as a partial match is found, without
|
||||||
|
continuing to search for possible complete matches. This option is "hard"
|
||||||
|
because it prefers an earlier partial match over a later complete match. For
|
||||||
|
this reason, the assumption is made that the end of the supplied subject string
|
||||||
|
may not be the true end of the available data, and so, if \z, \Z, \b, \B,
|
||||||
|
or $ are encountered at the end of the subject, the result is
|
||||||
|
PCRE_ERROR_PARTIAL, provided that at least one character in the subject has
|
||||||
|
been inspected.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Setting PCRE_PARTIAL_HARD also affects the way UTF-8 and UTF-16
|
||||||
|
subject strings are checked for validity. Normally, an invalid sequence
|
||||||
|
causes the error PCRE_ERROR_BADUTF8 or PCRE_ERROR_BADUTF16. However, in the
|
||||||
|
special case of a truncated character at the end of the subject,
|
||||||
|
PCRE_ERROR_SHORTUTF8 or PCRE_ERROR_SHORTUTF16 is returned when
|
||||||
|
PCRE_PARTIAL_HARD is set.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Comparing hard and soft partial matching
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
The difference between the two partial matching options can be illustrated by a
|
||||||
|
pattern such as:
|
||||||
|
<pre>
|
||||||
|
/dog(sbody)?/
|
||||||
|
</pre>
|
||||||
|
This matches either "dog" or "dogsbody", greedily (that is, it prefers the
|
||||||
|
longer string if possible). If it is matched against the string "dog" with
|
||||||
|
PCRE_PARTIAL_SOFT, it yields a complete match for "dog". However, if
|
||||||
|
PCRE_PARTIAL_HARD is set, the result is PCRE_ERROR_PARTIAL. On the other hand,
|
||||||
|
if the pattern is made ungreedy the result is different:
|
||||||
|
<pre>
|
||||||
|
/dog(sbody)??/
|
||||||
|
</pre>
|
||||||
|
In this case the result is always a complete match because that is found first,
|
||||||
|
and matching never continues after finding a complete match. It might be easier
|
||||||
|
to follow this explanation by thinking of the two patterns like this:
|
||||||
|
<pre>
|
||||||
|
/dog(sbody)?/ is the same as /dogsbody|dog/
|
||||||
|
/dog(sbody)??/ is the same as /dog|dogsbody/
|
||||||
|
</pre>
|
||||||
|
The second pattern will never match "dogsbody", because it will always find the
|
||||||
|
shorter match first.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC3" href="#TOC1">PARTIAL MATCHING USING pcre_dfa_exec() OR pcre[16|32]_dfa_exec()</a><br>
|
||||||
|
<P>
|
||||||
|
The DFA functions move along the subject string character by character, without
|
||||||
|
backtracking, searching for all possible matches simultaneously. If the end of
|
||||||
|
the subject is reached before the end of the pattern, there is the possibility
|
||||||
|
of a partial match, again provided that at least one character has been
|
||||||
|
inspected.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
When PCRE_PARTIAL_SOFT is set, PCRE_ERROR_PARTIAL is returned only if there
|
||||||
|
have been no complete matches. Otherwise, the complete matches are returned.
|
||||||
|
However, if PCRE_PARTIAL_HARD is set, a partial match takes precedence over any
|
||||||
|
complete matches. The portion of the string that was inspected when the longest
|
||||||
|
partial match was found is set as the first matching string, provided there are
|
||||||
|
at least two slots in the offsets vector.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Because the DFA functions always search for all possible matches, and there is
|
||||||
|
no difference between greedy and ungreedy repetition, their behaviour is
|
||||||
|
different from the standard functions when PCRE_PARTIAL_HARD is set. Consider
|
||||||
|
the string "dog" matched against the ungreedy pattern shown above:
|
||||||
|
<pre>
|
||||||
|
/dog(sbody)??/
|
||||||
|
</pre>
|
||||||
|
Whereas the standard functions stop as soon as they find the complete match for
|
||||||
|
"dog", the DFA functions also find the partial match for "dogsbody", and so
|
||||||
|
return that when PCRE_PARTIAL_HARD is set.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC4" href="#TOC1">PARTIAL MATCHING AND WORD BOUNDARIES</a><br>
|
||||||
|
<P>
|
||||||
|
If a pattern ends with one of the sequences \b or \B, which test for word
|
||||||
|
boundaries, partial matching with PCRE_PARTIAL_SOFT can give counter-intuitive
|
||||||
|
results. Consider this pattern:
|
||||||
|
<pre>
|
||||||
|
/\bcat\b/
|
||||||
|
</pre>
|
||||||
|
This matches "cat", provided there is a word boundary at either end. If the
|
||||||
|
subject string is "the cat", the comparison of the final "t" with a following
|
||||||
|
character cannot take place, so a partial match is found. However, normal
|
||||||
|
matching carries on, and \b matches at the end of the subject when the last
|
||||||
|
character is a letter, so a complete match is found. The result, therefore, is
|
||||||
|
<i>not</i> PCRE_ERROR_PARTIAL. Using PCRE_PARTIAL_HARD in this case does yield
|
||||||
|
PCRE_ERROR_PARTIAL, because then the partial match takes precedence.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC5" href="#TOC1">FORMERLY RESTRICTED PATTERNS</a><br>
|
||||||
|
<P>
|
||||||
|
For releases of PCRE prior to 8.00, because of the way certain internal
|
||||||
|
optimizations were implemented in the <b>pcre_exec()</b> function, the
|
||||||
|
PCRE_PARTIAL option (predecessor of PCRE_PARTIAL_SOFT) could not be used with
|
||||||
|
all patterns. From release 8.00 onwards, the restrictions no longer apply, and
|
||||||
|
partial matching can be requested for any pattern.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Items that were formerly restricted were repeated single characters and
|
||||||
|
repeated metasequences. If PCRE_PARTIAL was set for a pattern that did not
|
||||||
|
conform to the restrictions, <b>pcre_exec()</b> returned the error code
|
||||||
|
PCRE_ERROR_BADPARTIAL (-13). This error code is no longer in use. The
|
||||||
|
PCRE_INFO_OKPARTIAL call to <b>pcre_fullinfo()</b> to find out if a compiled
|
||||||
|
pattern can be used for partial matching now always returns 1.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC6" href="#TOC1">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a><br>
|
||||||
<P>
|
<P>
|
||||||
If the escape sequence \P is present in a <b>pcretest</b> data line, the
|
If the escape sequence \P is present in a <b>pcretest</b> data line, the
|
||||||
PCRE_PARTIAL flag is used for the match. Here is a run of <b>pcretest</b> that
|
PCRE_PARTIAL_SOFT option is used for the match. Here is a run of <b>pcretest</b>
|
||||||
uses the date example quoted above:
|
that uses the date example quoted above:
|
||||||
<pre>
|
<pre>
|
||||||
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
|
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
|
||||||
data> 25jun04\P
|
data> 25jun04\P
|
||||||
0: 25jun04
|
0: 25jun04
|
||||||
1: jun
|
1: jun
|
||||||
data> 25dec3\P
|
data> 25dec3\P
|
||||||
Partial match
|
Partial match: 25dec3
|
||||||
data> 3ju\P
|
data> 3ju\P
|
||||||
Partial match
|
Partial match: 3ju
|
||||||
data> 3juj\P
|
data> 3juj\P
|
||||||
No match
|
No match
|
||||||
data> j\P
|
data> j\P
|
||||||
@ -116,34 +273,22 @@ uses the date example quoted above:
|
|||||||
</pre>
|
</pre>
|
||||||
The first data string is matched completely, so <b>pcretest</b> shows the
|
The first data string is matched completely, so <b>pcretest</b> shows the
|
||||||
matched substrings. The remaining four strings do not match the complete
|
matched substrings. The remaining four strings do not match the complete
|
||||||
pattern, but the first two are partial matches. The same test, using
|
pattern, but the first two are partial matches. Similar output is obtained
|
||||||
<b>pcre_dfa_exec()</b> matching (by means of the \D escape sequence), produces
|
if DFA matching is used.
|
||||||
the following output:
|
|
||||||
<pre>
|
|
||||||
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
|
|
||||||
data> 25jun04\P\D
|
|
||||||
0: 25jun04
|
|
||||||
data> 23dec3\P\D
|
|
||||||
Partial match: 23dec3
|
|
||||||
data> 3ju\P\D
|
|
||||||
Partial match: 3ju
|
|
||||||
data> 3juj\P\D
|
|
||||||
No match
|
|
||||||
data> j\P\D
|
|
||||||
No match
|
|
||||||
</pre>
|
|
||||||
Notice that in this case the portion of the string that was matched is made
|
|
||||||
available.
|
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec()</a><br>
|
|
||||||
<P>
|
<P>
|
||||||
When a partial match has been found using <b>pcre_dfa_exec()</b>, it is possible
|
If the escape sequence \P is present more than once in a <b>pcretest</b> data
|
||||||
to continue the match by providing additional subject data and calling
|
line, the PCRE_PARTIAL_HARD option is set for the match.
|
||||||
<b>pcre_dfa_exec()</b> again with the same compiled regular expression, this
|
</P>
|
||||||
time setting the PCRE_DFA_RESTART option. You must also pass the same working
|
<br><a name="SEC7" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec() OR pcre[16|32]_dfa_exec()</a><br>
|
||||||
space as before, because this is where details of the previous partial match
|
<P>
|
||||||
are stored. Here is an example using <b>pcretest</b>, using the \R escape
|
When a partial match has been found using a DFA matching function, it is
|
||||||
sequence to set the PCRE_DFA_RESTART option (\P and \D are as above):
|
possible to continue the match by providing additional subject data and calling
|
||||||
|
the function again with the same compiled regular expression, this time setting
|
||||||
|
the PCRE_DFA_RESTART option. You must pass the same working space as before,
|
||||||
|
because this is where details of the previous partial match are stored. Here is
|
||||||
|
an example using <b>pcretest</b>, using the \R escape sequence to set the
|
||||||
|
PCRE_DFA_RESTART option (\D specifies the use of the DFA matching function):
|
||||||
<pre>
|
<pre>
|
||||||
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
|
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
|
||||||
data> 23ja\P\D
|
data> 23ja\P\D
|
||||||
@ -158,33 +303,94 @@ not retain the previously partially-matched string. It is up to the calling
|
|||||||
program to do that if it needs to.
|
program to do that if it needs to.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
You can set PCRE_PARTIAL with PCRE_DFA_RESTART to continue partial matching
|
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
|
||||||
over multiple segments. This facility can be used to pass very long subject
|
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
|
||||||
strings to <b>pcre_dfa_exec()</b>. However, some care is needed for certain
|
facility can be used to pass very long subject strings to the DFA matching
|
||||||
types of pattern.
|
functions.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC8" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre_exec() OR pcre[16|32]_exec()</a><br>
|
||||||
|
<P>
|
||||||
|
From release 8.00, the standard matching functions can also be used to do
|
||||||
|
multi-segment matching. Unlike the DFA functions, it is not possible to
|
||||||
|
restart the previous match with a new segment of data. Instead, new data must
|
||||||
|
be added to the previous subject string, and the entire match re-run, starting
|
||||||
|
from the point where the partial match occurred. Earlier data can be discarded.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
1. If the pattern contains tests for the beginning or end of a line, you need
|
It is best to use PCRE_PARTIAL_HARD in this situation, because it does not
|
||||||
to pass the PCRE_NOTBOL or PCRE_NOTEOL options, as appropriate, when the
|
treat the end of a segment as the end of the subject when matching \z, \Z,
|
||||||
subject string for any call does not contain the beginning or end of a line.
|
\b, \B, and $. Consider an unanchored pattern that matches dates:
|
||||||
|
<pre>
|
||||||
|
re> /\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d/
|
||||||
|
data> The date is 23ja\P\P
|
||||||
|
Partial match: 23ja
|
||||||
|
</pre>
|
||||||
|
At this stage, an application could discard the text preceding "23ja", add on
|
||||||
|
text from the next segment, and call the matching function again. Unlike the
|
||||||
|
DFA matching functions, the entire matching string must always be available,
|
||||||
|
and the complete matching process occurs for each call, so more memory and more
|
||||||
|
processing time are needed.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
2. If the pattern contains backward assertions (including \b or \B), you need
|
<b>Note:</b> If the pattern contains lookbehind assertions, or \K, or starts
|
||||||
to arrange for some overlap in the subject strings to allow for this. For
|
with \b or \B, the string that is returned for a partial match includes
|
||||||
example, you could pass the subject in chunks that are 500 bytes long, but in
|
characters that precede the partially matched string itself, because these must
|
||||||
a buffer of 700 bytes, with the starting offset set to 200 and the previous 200
|
be retained when adding on more characters for a subsequent matching attempt.
|
||||||
bytes at the start of the buffer.
|
However, in some cases you may need to retain even earlier characters, as
|
||||||
|
discussed in the next section.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC9" href="#TOC1">ISSUES WITH MULTI-SEGMENT MATCHING</a><br>
|
||||||
|
<P>
|
||||||
|
Certain types of pattern may give problems with multi-segment matching,
|
||||||
|
whichever matching function is used.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
3. Matching a subject string that is split into multiple segments does not
|
1. If the pattern contains a test for the beginning of a line, you need to pass
|
||||||
always produce exactly the same result as matching over one single long string.
|
the PCRE_NOTBOL option when the subject string for any call does not start at the
|
||||||
The difference arises when there are multiple matching possibilities, because a
|
beginning of a line. There is also a PCRE_NOTEOL option, but in practice when
|
||||||
partial match result is given only when there are no completed matches in a
|
doing multi-segment matching you should be using PCRE_PARTIAL_HARD, which
|
||||||
call to <b>pcre_dfa_exec()</b>. This means that as soon as the shortest match has
|
includes the effect of PCRE_NOTEOL.
|
||||||
been found, continuation to a new subject segment is no longer possible.
|
</P>
|
||||||
Consider this <b>pcretest</b> example:
|
<P>
|
||||||
|
2. Lookbehind assertions that have already been obeyed are catered for in the
|
||||||
|
offsets that are returned for a partial match. However, a lookbehind assertion
|
||||||
|
later in the pattern could require even earlier characters to be inspected. You
|
||||||
|
can handle this case by using the PCRE_INFO_MAXLOOKBEHIND option of the
|
||||||
|
<b>pcre_fullinfo()</b> or <b>pcre[16|32]_fullinfo()</b> functions to obtain the length
|
||||||
|
of the largest lookbehind in the pattern. This length is given in characters,
|
||||||
|
not bytes. If you always retain at least that many characters before the
|
||||||
|
partially matched string, all should be well. (Of course, near the start of the
|
||||||
|
subject, fewer characters may be present; in that case all characters should be
|
||||||
|
retained.)
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
3. Because a partial match must always contain at least one character, what
|
||||||
|
might be considered a partial match of an empty string actually gives a "no
|
||||||
|
match" result. For example:
|
||||||
|
<pre>
|
||||||
|
re> /c(?<=abc)x/
|
||||||
|
data> ab\P
|
||||||
|
No match
|
||||||
|
</pre>
|
||||||
|
If the next segment begins "cx", a match should be found, but this will only
|
||||||
|
happen if characters from the previous segment are retained. For this reason, a
|
||||||
|
"no match" result should be interpreted as "partial match of an empty string"
|
||||||
|
when the pattern contains lookbehinds.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
4. Matching a subject string that is split into multiple segments may not
|
||||||
|
always produce exactly the same result as matching over one single long string,
|
||||||
|
especially when PCRE_PARTIAL_SOFT is used. The section "Partial Matching and
|
||||||
|
Word Boundaries" above describes an issue that arises if the pattern ends with
|
||||||
|
\b or \B. Another kind of difference may occur when there are multiple
|
||||||
|
matching possibilities, because (for PCRE_PARTIAL_SOFT) a partial match result
|
||||||
|
is given only when there are no completed matches. This means that as soon as
|
||||||
|
the shortest match has been found, continuation to a new subject segment is no
|
||||||
|
longer possible. Consider again this <b>pcretest</b> example:
|
||||||
<pre>
|
<pre>
|
||||||
re> /dog(sbody)?/
|
re> /dog(sbody)?/
|
||||||
|
data> dogsb\P
|
||||||
|
0: dog
|
||||||
data> do\P\D
|
data> do\P\D
|
||||||
Partial match: do
|
Partial match: do
|
||||||
data> gsb\R\P\D
|
data> gsb\R\P\D
|
||||||
@ -193,26 +399,37 @@ Consider this <b>pcretest</b> example:
|
|||||||
0: dogsbody
|
0: dogsbody
|
||||||
1: dog
|
1: dog
|
||||||
</pre>
|
</pre>
|
||||||
The pattern matches the words "dog" or "dogsbody". When the subject is
|
The first data line passes the string "dogsb" to a standard matching function,
|
||||||
presented in several parts ("do" and "gsb" being the first two) the match stops
|
setting the PCRE_PARTIAL_SOFT option. Although the string is a partial match
|
||||||
when "dog" has been found, and it is not possible to continue. On the other
|
for "dogsbody", the result is not PCRE_ERROR_PARTIAL, because the shorter
|
||||||
hand, if "dogsbody" is presented as a single string, both matches are found.
|
string "dog" is a complete match. Similarly, when the subject is presented to
|
||||||
|
a DFA matching function in several parts ("do" and "gsb" being the first two)
|
||||||
|
the match stops when "dog" has been found, and it is not possible to continue.
|
||||||
|
On the other hand, if "dogsbody" is presented as a single string, a DFA
|
||||||
|
matching function finds both matches.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Because of this phenomenon, it does not usually make sense to end a pattern
|
Because of these problems, it is best to use PCRE_PARTIAL_HARD when matching
|
||||||
that is going to be matched in this way with a variable repeat.
|
multi-segment data. The example above then behaves differently:
|
||||||
</P>
|
<pre>
|
||||||
<P>
|
re> /dog(sbody)?/
|
||||||
4. Patterns that contain alternatives at the top level which do not all
|
data> dogsb\P\P
|
||||||
start with the same pattern item may not work as expected. For example,
|
Partial match: dogsb
|
||||||
consider this pattern:
|
data> do\P\D
|
||||||
|
Partial match: do
|
||||||
|
data> gsb\R\P\P\D
|
||||||
|
Partial match: gsb
|
||||||
|
</pre>
|
||||||
|
5. Patterns that contain alternatives at the top level which do not all start
|
||||||
|
with the same pattern item may not work as expected when PCRE_DFA_RESTART is
|
||||||
|
used. For example, consider this pattern:
|
||||||
<pre>
|
<pre>
|
||||||
1234|3789
|
1234|3789
|
||||||
</pre>
|
</pre>
|
||||||
If the first part of the subject is "ABC123", a partial match of the first
|
If the first part of the subject is "ABC123", a partial match of the first
|
||||||
alternative is found at offset 3. There is no partial match for the second
|
alternative is found at offset 3. There is no partial match for the second
|
||||||
alternative, because such a match does not start at the same point in the
|
alternative, because such a match does not start at the same point in the
|
||||||
subject string. Attempting to continue with the string "789" does not yield a
|
subject string. Attempting to continue with the string "7890" does not yield a
|
||||||
match because only those alternatives that match at one point in the subject
|
match because only those alternatives that match at one point in the subject
|
||||||
are remembered. The problem arises because the start of the second alternative
|
are remembered. The problem arises because the start of the second alternative
|
||||||
matches within the first alternative. There is no problem with anchored
|
matches within the first alternative. There is no problem with anchored
|
||||||
@ -220,9 +437,24 @@ patterns or patterns such as:
|
|||||||
<pre>
|
<pre>
|
||||||
1234|ABCD
|
1234|ABCD
|
||||||
</pre>
|
</pre>
|
||||||
where no string can be a partial match for both alternatives.
|
where no string can be a partial match for both alternatives. This is not a
|
||||||
|
problem if a standard matching function is used, because the entire match has
|
||||||
|
to be rerun each time:
|
||||||
|
<pre>
|
||||||
|
re> /1234|3789/
|
||||||
|
data> ABC123\P\P
|
||||||
|
Partial match: 123
|
||||||
|
data> 1237890
|
||||||
|
0: 3789
|
||||||
|
</pre>
|
||||||
|
Of course, instead of using PCRE_DFA_RESTART, the same technique of re-running
|
||||||
|
the entire match can also be used with the DFA matching functions. Another
|
||||||
|
possibility is to work with two buffers. If a partial match at offset <i>n</i>
|
||||||
|
in the first buffer is followed by "no match" when PCRE_DFA_RESTART is used on
|
||||||
|
the second buffer, you can then try a new match starting at offset <i>n+1</i> in
|
||||||
|
the first buffer.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC10" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
@ -231,11 +463,11 @@ University Computing Service
|
|||||||
Cambridge CB2 3QH, England.
|
Cambridge CB2 3QH, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC11" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 04 June 2007
|
Last updated: 24 June 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2007 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -21,14 +21,15 @@ time. The way you express your pattern as a regular expression can affect both
|
|||||||
of them.
|
of them.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
MEMORY USAGE
|
COMPILED PATTERN MEMORY USAGE
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Patterns are compiled by PCRE into a reasonably efficient byte code, so that
|
Patterns are compiled by PCRE into a reasonably efficient interpretive code, so
|
||||||
most simple patterns do not use much memory. However, there is one case where
|
that most simple patterns do not use much memory. However, there is one case
|
||||||
memory usage can be unexpectedly large. When a parenthesized subpattern has a
|
where the memory usage of a compiled pattern can be unexpectedly large. If a
|
||||||
quantifier with a minimum greater than 1 and/or a limited maximum, the whole
|
parenthesized subpattern has a quantifier with a minimum greater than 1 and/or
|
||||||
subpattern is repeated in the compiled code. For example, the pattern
|
a limited maximum, the whole subpattern is repeated in the compiled code. For
|
||||||
|
example, the pattern
|
||||||
<pre>
|
<pre>
|
||||||
(abc|def){2,4}
|
(abc|def){2,4}
|
||||||
</pre>
|
</pre>
|
||||||
@ -47,12 +48,12 @@ example, the very simple pattern
|
|||||||
<pre>
|
<pre>
|
||||||
((ab){1,1000}c){1,3}
|
((ab){1,1000}c){1,3}
|
||||||
</pre>
|
</pre>
|
||||||
uses 51K bytes when compiled. When PCRE is compiled with its default internal
|
uses 51K bytes when compiled using the 8-bit library. When PCRE is compiled
|
||||||
pointer size of two bytes, the size limit on a compiled pattern is 64K, and
|
with its default internal pointer size of two bytes, the size limit on a
|
||||||
this is reached with the above pattern if the outer repetition is increased
|
compiled pattern is 64K data units, and this is reached with the above pattern
|
||||||
from 3 to 4. PCRE can be compiled to use larger internal pointers and thus
|
if the outer repetition is increased from 3 to 4. PCRE can be compiled to use
|
||||||
handle larger compiled patterns, but it is better to try to rewrite your
|
larger internal pointers and thus handle larger compiled patterns, but it is
|
||||||
pattern to use less memory if you can.
|
better to try to rewrite your pattern to use less memory if you can.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
One way of reducing the memory usage for such patterns is to make use of PCRE's
|
One way of reducing the memory usage for such patterns is to make use of PCRE's
|
||||||
@ -73,6 +74,18 @@ speed is acceptable, this kind of rewriting will allow you to process patterns
|
|||||||
that PCRE cannot otherwise handle.
|
that PCRE cannot otherwise handle.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
STACK USAGE AT RUN TIME
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
When <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> is used for matching, certain
|
||||||
|
kinds of pattern can cause it to use large amounts of the process stack. In
|
||||||
|
some environments the default process stack is quite small, and if it runs out
|
||||||
|
the result is often SIGSEGV. This issue is probably the most frequently raised
|
||||||
|
problem with PCRE. Rewriting your pattern can often help. The
|
||||||
|
<a href="pcrestack.html"><b>pcrestack</b></a>
|
||||||
|
documentation discusses this issue in detail.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
PROCESSING TIME
|
PROCESSING TIME
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
@ -86,10 +99,19 @@ contains a few observations about PCRE.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Using Unicode character properties (the \p, \P, and \X escapes) is slow,
|
Using Unicode character properties (the \p, \P, and \X escapes) is slow,
|
||||||
because PCRE has to scan a structure that contains data for over fifteen
|
because PCRE has to use a multi-stage table lookup whenever it needs a
|
||||||
thousand characters whenever it needs a character's property. If you can find
|
character's property. If you can find an alternative pattern that does not use
|
||||||
an alternative pattern that does not use character properties, it will probably
|
character properties, it will probably be faster.
|
||||||
be faster.
|
</P>
|
||||||
|
<P>
|
||||||
|
By default, the escape sequences \b, \d, \s, and \w, and the POSIX
|
||||||
|
character classes such as [:alpha:] do not use Unicode properties, partly for
|
||||||
|
backwards compatibility, and partly for performance reasons. However, you can
|
||||||
|
set PCRE_UCP if you want Unicode character properties to be used. This can
|
||||||
|
double the matching time for items such as \d, when matched with
|
||||||
|
a traditional matching function; the performance loss is less with
|
||||||
|
a DFA matching function, and in both cases there is not much difference for
|
||||||
|
\b.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When a pattern begins with .* not in parentheses, or in parentheses that are
|
When a pattern begins with .* not in parentheses, or in parentheses that are
|
||||||
@ -164,9 +186,9 @@ Cambridge CB2 3QH, England.
|
|||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 06 March 2007
|
Last updated: 25 August 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2007 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -44,11 +44,12 @@ man page, in case the conversion went wrong.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
This set of functions provides a POSIX-style API to the PCRE regular expression
|
This set of functions provides a POSIX-style API for the PCRE regular
|
||||||
package. See the
|
expression 8-bit library. See the
|
||||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
documentation for a description of PCRE's native API, which contains much
|
documentation for a description of PCRE's native API, which contains much
|
||||||
additional functionality.
|
additional functionality. There is no POSIX-style wrapper for PCRE's 16-bit
|
||||||
|
and 32-bit libraries.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The functions described here are just wrapper functions that ultimately call
|
The functions described here are just wrapper functions that ultimately call
|
||||||
@ -66,6 +67,11 @@ POSIX interface often use it, this makes it easier to slot in PCRE as a
|
|||||||
replacement library. Other POSIX options are not even defined.
|
replacement library. Other POSIX options are not even defined.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
There are also some other options that are not defined by POSIX. These have
|
||||||
|
been added at the request of users who want to make use of certain
|
||||||
|
PCRE-specific features via the POSIX calling interface.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
When PCRE is called via these functions, it is only the API that is POSIX-like
|
When PCRE is called via these functions, it is only the API that is POSIX-like
|
||||||
in style. The syntax and semantics of the regular expressions themselves are
|
in style. The syntax and semantics of the regular expressions themselves are
|
||||||
still those of Perl, subject to the setting of various PCRE options, as
|
still those of Perl, subject to the setting of various PCRE options, as
|
||||||
@ -82,8 +88,6 @@ structure types, <i>regex_t</i> for compiled internal forms, and
|
|||||||
constants whose names start with "REG_"; these are used for setting options and
|
constants whose names start with "REG_"; these are used for setting options and
|
||||||
identifying error codes.
|
identifying error codes.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
|
||||||
</P>
|
|
||||||
<br><a name="SEC3" href="#TOC1">COMPILING A PATTERN</a><br>
|
<br><a name="SEC3" href="#TOC1">COMPILING A PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
The function <b>regcomp()</b> is called to compile a pattern into an
|
The function <b>regcomp()</b> is called to compile a pattern into an
|
||||||
@ -120,6 +124,19 @@ for compilation to the native function. In addition, when a pattern that is
|
|||||||
compiled with this flag is passed to <b>regexec()</b> for matching, the
|
compiled with this flag is passed to <b>regexec()</b> for matching, the
|
||||||
<i>nmatch</i> and <i>pmatch</i> arguments are ignored, and no captured strings
|
<i>nmatch</i> and <i>pmatch</i> arguments are ignored, and no captured strings
|
||||||
are returned.
|
are returned.
|
||||||
|
<pre>
|
||||||
|
REG_UCP
|
||||||
|
</pre>
|
||||||
|
The PCRE_UCP option is set when the regular expression is passed for
|
||||||
|
compilation to the native function. This causes PCRE to use Unicode properties
|
||||||
|
when matching \d, \w, etc., instead of just recognizing ASCII values. Note
|
||||||
|
that REG_UCP is not part of the POSIX standard.
|
||||||
|
<pre>
|
||||||
|
REG_UNGREEDY
|
||||||
|
</pre>
|
||||||
|
The PCRE_UNGREEDY option is set when the regular expression is passed for
|
||||||
|
compilation to the native function. Note that REG_UNGREEDY is not part of the
|
||||||
|
POSIX standard.
|
||||||
<pre>
|
<pre>
|
||||||
REG_UTF8
|
REG_UTF8
|
||||||
</pre>
|
</pre>
|
||||||
@ -134,7 +151,7 @@ This means the the regex is compiled with PCRE default semantics. In
|
|||||||
particular, the way it handles newline characters in the subject string is the
|
particular, the way it handles newline characters in the subject string is the
|
||||||
Perl way, not the POSIX way. Note that setting PCRE_MULTILINE has only
|
Perl way, not the POSIX way. Note that setting PCRE_MULTILINE has only
|
||||||
<i>some</i> of the effects specified for REG_NEWLINE. It does not affect the way
|
<i>some</i> of the effects specified for REG_NEWLINE. It does not affect the way
|
||||||
newlines are matched by . (they aren't) or by a negative class such as [^a]
|
newlines are matched by . (they are not) or by a negative class such as [^a]
|
||||||
(they are).
|
(they are).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
@ -143,6 +160,11 @@ The yield of <b>regcomp()</b> is zero on success, and non-zero otherwise. The
|
|||||||
is public: <i>re_nsub</i> contains the number of capturing subpatterns in
|
is public: <i>re_nsub</i> contains the number of capturing subpatterns in
|
||||||
the regular expression. Various error codes are defined in the header file.
|
the regular expression. Various error codes are defined in the header file.
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
NOTE: If the yield of <b>regcomp()</b> is non-zero, you must not attempt to
|
||||||
|
use the contents of the <i>preg</i> structure. If, for example, you pass it to
|
||||||
|
<b>regexec()</b>, the result is undefined and your program is likely to crash.
|
||||||
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">MATCHING NEWLINE CHARACTERS</a><br>
|
<br><a name="SEC4" href="#TOC1">MATCHING NEWLINE CHARACTERS</a><br>
|
||||||
<P>
|
<P>
|
||||||
This area is not simple, because POSIX and Perl take different views of things.
|
This area is not simple, because POSIX and Perl take different views of things.
|
||||||
@ -217,6 +239,10 @@ strings is returned. The <i>nmatch</i> and <i>pmatch</i> arguments of
|
|||||||
<b>regexec()</b> are ignored.
|
<b>regexec()</b> are ignored.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
If the value of <i>nmatch</i> is zero, or if the value of <i>pmatch</i> is NULL,
|
||||||
|
no data about any matched strings is returned.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
Otherwise, the portion of the string that was matched, and also any captured
|
Otherwise, the portion of the string that was matched, and also any captured
|
||||||
substrings, are returned via the <i>pmatch</i> argument, which points to an
|
substrings, are returned via the <i>pmatch</i> argument, which points to an
|
||||||
array of <i>nmatch</i> structures of type <i>regmatch_t</i>, containing the
|
array of <i>nmatch</i> structures of type <i>regmatch_t</i>, containing the
|
||||||
@ -257,9 +283,9 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC9" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC9" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 11 March 2009
|
Last updated: 09 January 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2009 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -28,24 +28,31 @@ instead of having to compile them every time the application is run.
|
|||||||
If you are not using any private character tables (see the
|
If you are not using any private character tables (see the
|
||||||
<a href="pcre_maketables.html"><b>pcre_maketables()</b></a>
|
<a href="pcre_maketables.html"><b>pcre_maketables()</b></a>
|
||||||
documentation), this is relatively straightforward. If you are using private
|
documentation), this is relatively straightforward. If you are using private
|
||||||
tables, it is a little bit more complicated.
|
tables, it is a little bit more complicated. However, if you are using the
|
||||||
|
just-in-time optimization feature, it is not possible to save and reload the
|
||||||
|
JIT data.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If you save compiled patterns to a file, you can copy them to a different host
|
If you save compiled patterns to a file, you can copy them to a different host
|
||||||
and run them there. This works even if the new host has the opposite endianness
|
and run them there. If the two hosts have different endianness (byte order),
|
||||||
to the one on which the patterns were compiled. There may be a small
|
you should run the <b>pcre[16|32]_pattern_to_host_byte_order()</b> function on the
|
||||||
performance penalty, but it should be insignificant. However, compiling regular
|
new host before trying to match the pattern. The matching functions return
|
||||||
expressions with one version of PCRE for use with a different version is not
|
PCRE_ERROR_BADENDIANNESS if they detect a pattern with the wrong endianness.
|
||||||
guaranteed to work and may cause crashes.
|
</P>
|
||||||
|
<P>
|
||||||
|
Compiling regular expressions with one version of PCRE for use with a different
|
||||||
|
version is not guaranteed to work and may cause crashes, and saving and
|
||||||
|
restoring a compiled pattern loses any JIT optimization data.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">SAVING A COMPILED PATTERN</a><br>
|
<br><a name="SEC2" href="#TOC1">SAVING A COMPILED PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
The value returned by <b>pcre_compile()</b> points to a single block of memory
|
The value returned by <b>pcre[16|32]_compile()</b> points to a single block of
|
||||||
that holds the compiled pattern and associated data. You can find the length of
|
memory that holds the compiled pattern and associated data. You can find the
|
||||||
this block in bytes by calling <b>pcre_fullinfo()</b> with an argument of
|
length of this block in bytes by calling <b>pcre[16|32]_fullinfo()</b> with an
|
||||||
PCRE_INFO_SIZE. You can then save the data in any appropriate manner. Here is
|
argument of PCRE_INFO_SIZE. You can then save the data in any appropriate
|
||||||
sample code that compiles a pattern and writes it to a file. It assumes that
|
manner. Here is sample code for the 8-bit library that compiles a pattern and
|
||||||
the variable <i>fd</i> refers to a file that is open for output:
|
writes it to a file. It assumes that the variable <i>fd</i> refers to a file
|
||||||
|
that is open for output:
|
||||||
<pre>
|
<pre>
|
||||||
int erroroffset, rc, size;
|
int erroroffset, rc, size;
|
||||||
char *error;
|
char *error;
|
||||||
@ -76,33 +83,36 @@ some daemon process that passes them via sockets to the processes that want
|
|||||||
them.
|
them.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If the pattern has been studied, it is also possible to save the study data in
|
If the pattern has been studied, it is also possible to save the normal study
|
||||||
a similar way to the compiled pattern itself. When studying generates
|
data in a similar way to the compiled pattern itself. However, if the
|
||||||
additional information, <b>pcre_study()</b> returns a pointer to a
|
PCRE_STUDY_JIT_COMPILE option was used, the just-in-time data that is created cannot
|
||||||
<b>pcre_extra</b> data block. Its format is defined in the
|
be saved because it is too dependent on the current environment. When studying
|
||||||
|
generates additional information, <b>pcre[16|32]_study()</b> returns a pointer to a
|
||||||
|
<b>pcre[16|32]_extra</b> data block. Its format is defined in the
|
||||||
<a href="pcreapi.html#extradata">section on matching a pattern</a>
|
<a href="pcreapi.html#extradata">section on matching a pattern</a>
|
||||||
in the
|
in the
|
||||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
documentation. The <i>study_data</i> field points to the binary study data, and
|
documentation. The <i>study_data</i> field points to the binary study data, and
|
||||||
this is what you must save (not the <b>pcre_extra</b> block itself). The length
|
this is what you must save (not the <b>pcre[16|32]_extra</b> block itself). The
|
||||||
of the study data can be obtained by calling <b>pcre_fullinfo()</b> with an
|
length of the study data can be obtained by calling <b>pcre[16|32]_fullinfo()</b>
|
||||||
argument of PCRE_INFO_STUDYSIZE. Remember to check that <b>pcre_study()</b> did
|
with an argument of PCRE_INFO_STUDYSIZE. Remember to check that
|
||||||
return a non-NULL value before trying to save the study data.
|
<b>pcre[16|32]_study()</b> did return a non-NULL value before trying to save the
|
||||||
|
study data.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br>
|
<br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
Re-using a precompiled pattern is straightforward. Having reloaded it into main
|
Re-using a precompiled pattern is straightforward. Having reloaded it into main
|
||||||
memory, you pass its pointer to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> in
|
memory, and called <b>pcre[16|32]_pattern_to_host_byte_order()</b> if necessary,
|
||||||
the usual way. This should work even on another host, and even if that host has
|
you pass its pointer to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> in
|
||||||
the opposite endianness to the one where the pattern was compiled.
|
the usual way.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
However, if you passed a pointer to custom character tables when the pattern
|
However, if you passed a pointer to custom character tables when the pattern
|
||||||
was compiled (the <i>tableptr</i> argument of <b>pcre_compile()</b>), you must
|
was compiled (the <i>tableptr</i> argument of <b>pcre[16|32]_compile()</b>), you
|
||||||
now pass a similar pointer to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>,
|
must now pass a similar pointer to <b>pcre[16|32]_exec()</b> or
|
||||||
because the value saved with the compiled pattern will obviously be nonsense. A
|
<b>pcre[16|32]_dfa_exec()</b>, because the value saved with the compiled pattern
|
||||||
field in a <b>pcre_extra()</b> block is used to pass this data, as described in
|
will obviously be nonsense. A field in a <b>pcre[16|32]_extra</b> block is used
|
||||||
the
|
to pass this data, as described in the
|
||||||
<a href="pcreapi.html#extradata">section on matching a pattern</a>
|
<a href="pcreapi.html#extradata">section on matching a pattern</a>
|
||||||
in the
|
in the
|
||||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
@ -110,23 +120,23 @@ documentation.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If you did not provide custom character tables when the pattern was compiled,
|
If you did not provide custom character tables when the pattern was compiled,
|
||||||
the pointer in the compiled pattern is NULL, which causes <b>pcre_exec()</b> to
|
the pointer in the compiled pattern is NULL, which causes the matching
|
||||||
use PCRE's internal tables. Thus, you do not need to take any special action at
|
functions to use PCRE's internal tables. Thus, you do not need to take any
|
||||||
run time in this case.
|
special action at run time in this case.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If you saved study data with the compiled pattern, you need to create your own
|
If you saved study data with the compiled pattern, you need to create your own
|
||||||
<b>pcre_extra</b> data block and set the <i>study_data</i> field to point to the
|
<b>pcre[16|32]_extra</b> data block and set the <i>study_data</i> field to point to the
|
||||||
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
|
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
|
||||||
<i>flags</i> field to indicate that study data is present. Then pass the
|
<i>flags</i> field to indicate that study data is present. Then pass the
|
||||||
<b>pcre_extra</b> block to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> in the
|
<b>pcre[16|32]_extra</b> block to the matching function in the usual way. If the
|
||||||
usual way.
|
pattern was studied for just-in-time optimization, that data cannot be saved,
|
||||||
|
and so is lost by a save/restore cycle.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">COMPATIBILITY WITH DIFFERENT PCRE RELEASES</a><br>
|
<br><a name="SEC4" href="#TOC1">COMPATIBILITY WITH DIFFERENT PCRE RELEASES</a><br>
|
||||||
<P>
|
<P>
|
||||||
In general, it is safest to recompile all saved patterns when you update to a
|
In general, it is safest to recompile all saved patterns when you update to a
|
||||||
new PCRE release, though not all updates actually require this. Recompiling is
|
new PCRE release, though not all updates actually require this.
|
||||||
definitely needed for release 7.2.
|
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -139,9 +149,9 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 13 June 2007
|
Last updated: 24 June 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2007 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -17,14 +17,19 @@ PCRE SAMPLE PROGRAM
|
|||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
A simple, complete demonstration program, to get you started with using PCRE,
|
A simple, complete demonstration program, to get you started with using PCRE,
|
||||||
is supplied in the file <i>pcredemo.c</i> in the PCRE distribution.
|
is supplied in the file <i>pcredemo.c</i> in the PCRE distribution. A listing of
|
||||||
|
this program is given in the
|
||||||
|
<a href="pcredemo.html"><b>pcredemo</b></a>
|
||||||
|
documentation. If you do not have a copy of the PCRE distribution, you can save
|
||||||
|
this listing to re-create <i>pcredemo.c</i>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The program compiles the regular expression that is its first argument, and
|
The demonstration program, which uses the original PCRE 8-bit library, compiles
|
||||||
matches it against the subject string in its second argument. No PCRE options
|
the regular expression that is its first argument, and matches it against the
|
||||||
are set, and default character tables are used. If matching succeeds, the
|
subject string in its second argument. No PCRE options are set, and default
|
||||||
program outputs the portion of the subject that matched, together with the
|
character tables are used. If matching succeeds, the program outputs the
|
||||||
contents of any captured substrings.
|
portion of the subject that matched, together with the contents of any captured
|
||||||
|
substrings.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If the -g option is given on the command line, the program then goes on to
|
If the -g option is given on the command line, the program then goes on to
|
||||||
@ -34,8 +39,8 @@ an empty string. Comments in the code explain what is going on.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If PCRE is installed in the standard include and library directories for your
|
If PCRE is installed in the standard include and library directories for your
|
||||||
system, you should be able to compile the demonstration program using this
|
operating system, you should be able to compile the demonstration program using
|
||||||
command:
|
this command:
|
||||||
<pre>
|
<pre>
|
||||||
gcc -o pcredemo pcredemo.c -lpcre
|
gcc -o pcredemo pcredemo.c -lpcre
|
||||||
</pre>
|
</pre>
|
||||||
@ -46,22 +51,31 @@ like this:
|
|||||||
<pre>
|
<pre>
|
||||||
gcc -o pcredemo -I/usr/local/include pcredemo.c -L/usr/local/lib -lpcre
|
gcc -o pcredemo -I/usr/local/include pcredemo.c -L/usr/local/lib -lpcre
|
||||||
</pre>
|
</pre>
|
||||||
Once you have compiled the demonstration program, you can run simple tests like
|
In a Windows environment, if you want to statically link the program against a
|
||||||
this:
|
non-dll <b>pcre.a</b> file, you must uncomment the line that defines PCRE_STATIC
|
||||||
|
before including <b>pcre.h</b>, because otherwise the <b>pcre_malloc()</b> and
|
||||||
|
<b>pcre_free()</b> exported functions will be declared
|
||||||
|
<b>__declspec(dllimport)</b>, with unwanted results.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Once you have compiled and linked the demonstration program, you can run simple
|
||||||
|
tests like this:
|
||||||
<pre>
|
<pre>
|
||||||
./pcredemo 'cat|dog' 'the cat sat on the mat'
|
./pcredemo 'cat|dog' 'the cat sat on the mat'
|
||||||
./pcredemo -g 'cat|dog' 'the dog sat on the cat'
|
./pcredemo -g 'cat|dog' 'the dog sat on the cat'
|
||||||
</pre>
|
</pre>
|
||||||
Note that there is a much more comprehensive test program, called
|
Note that there is a much more comprehensive test program, called
|
||||||
<a href="pcretest.html"><b>pcretest</b>,</a>
|
<a href="pcretest.html"><b>pcretest</b>,</a>
|
||||||
which supports many more facilities for testing regular expressions and the
|
which supports many more facilities for testing regular expressions and all the
|
||||||
PCRE library. The <b>pcredemo</b> program is provided as a simple coding
|
PCRE libraries. The
|
||||||
example.
|
<a href="pcredemo.html"><b>pcredemo</b></a>
|
||||||
|
program is provided as a simple coding example.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
On some operating systems (e.g. Solaris), when PCRE is not installed in the
|
If you try to run
|
||||||
standard library directory, you may get an error like this when you try to run
|
<a href="pcredemo.html"><b>pcredemo</b></a>
|
||||||
<b>pcredemo</b>:
|
when PCRE is not installed in the standard library directory, you may get an
|
||||||
|
error like this on some operating systems (e.g. Solaris):
|
||||||
<pre>
|
<pre>
|
||||||
ld.so.1: a.out: fatal: libpcre.so.0: open failed: No such file or directory
|
ld.so.1: a.out: fatal: libpcre.so.0: open failed: No such file or directory
|
||||||
</pre>
|
</pre>
|
||||||
@ -87,9 +101,9 @@ Cambridge CB2 3QH, England.
|
|||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 23 January 2008
|
Last updated: 10 January 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2008 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -16,11 +16,14 @@ man page, in case the conversion went wrong.
|
|||||||
PCRE DISCUSSION OF STACK USAGE
|
PCRE DISCUSSION OF STACK USAGE
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
When you call <b>pcre_exec()</b>, it makes use of an internal function called
|
When you call <b>pcre[16|32]_exec()</b>, it makes use of an internal function
|
||||||
<b>match()</b>. This calls itself recursively at branch points in the pattern,
|
called <b>match()</b>. This calls itself recursively at branch points in the
|
||||||
in order to remember the state of the match so that it can back up and try a
|
pattern, in order to remember the state of the match so that it can back up and
|
||||||
different alternative if the first one fails. As matching proceeds deeper and
|
try a different alternative if the first one fails. As matching proceeds deeper
|
||||||
deeper into the tree of possibilities, the recursion depth increases.
|
and deeper into the tree of possibilities, the recursion depth increases. The
|
||||||
|
<b>match()</b> function is also called in other circumstances, for example,
|
||||||
|
whenever a parenthesized sub-pattern is entered, and in certain cases of
|
||||||
|
repetition.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Not all calls of <b>match()</b> increase the recursion depth; for an item such
|
Not all calls of <b>match()</b> increase the recursion depth; for an item such
|
||||||
@ -30,21 +33,34 @@ the recursive call would immediately be passed back as the result of the
|
|||||||
current call (a "tail recursion"), the function is just restarted instead.
|
current call (a "tail recursion"), the function is just restarted instead.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre_dfa_exec()</b> function operates in an entirely different way, and
|
The above comments apply when <b>pcre[16|32]_exec()</b> is run in its normal
|
||||||
hardly uses recursion at all. The limit on its complexity is the amount of
|
interpretive manner. If the pattern was studied with the
|
||||||
workspace it is given. The comments that follow do NOT apply to
|
PCRE_STUDY_JIT_COMPILE option, and just-in-time compiling was successful, and
|
||||||
<b>pcre_dfa_exec()</b>; they are relevant only for <b>pcre_exec()</b>.
|
the options passed to <b>pcre[16|32]_exec()</b> were not incompatible, the matching
|
||||||
|
process uses the JIT-compiled code instead of the <b>match()</b> function. In
|
||||||
|
this case, the memory requirements are handled entirely differently. See the
|
||||||
|
<a href="pcrejit.html"><b>pcrejit</b></a>
|
||||||
|
documentation for details.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
You can set limits on the number of times that <b>match()</b> is called, both in
|
The <b>pcre[16|32]_dfa_exec()</b> function operates in an entirely different way,
|
||||||
total and recursively. If the limit is exceeded, an error occurs. For details,
|
and uses recursion only when there is a regular expression recursion or
|
||||||
see the
|
subroutine call in the pattern. This includes the processing of assertion and
|
||||||
<a href="pcreapi.html#extradata">section on extra data for <b>pcre_exec()</b></a>
|
"once-only" subpatterns, which are handled like subroutine calls. Normally,
|
||||||
in the
|
these are never very deep, and the limit on the complexity of
|
||||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
<b>pcre[16|32]_dfa_exec()</b> is controlled by the amount of workspace it is given.
|
||||||
documentation.
|
However, it is possible to write patterns with runaway infinite recursions;
|
||||||
|
such patterns will cause <b>pcre[16|32]_dfa_exec()</b> to run out of stack. At
|
||||||
|
present, there is no protection against this.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
The comments that follow do NOT apply to <b>pcre[16|32]_dfa_exec()</b>; they are
|
||||||
|
relevant only for <b>pcre[16|32]_exec()</b> without the JIT optimization.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Reducing <b>pcre[16|32]_exec()</b>'s stack usage
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
Each time that <b>match()</b> is actually called recursively, it uses memory
|
Each time that <b>match()</b> is actually called recursively, it uses memory
|
||||||
from the process stack. For certain kinds of pattern and data, very large
|
from the process stack. For certain kinds of pattern and data, very large
|
||||||
amounts of stack may be needed, despite the recognition of "tail recursion".
|
amounts of stack may be needed, despite the recognition of "tail recursion".
|
||||||
@ -78,42 +94,79 @@ subject strings is to write repeated parenthesized subpatterns to match more
|
|||||||
than one character whenever possible.
|
than one character whenever possible.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Compiling PCRE to use heap instead of stack
|
Compiling PCRE to use heap instead of stack for <b>pcre[16|32]_exec()</b>
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
In environments where stack memory is constrained, you might want to compile
|
In environments where stack memory is constrained, you might want to compile
|
||||||
PCRE to use heap memory instead of stack for remembering back-up points. This
|
PCRE to use heap memory instead of stack for remembering back-up points when
|
||||||
makes it run a lot more slowly, however. Details of how to do this are given in
|
<b>pcre[16|32]_exec()</b> is running. This makes it run a lot more slowly, however.
|
||||||
the
|
Details of how to do this are given in the
|
||||||
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
||||||
documentation. When built in this way, instead of using the stack, PCRE obtains
|
documentation. When built in this way, instead of using the stack, PCRE obtains
|
||||||
and frees memory by calling the functions that are pointed to by the
|
and frees memory by calling the functions that are pointed to by the
|
||||||
<b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables. By default, these
|
<b>pcre[16|32]_stack_malloc</b> and <b>pcre[16|32]_stack_free</b> variables. By
|
||||||
point to <b>malloc()</b> and <b>free()</b>, but you can replace the pointers to
|
default, these point to <b>malloc()</b> and <b>free()</b>, but you can replace
|
||||||
cause PCRE to use your own functions. Since the block sizes are always the
|
the pointers to cause PCRE to use your own functions. Since the block sizes are
|
||||||
same, and are always freed in reverse order, it may be possible to implement
|
always the same, and are always freed in reverse order, it may be possible to
|
||||||
customized memory handlers that are more efficient than the standard functions.
|
implement customized memory handlers that are more efficient than the standard
|
||||||
|
functions.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Limiting PCRE's stack usage
|
Limiting <b>pcre[16|32]_exec()</b>'s stack usage
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE has an internal counter that can be used to limit the depth of recursion,
|
You can set limits on the number of times that <b>match()</b> is called, both in
|
||||||
and thus cause <b>pcre_exec()</b> to give an error code before it runs out of
|
total and recursively. If a limit is exceeded, <b>pcre[16|32]_exec()</b> returns an
|
||||||
stack. By default, the limit is very large, and unlikely ever to operate. It
|
error code. Setting suitable limits should prevent it from running out of
|
||||||
can be changed when PCRE is built, and it can also be set when
|
stack. The default values of the limits are very large, and unlikely ever to
|
||||||
<b>pcre_exec()</b> is called. For details of these interfaces, see the
|
operate. They can be changed when PCRE is built, and they can also be set when
|
||||||
|
<b>pcre[16|32]_exec()</b> is called. For details of these interfaces, see the
|
||||||
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
||||||
and
|
documentation and the
|
||||||
|
<a href="pcreapi.html#extradata">section on extra data for <b>pcre[16|32]_exec()</b></a>
|
||||||
|
in the
|
||||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
As a very rough rule of thumb, you should reckon on about 500 bytes per
|
As a very rough rule of thumb, you should reckon on about 500 bytes per
|
||||||
recursion. Thus, if you want to limit your stack usage to 8Mb, you
|
recursion. Thus, if you want to limit your stack usage to 8Mb, you should set
|
||||||
should set the limit at 16000 recursions. A 64Mb stack, on the other hand, can
|
the limit at 16000 recursions. A 64Mb stack, on the other hand, can support
|
||||||
support around 128000 recursions. The <b>pcretest</b> test program has a command
|
around 128000 recursions.
|
||||||
line option (<b>-S</b>) that can be used to increase the size of its stack.
|
</P>
|
||||||
|
<P>
|
||||||
|
In Unix-like environments, the <b>pcretest</b> test program has a command line
|
||||||
|
option (<b>-S</b>) that can be used to increase the size of its stack. As long
|
||||||
|
as the stack is large enough, another option (<b>-M</b>) can be used to find the
|
||||||
|
smallest limits that allow a particular pattern to match a given subject
|
||||||
|
string. This is done by calling <b>pcre[16|32]_exec()</b> repeatedly with different
|
||||||
|
limits.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Obtaining an estimate of stack usage
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
The actual amount of stack used per recursion can vary quite a lot, depending
|
||||||
|
on the compiler that was used to build PCRE and the optimization or debugging
|
||||||
|
options that were set for it. The rule of thumb value of 500 bytes mentioned
|
||||||
|
above may be larger or smaller than what is actually needed. A better
|
||||||
|
approximation can be obtained by running this command:
|
||||||
|
<pre>
|
||||||
|
pcretest -m -C
|
||||||
|
</pre>
|
||||||
|
The <b>-C</b> option causes <b>pcretest</b> to output information about the
|
||||||
|
options with which PCRE was compiled. When <b>-m</b> is also given (before
|
||||||
|
<b>-C</b>), information about stack use is given in a line like this:
|
||||||
|
<pre>
|
||||||
|
Match recursion uses stack: approximate frame size = 640 bytes
|
||||||
|
</pre>
|
||||||
|
The value is approximate because some recursions need a bit more (up to perhaps
|
||||||
|
16 more bytes).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If the above command is given when PCRE is compiled to use the heap instead of
|
||||||
|
the stack for recursion, the value that is output is the size of each block
|
||||||
|
that is obtained from the heap.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Changing stack size in Unix-like systems
|
Changing stack size in Unix-like systems
|
||||||
@ -137,7 +190,7 @@ limit on stack size by code such as this:
|
|||||||
</pre>
|
</pre>
|
||||||
This reads the current limits (soft and hard) using <b>getrlimit()</b>, then
|
This reads the current limits (soft and hard) using <b>getrlimit()</b>, then
|
||||||
attempts to increase the soft limit to 100Mb using <b>setrlimit()</b>. You must
|
attempts to increase the soft limit to 100Mb using <b>setrlimit()</b>. You must
|
||||||
do this before calling <b>pcre_exec()</b>.
|
do this before calling <b>pcre[16|32]_exec()</b>.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Changing stack size in Mac OS X
|
Changing stack size in Mac OS X
|
||||||
@ -163,9 +216,9 @@ Cambridge CB2 3QH, England.
|
|||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 09 July 2008
|
Last updated: 24 June 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2008 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -17,36 +17,36 @@ man page, in case the conversion went wrong.
|
|||||||
<li><a name="TOC2" href="#SEC2">QUOTING</a>
|
<li><a name="TOC2" href="#SEC2">QUOTING</a>
|
||||||
<li><a name="TOC3" href="#SEC3">CHARACTERS</a>
|
<li><a name="TOC3" href="#SEC3">CHARACTERS</a>
|
||||||
<li><a name="TOC4" href="#SEC4">CHARACTER TYPES</a>
|
<li><a name="TOC4" href="#SEC4">CHARACTER TYPES</a>
|
||||||
<li><a name="TOC5" href="#SEC5">GENERAL CATEGORY PROPERTY CODES FOR \p and \P</a>
|
<li><a name="TOC5" href="#SEC5">GENERAL CATEGORY PROPERTIES FOR \p and \P</a>
|
||||||
<li><a name="TOC6" href="#SEC6">SCRIPT NAMES FOR \p AND \P</a>
|
<li><a name="TOC6" href="#SEC6">PCRE SPECIAL CATEGORY PROPERTIES FOR \p and \P</a>
|
||||||
<li><a name="TOC7" href="#SEC7">CHARACTER CLASSES</a>
|
<li><a name="TOC7" href="#SEC7">SCRIPT NAMES FOR \p AND \P</a>
|
||||||
<li><a name="TOC8" href="#SEC8">QUANTIFIERS</a>
|
<li><a name="TOC8" href="#SEC8">CHARACTER CLASSES</a>
|
||||||
<li><a name="TOC9" href="#SEC9">ANCHORS AND SIMPLE ASSERTIONS</a>
|
<li><a name="TOC9" href="#SEC9">QUANTIFIERS</a>
|
||||||
<li><a name="TOC10" href="#SEC10">MATCH POINT RESET</a>
|
<li><a name="TOC10" href="#SEC10">ANCHORS AND SIMPLE ASSERTIONS</a>
|
||||||
<li><a name="TOC11" href="#SEC11">ALTERNATION</a>
|
<li><a name="TOC11" href="#SEC11">MATCH POINT RESET</a>
|
||||||
<li><a name="TOC12" href="#SEC12">CAPTURING</a>
|
<li><a name="TOC12" href="#SEC12">ALTERNATION</a>
|
||||||
<li><a name="TOC13" href="#SEC13">ATOMIC GROUPS</a>
|
<li><a name="TOC13" href="#SEC13">CAPTURING</a>
|
||||||
<li><a name="TOC14" href="#SEC14">COMMENT</a>
|
<li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a>
|
||||||
<li><a name="TOC15" href="#SEC15">OPTION SETTING</a>
|
<li><a name="TOC15" href="#SEC15">COMMENT</a>
|
||||||
<li><a name="TOC16" href="#SEC16">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
|
<li><a name="TOC16" href="#SEC16">OPTION SETTING</a>
|
||||||
<li><a name="TOC17" href="#SEC17">BACKREFERENCES</a>
|
<li><a name="TOC17" href="#SEC17">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
|
||||||
<li><a name="TOC18" href="#SEC18">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
|
<li><a name="TOC18" href="#SEC18">BACKREFERENCES</a>
|
||||||
<li><a name="TOC19" href="#SEC19">CONDITIONAL PATTERNS</a>
|
<li><a name="TOC19" href="#SEC19">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
|
||||||
<li><a name="TOC20" href="#SEC20">BACKTRACKING CONTROL</a>
|
<li><a name="TOC20" href="#SEC20">CONDITIONAL PATTERNS</a>
|
||||||
<li><a name="TOC21" href="#SEC21">NEWLINE CONVENTIONS</a>
|
<li><a name="TOC21" href="#SEC21">BACKTRACKING CONTROL</a>
|
||||||
<li><a name="TOC22" href="#SEC22">WHAT \R MATCHES</a>
|
<li><a name="TOC22" href="#SEC22">NEWLINE CONVENTIONS</a>
|
||||||
<li><a name="TOC23" href="#SEC23">CALLOUTS</a>
|
<li><a name="TOC23" href="#SEC23">WHAT \R MATCHES</a>
|
||||||
<li><a name="TOC24" href="#SEC24">SEE ALSO</a>
|
<li><a name="TOC24" href="#SEC24">CALLOUTS</a>
|
||||||
<li><a name="TOC25" href="#SEC25">AUTHOR</a>
|
<li><a name="TOC25" href="#SEC25">SEE ALSO</a>
|
||||||
<li><a name="TOC26" href="#SEC26">REVISION</a>
|
<li><a name="TOC26" href="#SEC26">AUTHOR</a>
|
||||||
|
<li><a name="TOC27" href="#SEC27">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">PCRE REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
|
<br><a name="SEC1" href="#TOC1">PCRE REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
|
||||||
<P>
|
<P>
|
||||||
The full syntax and semantics of the regular expressions that are supported by
|
The full syntax and semantics of the regular expressions that are supported by
|
||||||
PCRE are described in the
|
PCRE are described in the
|
||||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||||
documentation. This document contains just a quick-reference summary of the
|
documentation. This document contains a quick-reference summary of the syntax.
|
||||||
syntax.
|
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">QUOTING</a><br>
|
<br><a name="SEC2" href="#TOC1">QUOTING</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -59,9 +59,9 @@ syntax.
|
|||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
\a alarm, that is, the BEL character (hex 07)
|
\a alarm, that is, the BEL character (hex 07)
|
||||||
\cx "control-x", where x is any character
|
\cx "control-x", where x is any ASCII character
|
||||||
\e escape (hex 1B)
|
\e escape (hex 1B)
|
||||||
\f formfeed (hex 0C)
|
\f form feed (hex 0C)
|
||||||
\n newline (hex 0A)
|
\n newline (hex 0A)
|
||||||
\r carriage return (hex 0D)
|
\r carriage return (hex 0D)
|
||||||
\t tab (hex 09)
|
\t tab (hex 09)
|
||||||
@ -75,25 +75,28 @@ syntax.
|
|||||||
<pre>
|
<pre>
|
||||||
. any character except newline;
|
. any character except newline;
|
||||||
in dotall mode, any character whatsoever
|
in dotall mode, any character whatsoever
|
||||||
\C one byte, even in UTF-8 mode (best avoided)
|
\C one data unit, even in UTF mode (best avoided)
|
||||||
\d a decimal digit
|
\d a decimal digit
|
||||||
\D a character that is not a decimal digit
|
\D a character that is not a decimal digit
|
||||||
\h a horizontal whitespace character
|
\h a horizontal white space character
|
||||||
\H a character that is not a horizontal whitespace character
|
\H a character that is not a horizontal white space character
|
||||||
|
\N a character that is not a newline
|
||||||
\p{<i>xx</i>} a character with the <i>xx</i> property
|
\p{<i>xx</i>} a character with the <i>xx</i> property
|
||||||
\P{<i>xx</i>} a character without the <i>xx</i> property
|
\P{<i>xx</i>} a character without the <i>xx</i> property
|
||||||
\R a newline sequence
|
\R a newline sequence
|
||||||
\s a whitespace character
|
\s a white space character
|
||||||
\S a character that is not a whitespace character
|
\S a character that is not a white space character
|
||||||
\v a vertical whitespace character
|
\v a vertical white space character
|
||||||
\V a character that is not a vertical whitespace character
|
\V a character that is not a vertical white space character
|
||||||
\w a "word" character
|
\w a "word" character
|
||||||
\W a "non-word" character
|
\W a "non-word" character
|
||||||
\X an extended Unicode sequence
|
\X a Unicode extended grapheme cluster
|
||||||
</pre>
|
</pre>
|
||||||
In PCRE, \d, \D, \s, \S, \w, and \W recognize only ASCII characters.
|
In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
|
||||||
|
characters, even in a UTF mode. However, this can be changed by setting the
|
||||||
|
PCRE_UCP option.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTY CODES FOR \p and \P</a><br>
|
<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
C Other
|
C Other
|
||||||
@ -142,18 +145,32 @@ In PCRE, \d, \D, \s, \S, \w, and \W recognize only ASCII characters.
|
|||||||
Zs Space separator
|
Zs Space separator
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
|
<br><a name="SEC6" href="#TOC1">PCRE SPECIAL CATEGORY PROPERTIES FOR \p and \P</a><br>
|
||||||
|
<P>
|
||||||
|
<pre>
|
||||||
|
Xan Alphanumeric: union of properties L and N
|
||||||
|
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||||
|
Xsp Perl space: property Z or tab, NL, FF, CR
|
||||||
|
Xwd Perl word: property Xan or underscore
|
||||||
|
</PRE>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
|
||||||
<P>
|
<P>
|
||||||
Arabic,
|
Arabic,
|
||||||
Armenian,
|
Armenian,
|
||||||
|
Avestan,
|
||||||
Balinese,
|
Balinese,
|
||||||
|
Bamum,
|
||||||
|
Batak,
|
||||||
Bengali,
|
Bengali,
|
||||||
Bopomofo,
|
Bopomofo,
|
||||||
|
Brahmi,
|
||||||
Braille,
|
Braille,
|
||||||
Buginese,
|
Buginese,
|
||||||
Buhid,
|
Buhid,
|
||||||
Canadian_Aboriginal,
|
Canadian_Aboriginal,
|
||||||
Carian,
|
Carian,
|
||||||
|
Chakma,
|
||||||
Cham,
|
Cham,
|
||||||
Cherokee,
|
Cherokee,
|
||||||
Common,
|
Common,
|
||||||
@ -163,6 +180,7 @@ Cypriot,
|
|||||||
Cyrillic,
|
Cyrillic,
|
||||||
Deseret,
|
Deseret,
|
||||||
Devanagari,
|
Devanagari,
|
||||||
|
Egyptian_Hieroglyphs,
|
||||||
Ethiopic,
|
Ethiopic,
|
||||||
Georgian,
|
Georgian,
|
||||||
Glagolitic,
|
Glagolitic,
|
||||||
@ -175,7 +193,12 @@ Hangul,
|
|||||||
Hanunoo,
|
Hanunoo,
|
||||||
Hebrew,
|
Hebrew,
|
||||||
Hiragana,
|
Hiragana,
|
||||||
|
Imperial_Aramaic,
|
||||||
Inherited,
|
Inherited,
|
||||||
|
Inscriptional_Pahlavi,
|
||||||
|
Inscriptional_Parthian,
|
||||||
|
Javanese,
|
||||||
|
Kaithi,
|
||||||
Kannada,
|
Kannada,
|
||||||
Katakana,
|
Katakana,
|
||||||
Kayah_Li,
|
Kayah_Li,
|
||||||
@ -186,9 +209,15 @@ Latin,
|
|||||||
Lepcha,
|
Lepcha,
|
||||||
Limbu,
|
Limbu,
|
||||||
Linear_B,
|
Linear_B,
|
||||||
|
Lisu,
|
||||||
Lycian,
|
Lycian,
|
||||||
Lydian,
|
Lydian,
|
||||||
Malayalam,
|
Malayalam,
|
||||||
|
Mandaic,
|
||||||
|
Meetei_Mayek,
|
||||||
|
Meroitic_Cursive,
|
||||||
|
Meroitic_Hieroglyphs,
|
||||||
|
Miao,
|
||||||
Mongolian,
|
Mongolian,
|
||||||
Myanmar,
|
Myanmar,
|
||||||
New_Tai_Lue,
|
New_Tai_Lue,
|
||||||
@ -196,6 +225,8 @@ Nko,
|
|||||||
Ogham,
|
Ogham,
|
||||||
Old_Italic,
|
Old_Italic,
|
||||||
Old_Persian,
|
Old_Persian,
|
||||||
|
Old_South_Arabian,
|
||||||
|
Old_Turkic,
|
||||||
Ol_Chiki,
|
Ol_Chiki,
|
||||||
Oriya,
|
Oriya,
|
||||||
Osmanya,
|
Osmanya,
|
||||||
@ -203,15 +234,21 @@ Phags_Pa,
|
|||||||
Phoenician,
|
Phoenician,
|
||||||
Rejang,
|
Rejang,
|
||||||
Runic,
|
Runic,
|
||||||
|
Samaritan,
|
||||||
Saurashtra,
|
Saurashtra,
|
||||||
|
Sharada,
|
||||||
Shavian,
|
Shavian,
|
||||||
Sinhala,
|
Sinhala,
|
||||||
Sudanese,
|
Sora_Sompeng,
|
||||||
|
Sundanese,
|
||||||
Syloti_Nagri,
|
Syloti_Nagri,
|
||||||
Syriac,
|
Syriac,
|
||||||
Tagalog,
|
Tagalog,
|
||||||
Tagbanwa,
|
Tagbanwa,
|
||||||
Tai_Le,
|
Tai_Le,
|
||||||
|
Tai_Tham,
|
||||||
|
Tai_Viet,
|
||||||
|
Takri,
|
||||||
Tamil,
|
Tamil,
|
||||||
Telugu,
|
Telugu,
|
||||||
Thaana,
|
Thaana,
|
||||||
@ -222,7 +259,7 @@ Ugaritic,
|
|||||||
Vai,
|
Vai,
|
||||||
Yi.
|
Yi.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">CHARACTER CLASSES</a><br>
|
<br><a name="SEC8" href="#TOC1">CHARACTER CLASSES</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
[...] positive character class
|
[...] positive character class
|
||||||
@ -241,15 +278,16 @@ Yi.
|
|||||||
lower lower case letter
|
lower lower case letter
|
||||||
print printing, including space
|
print printing, including space
|
||||||
punct printing, excluding alphanumeric
|
punct printing, excluding alphanumeric
|
||||||
space whitespace
|
space white space
|
||||||
upper upper case letter
|
upper upper case letter
|
||||||
word same as \w
|
word same as \w
|
||||||
xdigit hexadecimal digit
|
xdigit hexadecimal digit
|
||||||
</pre>
|
</pre>
|
||||||
In PCRE, POSIX character set names recognize only ASCII characters. You can use
|
In PCRE, POSIX character set names recognize only ASCII characters by default,
|
||||||
|
but some of them use Unicode properties if PCRE_UCP is set. You can use
|
||||||
\Q...\E inside a character class.
|
\Q...\E inside a character class.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">QUANTIFIERS</a><br>
|
<br><a name="SEC9" href="#TOC1">QUANTIFIERS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
? 0 or 1, greedy
|
? 0 or 1, greedy
|
||||||
@ -270,10 +308,10 @@ In PCRE, POSIX character set names recognize only ASCII characters. You can use
|
|||||||
{n,}? n or more, lazy
|
{n,}? n or more, lazy
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC9" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
|
<br><a name="SEC10" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
\b word boundary (only ASCII letters recognized)
|
\b word boundary
|
||||||
\B not a word boundary
|
\B not a word boundary
|
||||||
^ start of subject
|
^ start of subject
|
||||||
also after internal newline in multiline mode
|
also after internal newline in multiline mode
|
||||||
@ -287,19 +325,19 @@ In PCRE, POSIX character set names recognize only ASCII characters. You can use
|
|||||||
\G first matching position in subject
|
\G first matching position in subject
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC10" href="#TOC1">MATCH POINT RESET</a><br>
|
<br><a name="SEC11" href="#TOC1">MATCH POINT RESET</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
\K reset start of match
|
\K reset start of match
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC11" href="#TOC1">ALTERNATION</a><br>
|
<br><a name="SEC12" href="#TOC1">ALTERNATION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
expr|expr|expr...
|
expr|expr|expr...
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">CAPTURING</a><br>
|
<br><a name="SEC13" href="#TOC1">CAPTURING</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
(...) capturing group
|
(...) capturing group
|
||||||
@ -311,19 +349,19 @@ In PCRE, POSIX character set names recognize only ASCII characters. You can use
|
|||||||
capturing groups in each alternative
|
capturing groups in each alternative
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">ATOMIC GROUPS</a><br>
|
<br><a name="SEC14" href="#TOC1">ATOMIC GROUPS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
(?>...) atomic, non-capturing group
|
(?>...) atomic, non-capturing group
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC14" href="#TOC1">COMMENT</a><br>
|
<br><a name="SEC15" href="#TOC1">COMMENT</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
(?#....) comment (not nestable)
|
(?#....) comment (not nestable)
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">OPTION SETTING</a><br>
|
<br><a name="SEC16" href="#TOC1">OPTION SETTING</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
(?i) caseless
|
(?i) caseless
|
||||||
@ -334,13 +372,18 @@ In PCRE, POSIX character set names recognize only ASCII characters. You can use
|
|||||||
(?x) extended (ignore white space)
|
(?x) extended (ignore white space)
|
||||||
(?-...) unset option(s)
|
(?-...) unset option(s)
|
||||||
</pre>
|
</pre>
|
||||||
The following is recognized only at the start of a pattern or after one of the
|
The following are recognized only at the start of a pattern or after one of the
|
||||||
newline-setting options with similar syntax:
|
newline-setting options with similar syntax:
|
||||||
<pre>
|
<pre>
|
||||||
(*UTF8) set UTF-8 mode
|
(*NO_START_OPT) no start-match optimization (PCRE_NO_START_OPTIMIZE)
|
||||||
|
(*UTF8) set UTF-8 mode: 8-bit library (PCRE_UTF8)
|
||||||
|
(*UTF16) set UTF-16 mode: 16-bit library (PCRE_UTF16)
|
||||||
|
(*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
|
||||||
|
(*UTF) set appropriate UTF mode for the library in use
|
||||||
|
(*UCP) set PCRE_UCP (use Unicode properties for \d etc)
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC16" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
|
<br><a name="SEC17" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
(?=...) positive look ahead
|
(?=...) positive look ahead
|
||||||
@ -350,7 +393,7 @@ newline-setting options with similar syntax:
|
|||||||
</pre>
|
</pre>
|
||||||
Each top-level branch of a look behind must be of a fixed length.
|
Each top-level branch of a look behind must be of a fixed length.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC17" href="#TOC1">BACKREFERENCES</a><br>
|
<br><a name="SEC18" href="#TOC1">BACKREFERENCES</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
\n reference by number (can be ambiguous)
|
\n reference by number (can be ambiguous)
|
||||||
@ -364,7 +407,7 @@ Each top-level branch of a look behind must be of a fixed length.
|
|||||||
(?P=name) reference by name (Python)
|
(?P=name) reference by name (Python)
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC18" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
|
<br><a name="SEC19" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
(?R) recurse whole pattern
|
(?R) recurse whole pattern
|
||||||
@ -383,7 +426,7 @@ Each top-level branch of a look behind must be of a fixed length.
|
|||||||
\g'-n' call subpattern by relative number (PCRE extension)
|
\g'-n' call subpattern by relative number (PCRE extension)
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC19" href="#TOC1">CONDITIONAL PATTERNS</a><br>
|
<br><a name="SEC20" href="#TOC1">CONDITIONAL PATTERNS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
(?(condition)yes-pattern)
|
(?(condition)yes-pattern)
|
||||||
@ -402,12 +445,13 @@ Each top-level branch of a look behind must be of a fixed length.
|
|||||||
(?(assert)... assertion condition
|
(?(assert)... assertion condition
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC20" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
<br><a name="SEC21" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||||
<P>
|
<P>
|
||||||
The following act immediately they are reached:
|
The following act immediately they are reached:
|
||||||
<pre>
|
<pre>
|
||||||
(*ACCEPT) force successful match
|
(*ACCEPT) force successful match
|
||||||
(*FAIL) force backtrack; synonym (*F)
|
(*FAIL) force backtrack; synonym (*F)
|
||||||
|
(*MARK:NAME) set name to be passed back; synonym (*:NAME)
|
||||||
</pre>
|
</pre>
|
||||||
The following act only when a subsequent match failure causes a backtrack to
|
The following act only when a subsequent match failure causes a backtrack to
|
||||||
reach them. They all force a match failure, but they differ in what happens
|
reach them. They all force a match failure, but they differ in what happens
|
||||||
@ -416,14 +460,18 @@ pattern is not anchored.
|
|||||||
<pre>
|
<pre>
|
||||||
(*COMMIT) overall failure, no advance of starting point
|
(*COMMIT) overall failure, no advance of starting point
|
||||||
(*PRUNE) advance to next starting character
|
(*PRUNE) advance to next starting character
|
||||||
(*SKIP) advance start to current matching position
|
(*PRUNE:NAME) equivalent to (*MARK:NAME)(*PRUNE)
|
||||||
|
(*SKIP) advance to current matching position
|
||||||
|
(*SKIP:NAME) advance to position corresponding to an earlier
|
||||||
|
(*MARK:NAME); if not found, the (*SKIP) is ignored
|
||||||
(*THEN) local failure, backtrack to next alternation
|
(*THEN) local failure, backtrack to next alternation
|
||||||
|
(*THEN:NAME) equivalent to (*MARK:NAME)(*THEN)
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">NEWLINE CONVENTIONS</a><br>
|
<br><a name="SEC22" href="#TOC1">NEWLINE CONVENTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
These are recognized only at the very start of the pattern or after a
|
These are recognized only at the very start of the pattern or after a
|
||||||
(*BSR_...) or (*UTF8) option.
|
(*BSR_...), (*UTF8), (*UTF16), (*UTF32) or (*UCP) option.
|
||||||
<pre>
|
<pre>
|
||||||
(*CR) carriage return only
|
(*CR) carriage return only
|
||||||
(*LF) linefeed only
|
(*LF) linefeed only
|
||||||
@ -432,28 +480,28 @@ These are recognized only at the very start of the pattern or after a
|
|||||||
(*ANY) any Unicode newline sequence
|
(*ANY) any Unicode newline sequence
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC22" href="#TOC1">WHAT \R MATCHES</a><br>
|
<br><a name="SEC23" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||||
<P>
|
<P>
|
||||||
These are recognized only at the very start of the pattern or after a
|
These are recognized only at the very start of the pattern or after a
|
||||||
(*...) option that sets the newline convention or UTF-8 mode.
|
(*...) option that sets the newline convention or a UTF or UCP mode.
|
||||||
<pre>
|
<pre>
|
||||||
(*BSR_ANYCRLF) CR, LF, or CRLF
|
(*BSR_ANYCRLF) CR, LF, or CRLF
|
||||||
(*BSR_UNICODE) any Unicode newline sequence
|
(*BSR_UNICODE) any Unicode newline sequence
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC23" href="#TOC1">CALLOUTS</a><br>
|
<br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
(?C) callout
|
(?C) callout
|
||||||
(?Cn) callout with data n
|
(?Cn) callout with data n
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC24" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC25" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcrepattern</b>(3), <b>pcreapi</b>(3), <b>pcrecallout</b>(3),
|
<b>pcrepattern</b>(3), <b>pcreapi</b>(3), <b>pcrecallout</b>(3),
|
||||||
<b>pcrematching</b>(3), <b>pcre</b>(3).
|
<b>pcrematching</b>(3), <b>pcre</b>(3).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC25" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC26" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
@ -462,11 +510,11 @@ University Computing Service
|
|||||||
Cambridge CB2 3QH, England.
|
Cambridge CB2 3QH, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 11 April 2009
|
Last updated: 11 November 2012
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2009 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
File diff suppressed because it is too large
Load Diff
270
tools/pcre/doc/html/pcreunicode.html
Normal file
270
tools/pcre/doc/html/pcreunicode.html
Normal file
@ -0,0 +1,270 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcreunicode specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcreunicode man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
UTF-8, UTF-16, UTF-32, AND UNICODE PROPERTY SUPPORT
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
As well as UTF-8 support, PCRE also supports UTF-16 (from release 8.30) and
|
||||||
|
UTF-32 (from release 8.32), by means of two additional libraries. They can be
|
||||||
|
built as well as, or instead of, the 8-bit library.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
UTF-8 SUPPORT
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
In order to process UTF-8 strings, you must build PCRE's 8-bit library with UTF
|
||||||
|
support, and, in addition, you must call
|
||||||
|
<a href="pcre_compile.html"><b>pcre_compile()</b></a>
|
||||||
|
with the PCRE_UTF8 option flag, or the pattern must start with the sequence
|
||||||
|
(*UTF8) or (*UTF). When either of these is the case, both the pattern and any
|
||||||
|
subject strings that are matched against it are treated as UTF-8 strings
|
||||||
|
instead of strings of individual 1-byte characters.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
UTF-16 AND UTF-32 SUPPORT
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
In order to process UTF-16 or UTF-32 strings, you must build PCRE's 16-bit or
|
||||||
|
32-bit library with UTF support, and, in addition, you must call
|
||||||
|
<a href="pcre16_compile.html"><b>pcre16_compile()</b></a>
|
||||||
|
or
|
||||||
|
<a href="pcre32_compile.html"><b>pcre32_compile()</b></a>
|
||||||
|
with the PCRE_UTF16 or PCRE_UTF32 option flag, as appropriate. Alternatively,
|
||||||
|
the pattern must start with the sequence (*UTF16), (*UTF32), as appropriate, or
|
||||||
|
(*UTF), which can be used with either library. When UTF mode is set, both the
|
||||||
|
pattern and any subject strings that are matched against it are treated as
|
||||||
|
UTF-16 or UTF-32 strings instead of strings of individual 16-bit or 32-bit
|
||||||
|
characters.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
UTF SUPPORT OVERHEAD
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
If you compile PCRE with UTF support, but do not use it at run time, the
|
||||||
|
library will be a bit bigger, but the additional run time overhead is limited
|
||||||
|
to testing the PCRE_UTF[8|16|32] flag occasionally, so should not be very big.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
UNICODE PROPERTY SUPPORT
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
If PCRE is built with Unicode character property support (which implies UTF
|
||||||
|
support), the escape sequences \p{..}, \P{..}, and \X can be used.
|
||||||
|
The available properties that can be tested are limited to the general
|
||||||
|
category properties such as Lu for an upper case letter or Nd for a decimal
|
||||||
|
number, the Unicode script names such as Arabic or Han, and the derived
|
||||||
|
properties Any and L&. Full lists are given in the
|
||||||
|
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||||
|
and
|
||||||
|
<a href="pcresyntax.html"><b>pcresyntax</b></a>
|
||||||
|
documentation. Only the short names for properties are supported. For example,
|
||||||
|
\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
|
||||||
|
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
|
||||||
|
compatibility with Perl 5.6. PCRE does not support this.
|
||||||
|
<a name="utf8strings"></a></P>
|
||||||
|
<br><b>
|
||||||
|
Validity of UTF-8 strings
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
When you set the PCRE_UTF8 flag, the byte strings passed as patterns and
|
||||||
|
subjects are (by default) checked for validity on entry to the relevant
|
||||||
|
functions. The entire string is checked before any other processing takes
|
||||||
|
place. From release 7.3 of PCRE, the check is according to the rules of RFC 3629,
|
||||||
|
which are themselves derived from the Unicode specification. Earlier releases
|
||||||
|
of PCRE followed the rules of RFC 2279, which allows the full range of 31-bit
|
||||||
|
values (0 to 0x7FFFFFFF). The current check allows only values in the range U+0
|
||||||
|
to U+10FFFF, excluding the surrogate area and the non-characters.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
|
||||||
|
where they are used in pairs to encode codepoints with values greater than
|
||||||
|
0xFFFF. The code points that are encoded by UTF-16 pairs are available
|
||||||
|
independently in the UTF-8 and UTF-32 encodings. (In other words, the whole
|
||||||
|
surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
|
||||||
|
UTF-32.)
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Also excluded are the "Non-Character" code points, which are U+FDD0 to U+FDEF
|
||||||
|
and the last two code points in each plane, U+??FFFE and U+??FFFF.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If an invalid UTF-8 string is passed to PCRE, an error return is given. At
|
||||||
|
compile time, the only additional information is the offset to the first byte
|
||||||
|
of the failing character. The run-time functions <b>pcre_exec()</b> and
|
||||||
|
<b>pcre_dfa_exec()</b> also pass back this information, as well as a more
|
||||||
|
detailed reason code if the caller has provided memory in which to do this.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
In some situations, you may already know that your strings are valid, and
|
||||||
|
therefore want to skip these checks in order to improve performance, for
|
||||||
|
example in the case of a long subject string that is being scanned repeatedly.
|
||||||
|
If you set the PCRE_NO_UTF8_CHECK flag at compile time or at run time, PCRE
|
||||||
|
assumes that the pattern or subject it is given (respectively) contains only
|
||||||
|
valid UTF-8 codes. In this case, it does not diagnose an invalid UTF-8 string.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Note that passing PCRE_NO_UTF8_CHECK to <b>pcre_compile()</b> just disables the
|
||||||
|
check for the pattern; it does not also apply to subject strings. If you want
|
||||||
|
to disable the check for a subject string you must pass this option to
|
||||||
|
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, the result
|
||||||
|
is undefined and your program may crash.
|
||||||
|
<a name="utf16strings"></a></P>
|
||||||
|
<br><b>
|
||||||
|
Validity of UTF-16 strings
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
When you set the PCRE_UTF16 flag, the strings of 16-bit data units that are
|
||||||
|
passed as patterns and subjects are (by default) checked for validity on entry
|
||||||
|
to the relevant functions. Values other than those in the surrogate range
|
||||||
|
U+D800 to U+DFFF are independent code points. Values in the surrogate range
|
||||||
|
must be used in pairs in the correct manner.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Excluded are the "Non-Character" code points, which are U+FDD0 to U+FDEF
|
||||||
|
and the last two code points in each plane, U+??FFFE and U+??FFFF.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If an invalid UTF-16 string is passed to PCRE, an error return is given. At
|
||||||
|
compile time, the only additional information is the offset to the first data
|
||||||
|
unit of the failing character. The run-time functions <b>pcre16_exec()</b> and
|
||||||
|
<b>pcre16_dfa_exec()</b> also pass back this information, as well as a more
|
||||||
|
detailed reason code if the caller has provided memory in which to do this.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
In some situations, you may already know that your strings are valid, and
|
||||||
|
therefore want to skip these checks in order to improve performance. If you set
|
||||||
|
the PCRE_NO_UTF16_CHECK flag at compile time or at run time, PCRE assumes that
|
||||||
|
the pattern or subject it is given (respectively) contains only valid UTF-16
|
||||||
|
sequences. In this case, it does not diagnose an invalid UTF-16 string.
|
||||||
|
However, if an invalid string is passed, the result is undefined.
|
||||||
|
<a name="utf32strings"></a></P>
|
||||||
|
<br><b>
|
||||||
|
Validity of UTF-32 strings
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
When you set the PCRE_UTF32 flag, the strings of 32-bit data units that are
|
||||||
|
passed as patterns and subjects are (by default) checked for validity on entry
|
||||||
|
to the relevant functions. This check allows only values in the range U+0
|
||||||
|
to U+10FFFF, excluding the surrogate area U+D800 to U+DFFF, and the
|
||||||
|
"Non-Character" code points, which are U+FDD0 to U+FDEF and the last two
|
||||||
|
characters in each plane, U+??FFFE and U+??FFFF.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If an invalid UTF-32 string is passed to PCRE, an error return is given. At
|
||||||
|
compile time, the only additional information is the offset to the first data
|
||||||
|
unit of the failing character. The run-time functions <b>pcre32_exec()</b> and
|
||||||
|
<b>pcre32_dfa_exec()</b> also pass back this information, as well as a more
|
||||||
|
detailed reason code if the caller has provided memory in which to do this.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
In some situations, you may already know that your strings are valid, and
|
||||||
|
therefore want to skip these checks in order to improve performance. If you set
|
||||||
|
the PCRE_NO_UTF32_CHECK flag at compile time or at run time, PCRE assumes that
|
||||||
|
the pattern or subject it is given (respectively) contains only valid UTF-32
|
||||||
|
sequences. In this case, it does not diagnose an invalid UTF-32 string.
|
||||||
|
However, if an invalid string is passed, the result is undefined.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
General comments about UTF modes
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
1. Codepoints less than 256 can be specified in patterns by either braced or
|
||||||
|
unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3). Larger
|
||||||
|
values have to use braced sequences.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
2. Octal numbers up to \777 are recognized, and in UTF-8 mode they match
|
||||||
|
two-byte characters for values greater than \177.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
3. Repeat quantifiers apply to complete UTF characters, not to individual
|
||||||
|
data units, for example: \x{100}{3}.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
4. The dot metacharacter matches one UTF character instead of a single data
|
||||||
|
unit.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
5. The escape sequence \C can be used to match a single byte in UTF-8 mode, or
|
||||||
|
a single 16-bit data unit in UTF-16 mode, or a single 32-bit data unit in
|
||||||
|
UTF-32 mode, but its use can lead to some strange effects because it breaks up
|
||||||
|
multi-unit characters (see the description of \C in the
|
||||||
|
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||||
|
documentation). The use of \C is not supported in the alternative matching
|
||||||
|
function <b>pcre[16|32]_dfa_exec()</b>, nor is it supported in UTF mode by the
|
||||||
|
JIT optimization of <b>pcre[16|32]_exec()</b>. If JIT optimization is requested
|
||||||
|
for a UTF pattern that contains \C, it will not succeed, and so the matching
|
||||||
|
will be carried out by the normal interpretive function.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
6. The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
|
||||||
|
test characters of any code value, but, by default, the characters that PCRE
|
||||||
|
recognizes as digits, spaces, or word characters remain the same set as in
|
||||||
|
non-UTF mode, all with values less than 256. This remains true even when PCRE
|
||||||
|
is built to include Unicode property support, because to do otherwise would
|
||||||
|
slow down PCRE in many common cases. Note in particular that this applies to
|
||||||
|
\b and \B, because they are defined in terms of \w and \W. If you really
|
||||||
|
want to test for a wider sense of, say, "digit", you can use explicit Unicode
|
||||||
|
property tests such as \p{Nd}. Alternatively, if you set the PCRE_UCP option,
|
||||||
|
the way that the character escapes work is changed so that Unicode properties
|
||||||
|
are used to determine which characters match. There are more details in the
|
||||||
|
section on
|
||||||
|
<a href="pcrepattern.html#genericchartypes">generic character types</a>
|
||||||
|
in the
|
||||||
|
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||||
|
documentation.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
7. Similarly, characters that match the POSIX named character classes are all
|
||||||
|
low-valued characters, unless the PCRE_UCP option is set.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
8. However, the horizontal and vertical white space matching escapes (\h, \H,
|
||||||
|
\v, and \V) do match all the appropriate Unicode characters, whether or not
|
||||||
|
PCRE_UCP is set.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
9. Case-insensitive matching applies only to characters whose values are less
|
||||||
|
than 128, unless PCRE is built with Unicode property support. A few Unicode
|
||||||
|
characters such as Greek sigma have more than two codepoints that are
|
||||||
|
case-equivalent. Up to and including PCRE release 8.31, only one-to-one case
|
||||||
|
mappings were supported, but later releases (with Unicode property support) do
|
||||||
|
treat as case-equivalent all versions of characters such as Greek sigma.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
AUTHOR
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
Philip Hazel
|
||||||
|
<br>
|
||||||
|
University Computing Service
|
||||||
|
<br>
|
||||||
|
Cambridge CB2 3QH, England.
|
||||||
|
<br>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
REVISION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
Last updated: 11 November 2012
|
||||||
|
<br>
|
||||||
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
|
<br>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
@ -18,6 +18,12 @@ The HTML documentation for PCRE comprises the following pages:
|
|||||||
<tr><td><a href="pcre.html">pcre</a></td>
|
<tr><td><a href="pcre.html">pcre</a></td>
|
||||||
<td> Introductory page</td></tr>
|
<td> Introductory page</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre16.html">pcre16</a></td>
|
||||||
|
<td> Discussion of the 16-bit PCRE library</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre32.html">pcre32</a></td>
|
||||||
|
<td> Discussion of the 32-bit PCRE library</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
||||||
<td> Information about the installation configuration</td></tr>
|
<td> Information about the installation configuration</td></tr>
|
||||||
|
|
||||||
@ -36,9 +42,18 @@ The HTML documentation for PCRE comprises the following pages:
|
|||||||
<tr><td><a href="pcrecpp.html">pcrecpp</a></td>
|
<tr><td><a href="pcrecpp.html">pcrecpp</a></td>
|
||||||
<td> The C++ wrapper for the PCRE library</td></tr>
|
<td> The C++ wrapper for the PCRE library</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcredemo.html">pcredemo</a></td>
|
||||||
|
<td> A demonstration C program that uses the PCRE library</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcregrep.html">pcregrep</a></td>
|
<tr><td><a href="pcregrep.html">pcregrep</a></td>
|
||||||
<td> The <b>pcregrep</b> command</td></tr>
|
<td> The <b>pcregrep</b> command</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcrejit.html">pcrejit</a></td>
|
||||||
|
<td> Discussion of the just-in-time optimization support</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcrelimits.html">pcrelimits</a></td>
|
||||||
|
<td> Details of size and other limits</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcrematching.html">pcrematching</a></td>
|
<tr><td><a href="pcrematching.html">pcrematching</a></td>
|
||||||
<td> Discussion of the two matching algorithms</td></tr>
|
<td> Discussion of the two matching algorithms</td></tr>
|
||||||
|
|
||||||
@ -58,7 +73,7 @@ The HTML documentation for PCRE comprises the following pages:
|
|||||||
<td> How to save and re-use compiled patterns</td></tr>
|
<td> How to save and re-use compiled patterns</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcresample.html">pcresample</a></td>
|
<tr><td><a href="pcresample.html">pcresample</a></td>
|
||||||
<td> Description of the sample program</td></tr>
|
<td> Discussion of the pcredemo program</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcrestack.html">pcrestack</a></td>
|
<tr><td><a href="pcrestack.html">pcrestack</a></td>
|
||||||
<td> Discussion of PCRE's stack usage</td></tr>
|
<td> Discussion of PCRE's stack usage</td></tr>
|
||||||
@ -68,15 +83,22 @@ The HTML documentation for PCRE comprises the following pages:
|
|||||||
|
|
||||||
<tr><td><a href="pcretest.html">pcretest</a></td>
|
<tr><td><a href="pcretest.html">pcretest</a></td>
|
||||||
<td> The <b>pcretest</b> command for testing PCRE</td></tr>
|
<td> The <b>pcretest</b> command for testing PCRE</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcreunicode.html">pcreunicode</a></td>
|
||||||
|
<td> Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
There are also individual pages that summarize the interface for each function
|
There are also individual pages that summarize the interface for each function
|
||||||
in the library:
|
in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
|
||||||
|
functions.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_assign_jit_stack.html">pcre_assign_jit_stack</a></td>
|
||||||
|
<td> Assign stack for JIT matching</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_compile.html">pcre_compile</a></td>
|
<tr><td><a href="pcre_compile.html">pcre_compile</a></td>
|
||||||
<td> Compile a regular expression</td></tr>
|
<td> Compile a regular expression</td></tr>
|
||||||
|
|
||||||
@ -96,6 +118,9 @@ in the library:
|
|||||||
<td> Match a compiled pattern to a subject string
|
<td> Match a compiled pattern to a subject string
|
||||||
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
|
||||||
|
<td> Free study data</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
|
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
|
||||||
<td> Match a compiled pattern to a subject string
|
<td> Match a compiled pattern to a subject string
|
||||||
(Perl compatible)</td></tr>
|
(Perl compatible)</td></tr>
|
||||||
@ -124,15 +149,30 @@ in the library:
|
|||||||
<tr><td><a href="pcre_info.html">pcre_info</a></td>
|
<tr><td><a href="pcre_info.html">pcre_info</a></td>
|
||||||
<td> Obsolete information extraction function</td></tr>
|
<td> Obsolete information extraction function</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
|
||||||
|
<td> Create a stack for JIT matching</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_jit_stack_free.html">pcre_jit_stack_free</a></td>
|
||||||
|
<td> Free a JIT matching stack</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_maketables.html">pcre_maketables</a></td>
|
<tr><td><a href="pcre_maketables.html">pcre_maketables</a></td>
|
||||||
<td> Build character tables in current locale</td></tr>
|
<td> Build character tables in current locale</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_pattern_to_host_byte_order.html">pcre_pattern_to_host_byte_order</a></td>
|
||||||
|
<td> Convert compiled pattern to host byte order if necessary</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_refcount.html">pcre_refcount</a></td>
|
<tr><td><a href="pcre_refcount.html">pcre_refcount</a></td>
|
||||||
<td> Maintain reference count in compiled pattern</td></tr>
|
<td> Maintain reference count in compiled pattern</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_study.html">pcre_study</a></td>
|
<tr><td><a href="pcre_study.html">pcre_study</a></td>
|
||||||
<td> Study a compiled pattern</td></tr>
|
<td> Study a compiled pattern</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_utf16_to_host_byte_order.html">pcre_utf16_to_host_byte_order</a></td>
|
||||||
|
<td> Convert UTF-16 string to host byte order if necessary</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_utf32_to_host_byte_order.html">pcre_utf32_to_host_byte_order</a></td>
|
||||||
|
<td> Convert UTF-32 string to host byte order if necessary</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_version.html">pcre_version</a></td>
|
<tr><td><a href="pcre_version.html">pcre_version</a></td>
|
||||||
<td> Return PCRE version and release date</td></tr>
|
<td> Return PCRE version and release date</td></tr>
|
||||||
</table>
|
</table>
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE-CONFIG 1
|
.TH PCRE-CONFIG 1 "01 January 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre-config - program to return PCRE configuration
|
pcre-config - program to return PCRE configuration
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -6,14 +6,20 @@ pcre-config - program to return PCRE configuration
|
|||||||
.sp
|
.sp
|
||||||
.B pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
.B pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
||||||
.ti +5n
|
.ti +5n
|
||||||
.B [--libs-posix] [--cflags] [--cflags-posix]
|
.B [--libs16] [--libs32] [--libs-cpp] [--libs-posix]
|
||||||
|
.ti +5n
|
||||||
|
.B [--cflags] [--cflags-posix]
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
\fBpcre-config\fP returns the configuration of the installed PCRE
|
\fBpcre-config\fP returns the configuration of the installed PCRE
|
||||||
libraries and the options required to compile a program to use them.
|
libraries and the options required to compile a program to use them. Some of
|
||||||
|
the options apply only to the 8-bit, or 16-bit, or 32-bit libraries,
|
||||||
|
respectively, and are
|
||||||
|
not available if only one of those libraries has been built. If an unavailable
|
||||||
|
option is encountered, the "usage" information is output.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH OPTIONS
|
.SH OPTIONS
|
||||||
@ -34,11 +40,24 @@ output.
|
|||||||
.TP 10
|
.TP 10
|
||||||
\fB--libs\fP
|
\fB--libs\fP
|
||||||
Writes to the standard output the command line options required to link
|
Writes to the standard output the command line options required to link
|
||||||
with PCRE (\fB-lpcre\fP on many systems).
|
with the 8-bit PCRE library (\fB-lpcre\fP on many systems).
|
||||||
|
.TP 10
|
||||||
|
\fB--libs16\fP
|
||||||
|
Writes to the standard output the command line options required to link
|
||||||
|
with the 16-bit PCRE library (\fB-lpcre16\fP on many systems).
|
||||||
|
.TP 10
|
||||||
|
\fB--libs32\fP
|
||||||
|
Writes to the standard output the command line options required to link
|
||||||
|
with the 32-bit PCRE library (\fB-lpcre32\fP on many systems).
|
||||||
|
.TP 10
|
||||||
|
\fB--libs-cpp\fP
|
||||||
|
Writes to the standard output the command line options required to link with
|
||||||
|
PCRE's C++ wrapper library (\fB-lpcrecpp\fP \fB-lpcre\fP on many
|
||||||
|
systems).
|
||||||
.TP 10
|
.TP 10
|
||||||
\fB--libs-posix\fP
|
\fB--libs-posix\fP
|
||||||
Writes to the standard output the command line options required to link with
|
Writes to the standard output the command line options required to link with
|
||||||
the PCRE posix emulation library (\fB-lpcreposix\fP \fB-lpcre\fP on many
|
PCRE's POSIX API wrapper library (\fB-lpcreposix\fP \fB-lpcre\fP on many
|
||||||
systems).
|
systems).
|
||||||
.TP 10
|
.TP 10
|
||||||
\fB--cflags\fP
|
\fB--cflags\fP
|
||||||
@ -48,7 +67,7 @@ many systems).
|
|||||||
.TP 10
|
.TP 10
|
||||||
\fB--cflags-posix\fP
|
\fB--cflags-posix\fP
|
||||||
Writes to the standard output the command line options required to compile
|
Writes to the standard output the command line options required to compile
|
||||||
files that use the PCRE posix emulation library (this may include some \fB-I\fP
|
files that use PCRE's POSIX API wrapper library (this may include some \fB-I\fP
|
||||||
options, but is blank on many systems).
|
options, but is blank on many systems).
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
@ -62,12 +81,12 @@ options, but is blank on many systems).
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This manual page was originally written by Mark Baker for the Debian GNU/Linux
|
This manual page was originally written by Mark Baker for the Debian GNU/Linux
|
||||||
system. It has been slightly revised as a generic PCRE man page.
|
system. It has been subsequently revised as a generic PCRE man page.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH REVISION
|
.SH REVISION
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 18 April 2007
|
Last updated: 24 June 2012
|
||||||
.fi
|
.fi
|
||||||
|
@ -8,13 +8,18 @@ NAME
|
|||||||
SYNOPSIS
|
SYNOPSIS
|
||||||
|
|
||||||
pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
||||||
[--libs-posix] [--cflags] [--cflags-posix]
|
[--libs16] [--libs32] [--libs-cpp] [--libs-posix]
|
||||||
|
[--cflags] [--cflags-posix]
|
||||||
|
|
||||||
|
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
|
||||||
pcre-config returns the configuration of the installed PCRE libraries
|
pcre-config returns the configuration of the installed PCRE libraries
|
||||||
and the options required to compile a program to use them.
|
and the options required to compile a program to use them. Some of the
|
||||||
|
options apply only to the 8-bit, or 16-bit, or 32-bit libraries,
|
||||||
|
respectively, and are not available if only one of those libraries has
|
||||||
|
been built. If an unavailable option is encountered, the "usage" infor-
|
||||||
|
mation is output.
|
||||||
|
|
||||||
|
|
||||||
OPTIONS
|
OPTIONS
|
||||||
@ -32,21 +37,35 @@ OPTIONS
|
|||||||
the standard output.
|
the standard output.
|
||||||
|
|
||||||
--libs Writes to the standard output the command line options
|
--libs Writes to the standard output the command line options
|
||||||
required to link with PCRE (-lpcre on many systems).
|
required to link with the 8-bit PCRE library (-lpcre on many
|
||||||
|
systems).
|
||||||
|
|
||||||
|
--libs16 Writes to the standard output the command line options
|
||||||
|
required to link with the 16-bit PCRE library (-lpcre16 on
|
||||||
|
many systems).
|
||||||
|
|
||||||
|
--libs32 Writes to the standard output the command line options
|
||||||
|
required to link with the 32-bit PCRE library (-lpcre32 on
|
||||||
|
many systems).
|
||||||
|
|
||||||
|
--libs-cpp
|
||||||
|
Writes to the standard output the command line options
|
||||||
|
required to link with PCRE's C++ wrapper library (-lpcrecpp
|
||||||
|
-lpcre on many systems).
|
||||||
|
|
||||||
--libs-posix
|
--libs-posix
|
||||||
Writes to the standard output the command line options
|
Writes to the standard output the command line options
|
||||||
required to link with the PCRE posix emulation library
|
required to link with PCRE's POSIX API wrapper library
|
||||||
(-lpcreposix -lpcre on many systems).
|
(-lpcreposix -lpcre on many systems).
|
||||||
|
|
||||||
--cflags Writes to the standard output the command line options
|
--cflags Writes to the standard output the command line options
|
||||||
required to compile files that use PCRE (this may include
|
required to compile files that use PCRE (this may include
|
||||||
some -I options, but is blank on many systems).
|
some -I options, but is blank on many systems).
|
||||||
|
|
||||||
--cflags-posix
|
--cflags-posix
|
||||||
Writes to the standard output the command line options
|
Writes to the standard output the command line options
|
||||||
required to compile files that use the PCRE posix emulation
|
required to compile files that use PCRE's POSIX API wrapper
|
||||||
library (this may include some -I options, but is blank on
|
library (this may include some -I options, but is blank on
|
||||||
many systems).
|
many systems).
|
||||||
|
|
||||||
|
|
||||||
@ -57,11 +76,11 @@ SEE ALSO
|
|||||||
|
|
||||||
AUTHOR
|
AUTHOR
|
||||||
|
|
||||||
This manual page was originally written by Mark Baker for the Debian
|
This manual page was originally written by Mark Baker for the Debian
|
||||||
GNU/Linux system. It has been slightly revised as a generic PCRE man
|
GNU/Linux system. It has been subsequently revised as a generic PCRE
|
||||||
page.
|
man page.
|
||||||
|
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 18 April 2007
|
Last updated: 24 June 2012
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE 3
|
.TH PCRE 3 "11 November 2012" "PCRE 8.32"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH INTRODUCTION
|
.SH INTRODUCTION
|
||||||
@ -6,21 +6,50 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
The PCRE library is a set of functions that implement regular expression
|
The PCRE library is a set of functions that implement regular expression
|
||||||
pattern matching using the same syntax and semantics as Perl, with just a few
|
pattern matching using the same syntax and semantics as Perl, with just a few
|
||||||
differences. Certain features that appeared in Python and PCRE before they
|
differences. Some features that appeared in Python and PCRE before they
|
||||||
appeared in Perl are also available using the Python syntax. There is also some
|
appeared in Perl are also available using the Python syntax, there is some
|
||||||
support for certain .NET and Oniguruma syntax items, and there is an option for
|
support for one or two .NET and Oniguruma syntax items, and there is an option
|
||||||
requesting some minor changes that give better JavaScript compatibility.
|
for requesting some minor changes that give better JavaScript compatibility.
|
||||||
.P
|
.P
|
||||||
The current implementation of PCRE (release 7.x) corresponds approximately with
|
Starting with release 8.30, it is possible to compile two separate PCRE
|
||||||
Perl 5.10, including support for UTF-8 encoded strings and Unicode general
|
libraries: the original, which supports 8-bit character strings (including
|
||||||
category properties. However, UTF-8 and Unicode support has to be explicitly
|
UTF-8 strings), and a second library that supports 16-bit character strings
|
||||||
|
(including UTF-16 strings). The build process allows either one or both to be
|
||||||
|
built. The majority of the work to make this possible was done by Zoltan
|
||||||
|
Herczeg.
|
||||||
|
.P
|
||||||
|
Starting with release 8.32 it is possible to compile a third separate PCRE
|
||||||
|
library, which supports 32-bit character strings (including
|
||||||
|
UTF-32 strings). The build process allows any set of the 8-, 16- and 32-bit
|
||||||
|
libraries to be built. The work to make this possible was done by Christian Persch.
|
||||||
|
.P
|
||||||
|
The three libraries contain identical sets of functions, except that the names
|
||||||
|
in the 16-bit library start with \fBpcre16_\fP instead of \fBpcre_\fP, and the
|
||||||
|
names in the 32-bit library start with \fBpcre32_\fP instead of \fBpcre_\fP. To
|
||||||
|
avoid over-complication and reduce the documentation maintenance load, most of
|
||||||
|
the documentation describes the 8-bit library, with the differences for the
|
||||||
|
16-bit and 32-bit libraries described separately in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre16\fP
|
||||||
|
and
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre32\fP
|
||||||
|
.\"
|
||||||
|
pages. References to functions or structures of the form \fIpcre[16|32]_xxx\fP
|
||||||
|
should be read as meaning "\fIpcre_xxx\fP when using the 8-bit library,
|
||||||
|
\fIpcre16_xxx\fP when using the 16-bit library, or \fIpcre32_xxx\fP when using
|
||||||
|
the 32-bit library".
|
||||||
|
.P
|
||||||
|
The current implementation of PCRE corresponds approximately with Perl 5.12,
|
||||||
|
including support for UTF-8/16/32 encoded strings and Unicode general category
|
||||||
|
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
||||||
enabled; it is not the default. The Unicode tables correspond to Unicode
|
enabled; it is not the default. The Unicode tables correspond to Unicode
|
||||||
release 5.1.
|
release 6.2.0.
|
||||||
.P
|
.P
|
||||||
In addition to the Perl-compatible matching function, PCRE contains an
|
In addition to the Perl-compatible matching function, PCRE contains an
|
||||||
alternative matching function that matches the same compiled patterns in a
|
alternative function that matches the same compiled patterns in a different
|
||||||
different way. In certain circumstances, the alternative function has some
|
way. In certain circumstances, the alternative function has some advantages.
|
||||||
advantages. For a discussion of the two matching algorithms, see the
|
For a discussion of the two matching algorithms, see the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcrematching\fP
|
\fBpcrematching\fP
|
||||||
.\"
|
.\"
|
||||||
@ -28,13 +57,13 @@ page.
|
|||||||
.P
|
.P
|
||||||
PCRE is written in C and released as a C library. A number of people have
|
PCRE is written in C and released as a C library. A number of people have
|
||||||
written wrappers and interfaces of various kinds. In particular, Google Inc.
|
written wrappers and interfaces of various kinds. In particular, Google Inc.
|
||||||
have provided a comprehensive C++ wrapper. This is now included as part of the
|
have provided a comprehensive C++ wrapper for the 8-bit library. This is now
|
||||||
PCRE distribution. The
|
included as part of the PCRE distribution. The
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcrecpp\fP
|
\fBpcrecpp\fP
|
||||||
.\"
|
.\"
|
||||||
page has details of this interface. Other people's contributions can be found
|
page has details of this interface. Other people's contributions can be found
|
||||||
in the \fIContrib\fR directory at the primary FTP site, which is:
|
in the \fIContrib\fP directory at the primary FTP site, which is:
|
||||||
.sp
|
.sp
|
||||||
.\" HTML <a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre">
|
.\" HTML <a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
@ -43,22 +72,22 @@ ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
|
|||||||
Details of exactly which Perl regular expression features are and are not
|
Details of exactly which Perl regular expression features are and are not
|
||||||
supported by PCRE are given in separate documents. See the
|
supported by PCRE are given in separate documents. See the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcrepattern\fR
|
\fBpcrepattern\fP
|
||||||
.\"
|
.\"
|
||||||
and
|
and
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcrecompat\fR
|
\fBpcrecompat\fP
|
||||||
.\"
|
.\"
|
||||||
pages. There is a syntax summary in the
|
pages. There is a syntax summary in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcresyntax\fR
|
\fBpcresyntax\fP
|
||||||
.\"
|
.\"
|
||||||
page.
|
page.
|
||||||
.P
|
.P
|
||||||
Some features of PCRE can be included, excluded, or changed when the library is
|
Some features of PCRE can be included, excluded, or changed when the library is
|
||||||
built. The
|
built. The
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre_config()\fR
|
\fBpcre_config()\fP
|
||||||
.\"
|
.\"
|
||||||
function makes it possible for a client to discover which features are
|
function makes it possible for a client to discover which features are
|
||||||
available. The features themselves are described in the
|
available. The features themselves are described in the
|
||||||
@ -66,15 +95,48 @@ available. The features themselves are described in the
|
|||||||
\fBpcrebuild\fP
|
\fBpcrebuild\fP
|
||||||
.\"
|
.\"
|
||||||
page. Documentation about building PCRE for various operating systems can be
|
page. Documentation about building PCRE for various operating systems can be
|
||||||
found in the \fBREADME\fP file in the source distribution.
|
found in the \fBREADME\fP and \fBNON-AUTOTOOLS_BUILD\fP files in the source
|
||||||
|
distribution.
|
||||||
.P
|
.P
|
||||||
The library contains a number of undocumented internal functions and data
|
The libraries contain a number of undocumented internal functions and data
|
||||||
tables that are used by more than one of the exported external functions, but
|
tables that are used by more than one of the exported external functions, but
|
||||||
which are not intended for use by external callers. Their names all begin with
|
which are not intended for use by external callers. Their names all begin with
|
||||||
"_pcre_", which hopefully will not provoke any name clashes. In some
|
"_pcre_" or "_pcre16_" or "_pcre32_", which hopefully will not provoke any name
|
||||||
environments, it is possible to control which external symbols are exported
|
clashes. In some environments, it is possible to control which external symbols
|
||||||
when a shared library is built, and in these cases the undocumented symbols are
|
are exported when a shared library is built, and in these cases the
|
||||||
not exported.
|
undocumented symbols are not exported.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "SECURITY CONSIDERATIONS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
If you are using PCRE in a non-UTF application that permits users to supply
|
||||||
|
arbitrary patterns for compilation, you should be aware of a feature that
|
||||||
|
allows users to turn on UTF support from within a pattern, provided that PCRE
|
||||||
|
was built with UTF support. For example, an 8-bit pattern that begins with
|
||||||
|
"(*UTF8)" or "(*UTF)" turns on UTF-8 mode, which interprets patterns and
|
||||||
|
subjects as strings of UTF-8 characters instead of individual 8-bit characters.
|
||||||
|
This causes both the pattern and any data against which it is matched to be
|
||||||
|
checked for UTF-8 validity. If the data string is very long, such a check might
|
||||||
|
use enough resources to cause your application to lose
|
||||||
|
performance.
|
||||||
|
.P
|
||||||
|
The best way of guarding against this possibility is to use the
|
||||||
|
\fBpcre_fullinfo()\fP function to check the compiled pattern's options for UTF.
|
||||||
|
.P
|
||||||
|
If your application is one that supports UTF, be aware that validity checking
|
||||||
|
can take time. If the same data string is to be matched many times, you can use
|
||||||
|
the PCRE_NO_UTF[8|16|32]_CHECK option for the second and subsequent matches to
|
||||||
|
save redundant checks.
|
||||||
|
.P
|
||||||
|
Another way that performance can be hit is by running a pattern that has a very
|
||||||
|
large search tree against a string that will never match. Nested unlimited
|
||||||
|
repeats in a pattern are a common example. PCRE provides some protection
|
||||||
|
against this: see the PCRE_EXTRA_MATCH_LIMIT feature in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcreapi\fP
|
||||||
|
.\"
|
||||||
|
page.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "USER DOCUMENTATION"
|
.SH "USER DOCUMENTATION"
|
||||||
@ -83,196 +145,40 @@ not exported.
|
|||||||
The user documentation for PCRE comprises a number of different sections. In
|
The user documentation for PCRE comprises a number of different sections. In
|
||||||
the "man" format, each of these is a separate "man page". In the HTML format,
|
the "man" format, each of these is a separate "man page". In the HTML format,
|
||||||
each is a separate page, linked from the index page. In the plain text format,
|
each is a separate page, linked from the index page. In the plain text format,
|
||||||
all the sections are concatenated, for ease of searching. The sections are as
|
all the sections, except the \fBpcredemo\fP section, are concatenated, for ease
|
||||||
follows:
|
of searching. The sections are as follows:
|
||||||
.sp
|
.sp
|
||||||
pcre this document
|
pcre this document
|
||||||
|
pcre16 details of the 16-bit library
|
||||||
|
pcre32 details of the 32-bit library
|
||||||
pcre-config show PCRE installation configuration information
|
pcre-config show PCRE installation configuration information
|
||||||
pcreapi details of PCRE's native C API
|
pcreapi details of PCRE's native C API
|
||||||
pcrebuild options for building PCRE
|
pcrebuild options for building PCRE
|
||||||
pcrecallout details of the callout feature
|
pcrecallout details of the callout feature
|
||||||
pcrecompat discussion of Perl compatibility
|
pcrecompat discussion of Perl compatibility
|
||||||
pcrecpp details of the C++ wrapper
|
pcrecpp details of the C++ wrapper for the 8-bit library
|
||||||
pcregrep description of the \fBpcregrep\fP command
|
pcredemo a demonstration C program that uses PCRE
|
||||||
|
pcregrep description of the \fBpcregrep\fP command (8-bit only)
|
||||||
|
pcrejit discussion of the just-in-time optimization support
|
||||||
|
pcrelimits details of size and other limits
|
||||||
pcrematching discussion of the two matching algorithms
|
pcrematching discussion of the two matching algorithms
|
||||||
pcrepartial details of the partial matching facility
|
pcrepartial details of the partial matching facility
|
||||||
.\" JOIN
|
.\" JOIN
|
||||||
pcrepattern syntax and semantics of supported
|
pcrepattern syntax and semantics of supported
|
||||||
regular expressions
|
regular expressions
|
||||||
pcresyntax quick syntax reference
|
|
||||||
pcreperform discussion of performance issues
|
pcreperform discussion of performance issues
|
||||||
pcreposix the POSIX-compatible C API
|
pcreposix the POSIX-compatible C API for the 8-bit library
|
||||||
pcreprecompile details of saving and re-using precompiled patterns
|
pcreprecompile details of saving and re-using precompiled patterns
|
||||||
pcresample discussion of the sample program
|
pcresample discussion of the pcredemo program
|
||||||
pcrestack discussion of stack usage
|
pcrestack discussion of stack usage
|
||||||
|
pcresyntax quick syntax reference
|
||||||
pcretest description of the \fBpcretest\fP testing command
|
pcretest description of the \fBpcretest\fP testing command
|
||||||
|
pcreunicode discussion of Unicode and UTF-8/16/32 support
|
||||||
.sp
|
.sp
|
||||||
In addition, in the "man" and HTML formats, there is a short page for each
|
In addition, in the "man" and HTML formats, there is a short page for each
|
||||||
C library function, listing its arguments and results.
|
C library function, listing its arguments and results.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH LIMITATIONS
|
|
||||||
.rs
|
|
||||||
.sp
|
|
||||||
There are some size limitations in PCRE but it is hoped that they will never in
|
|
||||||
practice be relevant.
|
|
||||||
.P
|
|
||||||
The maximum length of a compiled pattern is 65539 (sic) bytes if PCRE is
|
|
||||||
compiled with the default internal linkage size of 2. If you want to process
|
|
||||||
regular expressions that are truly enormous, you can compile PCRE with an
|
|
||||||
internal linkage size of 3 or 4 (see the \fBREADME\fP file in the source
|
|
||||||
distribution and the
|
|
||||||
.\" HREF
|
|
||||||
\fBpcrebuild\fP
|
|
||||||
.\"
|
|
||||||
documentation for details). In these cases the limit is substantially larger.
|
|
||||||
However, the speed of execution is slower.
|
|
||||||
.P
|
|
||||||
All values in repeating quantifiers must be less than 65536.
|
|
||||||
.P
|
|
||||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
|
||||||
no more than 65535 capturing subpatterns.
|
|
||||||
.P
|
|
||||||
The maximum length of name for a named subpattern is 32 characters, and the
|
|
||||||
maximum number of named subpatterns is 10000.
|
|
||||||
.P
|
|
||||||
The maximum length of a subject string is the largest positive number that an
|
|
||||||
integer variable can hold. However, when using the traditional matching
|
|
||||||
function, PCRE uses recursion to handle subpatterns and indefinite repetition.
|
|
||||||
This means that the available stack space may limit the size of a subject
|
|
||||||
string that can be processed by certain patterns. For a discussion of stack
|
|
||||||
issues, see the
|
|
||||||
.\" HREF
|
|
||||||
\fBpcrestack\fP
|
|
||||||
.\"
|
|
||||||
documentation.
|
|
||||||
.
|
|
||||||
.\" HTML <a name="utf8support"></a>
|
|
||||||
.
|
|
||||||
.
|
|
||||||
.SH "UTF-8 AND UNICODE PROPERTY SUPPORT"
|
|
||||||
.rs
|
|
||||||
.sp
|
|
||||||
From release 3.3, PCRE has had some support for character strings encoded in
|
|
||||||
the UTF-8 format. For release 4.0 this was greatly extended to cover most
|
|
||||||
common requirements, and in release 5.0 additional support for Unicode general
|
|
||||||
category properties was added.
|
|
||||||
.P
|
|
||||||
In order process UTF-8 strings, you must build PCRE to include UTF-8 support in
|
|
||||||
the code, and, in addition, you must call
|
|
||||||
.\" HREF
|
|
||||||
\fBpcre_compile()\fP
|
|
||||||
.\"
|
|
||||||
with the PCRE_UTF8 option flag, or the pattern must start with the sequence
|
|
||||||
(*UTF8). When either of these is the case, both the pattern and any subject
|
|
||||||
strings that are matched against it are treated as UTF-8 strings instead of
|
|
||||||
just strings of bytes.
|
|
||||||
.P
|
|
||||||
If you compile PCRE with UTF-8 support, but do not use it at run time, the
|
|
||||||
library will be a bit bigger, but the additional run time overhead is limited
|
|
||||||
to testing the PCRE_UTF8 flag occasionally, so should not be very big.
|
|
||||||
.P
|
|
||||||
If PCRE is built with Unicode character property support (which implies UTF-8
|
|
||||||
support), the escape sequences \ep{..}, \eP{..}, and \eX are supported.
|
|
||||||
The available properties that can be tested are limited to the general
|
|
||||||
category properties such as Lu for an upper case letter or Nd for a decimal
|
|
||||||
number, the Unicode script names such as Arabic or Han, and the derived
|
|
||||||
properties Any and L&. A full list is given in the
|
|
||||||
.\" HREF
|
|
||||||
\fBpcrepattern\fP
|
|
||||||
.\"
|
|
||||||
documentation. Only the short names for properties are supported. For example,
|
|
||||||
\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
|
|
||||||
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
|
|
||||||
compatibility with Perl 5.6. PCRE does not support this.
|
|
||||||
.
|
|
||||||
.\" HTML <a name="utf8strings"></a>
|
|
||||||
.
|
|
||||||
.SS "Validity of UTF-8 strings"
|
|
||||||
.rs
|
|
||||||
.sp
|
|
||||||
When you set the PCRE_UTF8 flag, the strings passed as patterns and subjects
|
|
||||||
are (by default) checked for validity on entry to the relevant functions. From
|
|
||||||
release 7.3 of PCRE, the check is according the rules of RFC 3629, which are
|
|
||||||
themselves derived from the Unicode specification. Earlier releases of PCRE
|
|
||||||
followed the rules of RFC 2279, which allows the full range of 31-bit values (0
|
|
||||||
to 0x7FFFFFFF). The current check allows only values in the range U+0 to
|
|
||||||
U+10FFFF, excluding U+D800 to U+DFFF.
|
|
||||||
.P
|
|
||||||
The excluded code points are the "Low Surrogate Area" of Unicode, of which the
|
|
||||||
Unicode Standard says this: "The Low Surrogate Area does not contain any
|
|
||||||
character assignments, consequently no character code charts or namelists are
|
|
||||||
provided for this area. Surrogates are reserved for use with UTF-16 and then
|
|
||||||
must be used in pairs." The code points that are encoded by UTF-16 pairs are
|
|
||||||
available as independent code points in the UTF-8 encoding. (In other words,
|
|
||||||
the whole surrogate thing is a fudge for UTF-16 which unfortunately messes up
|
|
||||||
UTF-8.)
|
|
||||||
.P
|
|
||||||
If an invalid UTF-8 string is passed to PCRE, an error return
|
|
||||||
(PCRE_ERROR_BADUTF8) is given. In some situations, you may already know that
|
|
||||||
your strings are valid, and therefore want to skip these checks in order to
|
|
||||||
improve performance. If you set the PCRE_NO_UTF8_CHECK flag at compile time or
|
|
||||||
at run time, PCRE assumes that the pattern or subject it is given
|
|
||||||
(respectively) contains only valid UTF-8 codes. In this case, it does not
|
|
||||||
diagnose an invalid UTF-8 string.
|
|
||||||
.P
|
|
||||||
If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, what
|
|
||||||
happens depends on why the string is invalid. If the string conforms to the
|
|
||||||
"old" definition of UTF-8 (RFC 2279), it is processed as a string of characters
|
|
||||||
in the range 0 to 0x7FFFFFFF. In other words, apart from the initial validity
|
|
||||||
test, PCRE (when in UTF-8 mode) handles strings according to the more liberal
|
|
||||||
rules of RFC 2279. However, if the string does not even conform to RFC 2279,
|
|
||||||
the result is undefined. Your program may crash.
|
|
||||||
.P
|
|
||||||
If you want to process strings of values in the full range 0 to 0x7FFFFFFF,
|
|
||||||
encoded in a UTF-8-like manner as per the old RFC, you can set
|
|
||||||
PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in this
|
|
||||||
situation, you will have to apply your own validity check.
|
|
||||||
.
|
|
||||||
.SS "General comments about UTF-8 mode"
|
|
||||||
.rs
|
|
||||||
.sp
|
|
||||||
1. An unbraced hexadecimal escape sequence (such as \exb3) matches a two-byte
|
|
||||||
UTF-8 character if the value is greater than 127.
|
|
||||||
.P
|
|
||||||
2. Octal numbers up to \e777 are recognized, and match two-byte UTF-8
|
|
||||||
characters for values greater than \e177.
|
|
||||||
.P
|
|
||||||
3. Repeat quantifiers apply to complete UTF-8 characters, not to individual
|
|
||||||
bytes, for example: \ex{100}{3}.
|
|
||||||
.P
|
|
||||||
4. The dot metacharacter matches one UTF-8 character instead of a single byte.
|
|
||||||
.P
|
|
||||||
5. The escape sequence \eC can be used to match a single byte in UTF-8 mode,
|
|
||||||
but its use can lead to some strange effects. This facility is not available in
|
|
||||||
the alternative matching function, \fBpcre_dfa_exec()\fP.
|
|
||||||
.P
|
|
||||||
6. The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly
|
|
||||||
test characters of any code value, but the characters that PCRE recognizes as
|
|
||||||
digits, spaces, or word characters remain the same set as before, all with
|
|
||||||
values less than 256. This remains true even when PCRE includes Unicode
|
|
||||||
property support, because to do otherwise would slow down PCRE in many common
|
|
||||||
cases. If you really want to test for a wider sense of, say, "digit", you
|
|
||||||
must use Unicode property tests such as \ep{Nd}. Note that this also applies to
|
|
||||||
\eb, because it is defined in terms of \ew and \eW.
|
|
||||||
.P
|
|
||||||
7. Similarly, characters that match the POSIX named character classes are all
|
|
||||||
low-valued characters.
|
|
||||||
.P
|
|
||||||
8. However, the Perl 5.10 horizontal and vertical whitespace matching escapes
|
|
||||||
(\eh, \eH, \ev, and \eV) do match all the appropriate Unicode characters.
|
|
||||||
.P
|
|
||||||
9. Case-insensitive matching applies only to characters whose values are less
|
|
||||||
than 128, unless PCRE is built with Unicode property support. Even when Unicode
|
|
||||||
property support is available, PCRE still uses its own character tables when
|
|
||||||
checking the case of low-valued characters, so as not to degrade performance.
|
|
||||||
The Unicode property information is used only for characters with higher
|
|
||||||
values. Even when Unicode property support is available, PCRE supports
|
|
||||||
case-insensitive matching only when there is a one-to-one mapping between a
|
|
||||||
letter's cases. There are a small number of many-to-one mappings in Unicode;
|
|
||||||
these are not supported by PCRE.
|
|
||||||
.
|
|
||||||
.
|
|
||||||
.SH AUTHOR
|
.SH AUTHOR
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
@ -291,6 +197,6 @@ two digits 10, at the domain cam.ac.uk.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 11 April 2009
|
Last updated: 11 November 2012
|
||||||
Copyright (c) 1997-2009 University of Cambridge.
|
Copyright (c) 1997-2012 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
File diff suppressed because it is too large
Load Diff
390
tools/pcre/doc/pcre16.3
Normal file
390
tools/pcre/doc/pcre16.3
Normal file
@ -0,0 +1,390 @@
|
|||||||
|
.TH PCRE 3 "08 November 2012" "PCRE 8.32"
|
||||||
|
.SH NAME
|
||||||
|
PCRE - Perl-compatible regular expressions
|
||||||
|
.sp
|
||||||
|
.B #include <pcre.h>
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "PCRE 16-BIT API BASIC FUNCTIONS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.SM
|
||||||
|
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const unsigned char *\fItableptr\fP);
|
||||||
|
.PP
|
||||||
|
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int *\fIerrorcodeptr\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const unsigned char *\fItableptr\fP);
|
||||||
|
.PP
|
||||||
|
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const char **\fIerrptr\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre16_free_study(pcre16_extra *\fIextra\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
|
.ti +5n
|
||||||
|
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
|
.ti +5n
|
||||||
|
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "PCRE 16-BIT API STRING EXTRACTION FUNCTIONS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIbuffersize\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR16 *\fIstringptr\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR16 \fIname\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR16 *\fIstringptr\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
|
||||||
|
.PP
|
||||||
|
.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP);
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "PCRE 16-BIT API AUXILIARY FUNCTIONS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre16_jit_stack_free(pcre16_jit_stack *\fIstack\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||||
|
.PP
|
||||||
|
.B const unsigned char *pcre16_maketables(void);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
|
.PP
|
||||||
|
.B const char *pcre16_version(void);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "PCRE 16-BIT API INDIRECTED FUNCTIONS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B void *(*pcre16_malloc)(size_t);
|
||||||
|
.PP
|
||||||
|
.B void (*pcre16_free)(void *);
|
||||||
|
.PP
|
||||||
|
.B void *(*pcre16_stack_malloc)(size_t);
|
||||||
|
.PP
|
||||||
|
.B void (*pcre16_stack_free)(void *);
|
||||||
|
.PP
|
||||||
|
.B int (*pcre16_callout)(pcre16_callout_block *);
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "PCRE 16-BIT API 16-BIT-ONLY FUNCTION"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIkeep_boms\fP);
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "THE PCRE 16-BIT LIBRARY"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
Starting with release 8.30, it is possible to compile a PCRE library that
|
||||||
|
supports 16-bit character strings, including UTF-16 strings, as well as or
|
||||||
|
instead of the original 8-bit library. The majority of the work to make this
|
||||||
|
possible was done by Zoltan Herczeg. The two libraries contain identical sets
|
||||||
|
of functions, used in exactly the same way. Only the names of the functions and
|
||||||
|
the data types of their arguments and results are different. To avoid
|
||||||
|
over-complication and reduce the documentation maintenance load, most of the
|
||||||
|
PCRE documentation describes the 8-bit library, with only occasional references
|
||||||
|
to the 16-bit library. This page describes what is different when you use the
|
||||||
|
16-bit library.
|
||||||
|
.P
|
||||||
|
WARNING: A single application can be linked with both libraries, but you must
|
||||||
|
take care when processing any particular pattern to use functions from just one
|
||||||
|
library. For example, if you want to study a pattern that was compiled with
|
||||||
|
\fBpcre16_compile()\fP, you must do so with \fBpcre16_study()\fP, not
|
||||||
|
\fBpcre_study()\fP, and you must free the study data with
|
||||||
|
\fBpcre16_free_study()\fP.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "THE HEADER FILE"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
There is only one header file, \fBpcre.h\fP. It contains prototypes for all the
|
||||||
|
functions in all libraries, as well as definitions of flags, structures, error
|
||||||
|
codes, etc.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "THE LIBRARY NAME"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
In Unix-like systems, the 16-bit library is called \fBlibpcre16\fP, and can
|
||||||
|
normally be accessed by adding \fB-lpcre16\fP to the command for linking an
|
||||||
|
application that uses PCRE.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "STRING TYPES"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
In the 8-bit library, strings are passed to PCRE library functions as vectors
|
||||||
|
of bytes with the C type "char *". In the 16-bit library, strings are passed as
|
||||||
|
vectors of unsigned 16-bit quantities. The macro PCRE_UCHAR16 specifies an
|
||||||
|
appropriate data type, and PCRE_SPTR16 is defined as "const PCRE_UCHAR16 *". In
|
||||||
|
very many environments, "short int" is a 16-bit data type. When PCRE is built,
|
||||||
|
it defines PCRE_UCHAR16 as "unsigned short int", but checks that it really is a
|
||||||
|
16-bit data type. If it is not, the build fails with an error message telling
|
||||||
|
the maintainer to modify the definition appropriately.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "STRUCTURE TYPES"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The types of the opaque structures that are used for compiled 16-bit patterns
|
||||||
|
and JIT stacks are \fBpcre16\fP and \fBpcre16_jit_stack\fP respectively. The
|
||||||
|
type of the user-accessible structure that is returned by \fBpcre16_study()\fP
|
||||||
|
is \fBpcre16_extra\fP, and the type of the structure that is used for passing
|
||||||
|
data to a callout function is \fBpcre16_callout_block\fP. These structures
|
||||||
|
contain the same fields, with the same names, as their 8-bit counterparts. The
|
||||||
|
only difference is that pointers to character strings are 16-bit instead of
|
||||||
|
8-bit types.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "16-BIT FUNCTIONS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
For every function in the 8-bit library there is a corresponding function in
|
||||||
|
the 16-bit library with a name that starts with \fBpcre16_\fP instead of
|
||||||
|
\fBpcre_\fP. The prototypes are listed above. In addition, there is one extra
|
||||||
|
function, \fBpcre16_utf16_to_host_byte_order()\fP. This is a utility function
|
||||||
|
that converts a UTF-16 character string to host byte order if necessary. The
|
||||||
|
other 16-bit functions expect the strings they are passed to be in host byte
|
||||||
|
order.
|
||||||
|
.P
|
||||||
|
The \fIinput\fP and \fIoutput\fP arguments of
|
||||||
|
\fBpcre16_utf16_to_host_byte_order()\fP may point to the same address, that is,
|
||||||
|
conversion in place is supported. The output buffer must be at least as long as
|
||||||
|
the input.
|
||||||
|
.P
|
||||||
|
The \fIlength\fP argument specifies the number of 16-bit data units in the
|
||||||
|
input string; a negative value specifies a zero-terminated string.
|
||||||
|
.P
|
||||||
|
If \fIbyte_order\fP is NULL, it is assumed that the string starts off in host
|
||||||
|
byte order. This may be changed by byte-order marks (BOMs) anywhere in the
|
||||||
|
string (commonly as the first character).
|
||||||
|
.P
|
||||||
|
If \fIbyte_order\fP is not NULL, a non-zero value of the integer to which it
|
||||||
|
points means that the input starts off in host byte order, otherwise the
|
||||||
|
opposite order is assumed. Again, BOMs in the string can change this. The final
|
||||||
|
byte order is passed back at the end of processing.
|
||||||
|
.P
|
||||||
|
If \fIkeep_boms\fP is not zero, byte-order mark characters (0xfeff) are copied
|
||||||
|
into the output string. Otherwise they are discarded.
|
||||||
|
.P
|
||||||
|
The result of the function is the number of 16-bit units placed into the output
|
||||||
|
buffer, including the zero terminator if the string was zero-terminated.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "SUBJECT STRING OFFSETS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The offsets within subject strings that are returned by the matching functions
|
||||||
|
are in 16-bit units rather than bytes.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "NAMED SUBPATTERNS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The name-to-number translation table that is maintained for named subpatterns
|
||||||
|
uses 16-bit characters. The \fBpcre16_get_stringtable_entries()\fP function
|
||||||
|
returns the length of each entry in the table as the number of 16-bit data
|
||||||
|
units.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "OPTION NAMES"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
There are two new general option names, PCRE_UTF16 and PCRE_NO_UTF16_CHECK,
|
||||||
|
which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In
|
||||||
|
fact, these new options define the same bits in the options word. There is a
|
||||||
|
discussion about the
|
||||||
|
.\" HTML <a href="pcreunicode.html#utf16strings">
|
||||||
|
.\" </a>
|
||||||
|
validity of UTF-16 strings
|
||||||
|
.\"
|
||||||
|
in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcreunicode\fP
|
||||||
|
.\"
|
||||||
|
page.
|
||||||
|
.P
|
||||||
|
For the \fBpcre16_config()\fP function there is an option PCRE_CONFIG_UTF16
|
||||||
|
that returns 1 if UTF-16 support is configured, otherwise 0. If this option is
|
||||||
|
given to \fBpcre_config()\fP or \fBpcre32_config()\fP, or if the
|
||||||
|
PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 option is given to \fBpcre16_config()\fP,
|
||||||
|
the result is the PCRE_ERROR_BADOPTION error.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "CHARACTER CODES"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
In 16-bit mode, when PCRE_UTF16 is not set, character values are treated in the
|
||||||
|
same way as in 8-bit, non UTF-8 mode, except, of course, that they can range
|
||||||
|
from 0 to 0xffff instead of 0 to 0xff. Character types for characters less than
|
||||||
|
0xff can therefore be influenced by the locale in the same way as before.
|
||||||
|
Characters greater than 0xff have only one case, and no "type" (such as letter
|
||||||
|
or digit).
|
||||||
|
.P
|
||||||
|
In UTF-16 mode, the character code is Unicode, in the range 0 to 0x10ffff, with
|
||||||
|
the exception of values in the range 0xd800 to 0xdfff because those are
|
||||||
|
"surrogate" values that are used in pairs to encode values greater than 0xffff.
|
||||||
|
.P
|
||||||
|
A UTF-16 string can indicate its endianness by a special code known as a
|
||||||
|
byte-order mark (BOM). The PCRE functions do not handle this, expecting strings
|
||||||
|
to be in host byte order. A utility function called
|
||||||
|
\fBpcre16_utf16_to_host_byte_order()\fP is provided to help with this (see
|
||||||
|
above).
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "ERROR NAMES"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The errors PCRE_ERROR_BADUTF16_OFFSET and PCRE_ERROR_SHORTUTF16 correspond to
|
||||||
|
their 8-bit counterparts. The error PCRE_ERROR_BADMODE is given when a compiled
|
||||||
|
pattern is passed to a function that processes patterns in the other
|
||||||
|
mode, for example, if a pattern compiled with \fBpcre_compile()\fP is passed to
|
||||||
|
\fBpcre16_exec()\fP.
|
||||||
|
.P
|
||||||
|
There are new error codes whose names begin with PCRE_UTF16_ERR for invalid
|
||||||
|
UTF-16 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that
|
||||||
|
are described in the section entitled
|
||||||
|
.\" HTML <a href="pcreapi.html#badutf8reasons">
|
||||||
|
.\" </a>
|
||||||
|
"Reason codes for invalid UTF-8 strings"
|
||||||
|
.\"
|
||||||
|
in the main
|
||||||
|
.\" HREF
|
||||||
|
\fBpcreapi\fP
|
||||||
|
.\"
|
||||||
|
page. The UTF-16 errors are:
|
||||||
|
.sp
|
||||||
|
PCRE_UTF16_ERR1 Missing low surrogate at end of string
|
||||||
|
PCRE_UTF16_ERR2 Invalid low surrogate follows high surrogate
|
||||||
|
PCRE_UTF16_ERR3 Isolated low surrogate
|
||||||
|
PCRE_UTF16_ERR4 Non-character
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "ERROR TEXTS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
If there is an error while compiling a pattern, the error text that is passed
|
||||||
|
back by \fBpcre16_compile()\fP or \fBpcre16_compile2()\fP is still an 8-bit
|
||||||
|
character string, zero-terminated.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "CALLOUTS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The \fIsubject\fP and \fImark\fP fields in the callout block that is passed to
|
||||||
|
a callout function point to 16-bit vectors.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "TESTING"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The \fBpcretest\fP program continues to operate with 8-bit input and output
|
||||||
|
files, but it can be used for testing the 16-bit library. If it is run with the
|
||||||
|
command line option \fB-16\fP, patterns and subject strings are converted from
|
||||||
|
8-bit to 16-bit before being passed to PCRE, and the 16-bit library functions
|
||||||
|
are used instead of the 8-bit ones. Returned 16-bit strings are converted to
|
||||||
|
8-bit for output. If neither the 8-bit nor the 32-bit library was compiled,
|
||||||
|
\fBpcretest\fP defaults to 16-bit and the \fB-16\fP option is ignored.
|
||||||
|
.P
|
||||||
|
When PCRE is being built, the \fBRunTest\fP script that is called by "make
|
||||||
|
check" uses the \fBpcretest\fP \fB-C\fP option to discover which of the 8-bit,
|
||||||
|
16-bit and 32-bit libraries has been built, and runs the tests appropriately.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "NOT SUPPORTED IN 16-BIT MODE"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
Not all the features of the 8-bit library are available with the 16-bit
|
||||||
|
library. The C++ and POSIX wrapper functions support only the 8-bit library,
|
||||||
|
and the \fBpcregrep\fP program is at present 8-bit only.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH AUTHOR
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
Philip Hazel
|
||||||
|
University Computing Service
|
||||||
|
Cambridge CB2 3QH, England.
|
||||||
|
.fi
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH REVISION
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
Last updated: 08 November 2012
|
||||||
|
Copyright (c) 1997-2012 University of Cambridge.
|
||||||
|
.fi
|
389
tools/pcre/doc/pcre32.3
Normal file
389
tools/pcre/doc/pcre32.3
Normal file
@ -0,0 +1,389 @@
|
|||||||
|
.TH PCRE 3 "08 November 2012" "PCRE 8.32"
|
||||||
|
.SH NAME
|
||||||
|
PCRE - Perl-compatible regular expressions
|
||||||
|
.sp
|
||||||
|
.B #include <pcre.h>
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "PCRE 32-BIT API BASIC FUNCTIONS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.SM
|
||||||
|
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const unsigned char *\fItableptr\fP);
|
||||||
|
.PP
|
||||||
|
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int *\fIerrorcodeptr\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const unsigned char *\fItableptr\fP);
|
||||||
|
.PP
|
||||||
|
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const char **\fIerrptr\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
|
.ti +5n
|
||||||
|
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
|
.ti +5n
|
||||||
|
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "PCRE 32-BIT API STRING EXTRACTION FUNCTIONS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIbuffersize\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR32 *\fIstringptr\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR32 \fIname\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR32 *\fIstringptr\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
|
||||||
|
.PP
|
||||||
|
.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "PCRE 32-BIT API AUXILIARY FUNCTIONS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||||
|
.PP
|
||||||
|
.B const unsigned char *pcre32_maketables(void);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
|
.PP
|
||||||
|
.B const char *pcre32_version(void);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "PCRE 32-BIT API INDIRECTED FUNCTIONS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B void *(*pcre32_malloc)(size_t);
|
||||||
|
.PP
|
||||||
|
.B void (*pcre32_free)(void *);
|
||||||
|
.PP
|
||||||
|
.B void *(*pcre32_stack_malloc)(size_t);
|
||||||
|
.PP
|
||||||
|
.B void (*pcre32_stack_free)(void *);
|
||||||
|
.PP
|
||||||
|
.B int (*pcre32_callout)(pcre32_callout_block *);
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "PCRE 32-BIT API 32-BIT-ONLY FUNCTION"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIkeep_boms\fP);
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "THE PCRE 32-BIT LIBRARY"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
Starting with release 8.32, it is possible to compile a PCRE library that
|
||||||
|
supports 32-bit character strings, including UTF-32 strings, as well as or
|
||||||
|
instead of the original 8-bit library. This work was done by Christian Persch,
|
||||||
|
based on the work done by Zoltan Herczeg for the 16-bit library. All three
|
||||||
|
libraries contain identical sets of functions, used in exactly the same way.
|
||||||
|
Only the names of the functions and the data types of their arguments and
|
||||||
|
results are different. To avoid over-complication and reduce the documentation
|
||||||
|
maintenance load, most of the PCRE documentation describes the 8-bit library,
|
||||||
|
with only occasional references to the 16-bit and 32-bit libraries. This page
|
||||||
|
describes what is different when you use the 32-bit library.
|
||||||
|
.P
|
||||||
|
WARNING: A single application can be linked with all or any of the three
|
||||||
|
libraries, but you must take care when processing any particular pattern
|
||||||
|
to use functions from just one library. For example, if you want to study
|
||||||
|
a pattern that was compiled with \fBpcre32_compile()\fP, you must do so
|
||||||
|
with \fBpcre32_study()\fP, not \fBpcre_study()\fP, and you must free the
|
||||||
|
study data with \fBpcre32_free_study()\fP.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "THE HEADER FILE"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
There is only one header file, \fBpcre.h\fP. It contains prototypes for all the
|
||||||
|
functions in all libraries, as well as definitions of flags, structures, error
|
||||||
|
codes, etc.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "THE LIBRARY NAME"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
In Unix-like systems, the 32-bit library is called \fBlibpcre32\fP, and can
|
||||||
|
normally be accessed by adding \fB-lpcre32\fP to the command for linking an
|
||||||
|
application that uses PCRE.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "STRING TYPES"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
In the 8-bit library, strings are passed to PCRE library functions as vectors
|
||||||
|
of bytes with the C type "char *". In the 32-bit library, strings are passed as
|
||||||
|
vectors of unsigned 32-bit quantities. The macro PCRE_UCHAR32 specifies an
|
||||||
|
appropriate data type, and PCRE_SPTR32 is defined as "const PCRE_UCHAR32 *". In
|
||||||
|
very many environments, "unsigned int" is a 32-bit data type. When PCRE is
|
||||||
|
built, it defines PCRE_UCHAR32 as "unsigned int", but checks that it really is
|
||||||
|
a 32-bit data type. If it is not, the build fails with an error message telling
|
||||||
|
the maintainer to modify the definition appropriately.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "STRUCTURE TYPES"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The types of the opaque structures that are used for compiled 32-bit patterns
|
||||||
|
and JIT stacks are \fBpcre32\fP and \fBpcre32_jit_stack\fP respectively. The
|
||||||
|
type of the user-accessible structure that is returned by \fBpcre32_study()\fP
|
||||||
|
is \fBpcre32_extra\fP, and the type of the structure that is used for passing
|
||||||
|
data to a callout function is \fBpcre32_callout_block\fP. These structures
|
||||||
|
contain the same fields, with the same names, as their 8-bit counterparts. The
|
||||||
|
only difference is that pointers to character strings are 32-bit instead of
|
||||||
|
8-bit types.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "32-BIT FUNCTIONS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
For every function in the 8-bit library there is a corresponding function in
|
||||||
|
the 32-bit library with a name that starts with \fBpcre32_\fP instead of
|
||||||
|
\fBpcre_\fP. The prototypes are listed above. In addition, there is one extra
|
||||||
|
function, \fBpcre32_utf32_to_host_byte_order()\fP. This is a utility function
|
||||||
|
that converts a UTF-32 character string to host byte order if necessary. The
|
||||||
|
other 32-bit functions expect the strings they are passed to be in host byte
|
||||||
|
order.
|
||||||
|
.P
|
||||||
|
The \fIinput\fP and \fIoutput\fP arguments of
|
||||||
|
\fBpcre32_utf32_to_host_byte_order()\fP may point to the same address, that is,
|
||||||
|
conversion in place is supported. The output buffer must be at least as long as
|
||||||
|
the input.
|
||||||
|
.P
|
||||||
|
The \fIlength\fP argument specifies the number of 32-bit data units in the
|
||||||
|
input string; a negative value specifies a zero-terminated string.
|
||||||
|
.P
|
||||||
|
If \fIbyte_order\fP is NULL, it is assumed that the string starts off in host
|
||||||
|
byte order. This may be changed by byte-order marks (BOMs) anywhere in the
|
||||||
|
string (commonly as the first character).
|
||||||
|
.P
|
||||||
|
If \fIbyte_order\fP is not NULL, a non-zero value of the integer to which it
|
||||||
|
points means that the input starts off in host byte order, otherwise the
|
||||||
|
opposite order is assumed. Again, BOMs in the string can change this. The final
|
||||||
|
byte order is passed back at the end of processing.
|
||||||
|
.P
|
||||||
|
If \fIkeep_boms\fP is not zero, byte-order mark characters (0xfeff) are copied
|
||||||
|
into the output string. Otherwise they are discarded.
|
||||||
|
.P
|
||||||
|
The result of the function is the number of 32-bit units placed into the output
|
||||||
|
buffer, including the zero terminator if the string was zero-terminated.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "SUBJECT STRING OFFSETS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The offsets within subject strings that are returned by the matching functions
|
||||||
|
are in 32-bit units rather than bytes.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "NAMED SUBPATTERNS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The name-to-number translation table that is maintained for named subpatterns
|
||||||
|
uses 32-bit characters. The \fBpcre32_get_stringtable_entries()\fP function
|
||||||
|
returns the length of each entry in the table as the number of 32-bit data
|
||||||
|
units.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "OPTION NAMES"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
There are two new general option names, PCRE_UTF32 and PCRE_NO_UTF32_CHECK,
|
||||||
|
which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In
|
||||||
|
fact, these new options define the same bits in the options word. There is a
|
||||||
|
discussion about the
|
||||||
|
.\" HTML <a href="pcreunicode.html#utf32strings">
|
||||||
|
.\" </a>
|
||||||
|
validity of UTF-32 strings
|
||||||
|
.\"
|
||||||
|
in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcreunicode\fP
|
||||||
|
.\"
|
||||||
|
page.
|
||||||
|
.P
|
||||||
|
For the \fBpcre32_config()\fP function there is an option PCRE_CONFIG_UTF32
|
||||||
|
that returns 1 if UTF-32 support is configured, otherwise 0. If this option is
|
||||||
|
given to \fBpcre_config()\fP or \fBpcre16_config()\fP, or if the
|
||||||
|
PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 option is given to \fBpcre32_config()\fP,
|
||||||
|
the result is the PCRE_ERROR_BADOPTION error.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "CHARACTER CODES"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
In 32-bit mode, when PCRE_UTF32 is not set, character values are treated in the
|
||||||
|
same way as in 8-bit, non UTF-8 mode, except, of course, that they can range
|
||||||
|
from 0 to 0x7fffffff instead of 0 to 0xff. Character types for characters less
|
||||||
|
than 0xff can therefore be influenced by the locale in the same way as before.
|
||||||
|
Characters greater than 0xff have only one case, and no "type" (such as letter
|
||||||
|
or digit).
|
||||||
|
.P
|
||||||
|
In UTF-32 mode, the character code is Unicode, in the range 0 to 0x10ffff, with
|
||||||
|
the exception of values in the range 0xd800 to 0xdfff because those are
|
||||||
|
"surrogate" values that are ill-formed in UTF-32.
|
||||||
|
.P
|
||||||
|
A UTF-32 string can indicate its endianness by a special code known as a
|
||||||
|
byte-order mark (BOM). The PCRE functions do not handle this, expecting strings
|
||||||
|
to be in host byte order. A utility function called
|
||||||
|
\fBpcre32_utf32_to_host_byte_order()\fP is provided to help with this (see
|
||||||
|
above).
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "ERROR NAMES"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The error PCRE_ERROR_BADUTF32 corresponds to its 8-bit counterpart.
|
||||||
|
The error PCRE_ERROR_BADMODE is given when a compiled
|
||||||
|
pattern is passed to a function that processes patterns in the other
|
||||||
|
mode, for example, if a pattern compiled with \fBpcre_compile()\fP is passed to
|
||||||
|
\fBpcre32_exec()\fP.
|
||||||
|
.P
|
||||||
|
There are new error codes whose names begin with PCRE_UTF32_ERR for invalid
|
||||||
|
UTF-32 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that
|
||||||
|
are described in the section entitled
|
||||||
|
.\" HTML <a href="pcreapi.html#badutf8reasons">
|
||||||
|
.\" </a>
|
||||||
|
"Reason codes for invalid UTF-8 strings"
|
||||||
|
.\"
|
||||||
|
in the main
|
||||||
|
.\" HREF
|
||||||
|
\fBpcreapi\fP
|
||||||
|
.\"
|
||||||
|
page. The UTF-32 errors are:
|
||||||
|
.sp
|
||||||
|
PCRE_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff)
|
||||||
|
PCRE_UTF32_ERR2 Non-character
|
||||||
|
PCRE_UTF32_ERR3 Character > 0x10ffff
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "ERROR TEXTS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
If there is an error while compiling a pattern, the error text that is passed
|
||||||
|
back by \fBpcre32_compile()\fP or \fBpcre32_compile2()\fP is still an 8-bit
|
||||||
|
character string, zero-terminated.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "CALLOUTS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The \fIsubject\fP and \fImark\fP fields in the callout block that is passed to
|
||||||
|
a callout function point to 32-bit vectors.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "TESTING"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The \fBpcretest\fP program continues to operate with 8-bit input and output
|
||||||
|
files, but it can be used for testing the 32-bit library. If it is run with the
|
||||||
|
command line option \fB-32\fP, patterns and subject strings are converted from
|
||||||
|
8-bit to 32-bit before being passed to PCRE, and the 32-bit library functions
|
||||||
|
are used instead of the 8-bit ones. Returned 32-bit strings are converted to
|
||||||
|
8-bit for output. If neither the 8-bit nor the 16-bit library was compiled,
|
||||||
|
\fBpcretest\fP defaults to 32-bit and the \fB-32\fP option is ignored.
|
||||||
|
.P
|
||||||
|
When PCRE is being built, the \fBRunTest\fP script that is called by "make
|
||||||
|
check" uses the \fBpcretest\fP \fB-C\fP option to discover which of the 8-bit,
|
||||||
|
16-bit and 32-bit libraries has been built, and runs the tests appropriately.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "NOT SUPPORTED IN 32-BIT MODE"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
Not all the features of the 8-bit library are available with the 32-bit
|
||||||
|
library. The C++ and POSIX wrapper functions support only the 8-bit library,
|
||||||
|
and the \fBpcregrep\fP program is at present 8-bit only.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH AUTHOR
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
Philip Hazel
|
||||||
|
University Computing Service
|
||||||
|
Cambridge CB2 3QH, England.
|
||||||
|
.fi
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH REVISION
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
Last updated: 08 November 2012
|
||||||
|
Copyright (c) 1997-2012 University of Cambridge.
|
||||||
|
.fi
|
61
tools/pcre/doc/pcre_assign_jit_stack.3
Normal file
61
tools/pcre/doc/pcre_assign_jit_stack.3
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
.TH PCRE_ASSIGN_JIT_STACK 3 "24 June 2012" "PCRE 8.30"
|
||||||
|
.SH NAME
|
||||||
|
PCRE - Perl-compatible regular expressions
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B #include <pcre.h>
|
||||||
|
.PP
|
||||||
|
.SM
|
||||||
|
.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||||
|
.
|
||||||
|
.SH DESCRIPTION
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
This function provides control over the memory used as a stack at run-time by a
|
||||||
|
call to \fBpcre[16|32]_exec()\fP with a pattern that has been successfully
|
||||||
|
compiled with JIT optimization. The arguments are:
|
||||||
|
.sp
|
||||||
|
extra the data pointer returned by \fBpcre[16|32]_study()\fP
|
||||||
|
callback a callback function
|
||||||
|
data a JIT stack or a value to be passed to the callback
|
||||||
|
function
|
||||||
|
.P
|
||||||
|
If \fIcallback\fP is NULL and \fIdata\fP is NULL, an internal 32K block on
|
||||||
|
the machine stack is used.
|
||||||
|
.P
|
||||||
|
If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must
|
||||||
|
be a valid JIT stack, the result of calling \fBpcre[16|32]_jit_stack_alloc()\fP.
|
||||||
|
.P
|
||||||
|
If \fIcallback\fP is not NULL, it is called with \fIdata\fP as an argument at
|
||||||
|
the start of matching, in order to set up a JIT stack. If the result is NULL,
|
||||||
|
the internal 32K stack is used; otherwise the return value must be a valid JIT
|
||||||
|
stack, the result of calling \fBpcre[16|32]_jit_stack_alloc()\fP.
|
||||||
|
.P
|
||||||
|
You may safely assign the same JIT stack to multiple patterns, as long as they
|
||||||
|
are all matched in the same thread. In a multithread application, each thread
|
||||||
|
must use its own JIT stack. For more details, see the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcrejit\fP
|
||||||
|
.\"
|
||||||
|
page.
|
||||||
|
.P
|
||||||
|
There is a complete description of the PCRE native API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcreapi\fP
|
||||||
|
.\"
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcreposix\fP
|
||||||
|
.\"
|
||||||
|
page.
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_COMPILE 3
|
.TH PCRE_COMPILE 3 "24 June 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -12,20 +12,32 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||||
.ti +5n
|
.ti +5n
|
||||||
.B const unsigned char *\fItableptr\fP);
|
.B const unsigned char *\fItableptr\fP);
|
||||||
|
.PP
|
||||||
|
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const unsigned char *\fItableptr\fP);
|
||||||
|
.PP
|
||||||
|
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const unsigned char *\fItableptr\fP);
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This function compiles a regular expression into an internal form. It is the
|
This function compiles a regular expression into an internal form. It is the
|
||||||
same as \fBpcre_compile2()\fP, except for the absence of the \fIerrorcodeptr\fP
|
same as \fBpcre[16|32]_compile2()\fP, except for the absence of the
|
||||||
argument. Its arguments are:
|
\fIerrorcodeptr\fP argument. Its arguments are:
|
||||||
.sp
|
.sp
|
||||||
\fIpattern\fR A zero-terminated string containing the
|
\fIpattern\fP A zero-terminated string containing the
|
||||||
regular expression to be compiled
|
regular expression to be compiled
|
||||||
\fIoptions\fR Zero or more option bits
|
\fIoptions\fP Zero or more option bits
|
||||||
\fIerrptr\fR Where to put an error message
|
\fIerrptr\fP Where to put an error message
|
||||||
\fIerroffset\fR Offset in pattern where error was found
|
\fIerroffset\fP Offset in pattern where error was found
|
||||||
\fItableptr\fR Pointer to character tables, or NULL to
|
\fItableptr\fP Pointer to character tables, or NULL to
|
||||||
use the built-in default
|
use the built-in default
|
||||||
.sp
|
.sp
|
||||||
The option bits are:
|
The option bits are:
|
||||||
@ -38,7 +50,7 @@ The option bits are:
|
|||||||
PCRE_DOLLAR_ENDONLY $ not to match newline at end
|
PCRE_DOLLAR_ENDONLY $ not to match newline at end
|
||||||
PCRE_DOTALL . matches anything including NL
|
PCRE_DOTALL . matches anything including NL
|
||||||
PCRE_DUPNAMES Allow duplicate names for subpatterns
|
PCRE_DUPNAMES Allow duplicate names for subpatterns
|
||||||
PCRE_EXTENDED Ignore whitespace and # comments
|
PCRE_EXTENDED Ignore white space and # comments
|
||||||
PCRE_EXTRA PCRE extra features
|
PCRE_EXTRA PCRE extra features
|
||||||
(not much use currently)
|
(not much use currently)
|
||||||
PCRE_FIRSTLINE Force matching to be before newline
|
PCRE_FIRSTLINE Force matching to be before newline
|
||||||
@ -52,14 +64,23 @@ The option bits are:
|
|||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||||
theses (named ones available)
|
theses (named ones available)
|
||||||
PCRE_UNGREEDY Invert greediness of quantifiers
|
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||||
PCRE_UTF8 Run in UTF-8 mode
|
validity (only relevant if
|
||||||
|
PCRE_UTF16 is set)
|
||||||
|
PCRE_NO_UTF32_CHECK Do not check the pattern for UTF-32
|
||||||
|
validity (only relevant if
|
||||||
|
PCRE_UTF32 is set)
|
||||||
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
|
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
|
||||||
validity (only relevant if
|
validity (only relevant if
|
||||||
PCRE_UTF8 is set)
|
PCRE_UTF8 is set)
|
||||||
|
PCRE_UCP Use Unicode properties for \ed, \ew, etc.
|
||||||
|
PCRE_UNGREEDY Invert greediness of quantifiers
|
||||||
|
PCRE_UTF16 Run \fBpcre16_compile()\fP in UTF-16 mode
|
||||||
|
PCRE_UTF32 Run \fBpcre32_compile()\fP in UTF-32 mode
|
||||||
|
PCRE_UTF8 Run \fBpcre_compile()\fP in UTF-8 mode
|
||||||
.sp
|
.sp
|
||||||
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and
|
PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and
|
||||||
PCRE_NO_UTF8_CHECK.
|
PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used.
|
||||||
.P
|
.P
|
||||||
The yield of the function is a pointer to a private data structure that
|
The yield of the function is a pointer to a private data structure that
|
||||||
contains the compiled pattern, or NULL if an error was detected. Note that
|
contains the compiled pattern, or NULL if an error was detected. Note that
|
||||||
@ -68,10 +89,10 @@ version is not guaranteed to work and may cause crashes.
|
|||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcreapi\fR
|
\fBpcreapi\fP
|
||||||
.\"
|
.\"
|
||||||
page and a description of the POSIX API in the
|
page and a description of the POSIX API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcreposix\fR
|
\fBpcreposix\fP
|
||||||
.\"
|
.\"
|
||||||
page.
|
page.
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_COMPILE2 3
|
.TH PCRE_COMPILE2 3 "24 June 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -14,52 +14,81 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||||
.ti +5n
|
.ti +5n
|
||||||
.B const unsigned char *\fItableptr\fP);
|
.B const unsigned char *\fItableptr\fP);
|
||||||
|
.PP
|
||||||
|
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int *\fIerrorcodeptr\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const unsigned char *\fItableptr\fP);
|
||||||
|
.PP
|
||||||
|
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int *\fIerrorcodeptr\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B const unsigned char *\fItableptr\fP);
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This function compiles a regular expression into an internal form. It is the
|
This function compiles a regular expression into an internal form. It is the
|
||||||
same as \fBpcre_compile()\fP, except for the addition of the \fIerrorcodeptr\fP
|
same as \fBpcre[16|32]_compile()\fP, except for the addition of the
|
||||||
argument. The arguments are:
|
\fIerrorcodeptr\fP argument. The arguments are:
|
||||||
|
.
|
||||||
.sp
|
.sp
|
||||||
\fIpattern\fR A zero-terminated string containing the
|
\fIpattern\fP A zero-terminated string containing the
|
||||||
regular expression to be compiled
|
regular expression to be compiled
|
||||||
\fIoptions\fR Zero or more option bits
|
\fIoptions\fP Zero or more option bits
|
||||||
\fIerrorcodeptr\fP Where to put an error code
|
\fIerrorcodeptr\fP Where to put an error code
|
||||||
\fIerrptr\fR Where to put an error message
|
\fIerrptr\fP Where to put an error message
|
||||||
\fIerroffset\fR Offset in pattern where error was found
|
\fIerroffset\fP Offset in pattern where error was found
|
||||||
\fItableptr\fR Pointer to character tables, or NULL to
|
\fItableptr\fP Pointer to character tables, or NULL to
|
||||||
use the built-in default
|
use the built-in default
|
||||||
.sp
|
.sp
|
||||||
The option bits are:
|
The option bits are:
|
||||||
.sp
|
.sp
|
||||||
PCRE_ANCHORED Force pattern anchoring
|
PCRE_ANCHORED Force pattern anchoring
|
||||||
PCRE_AUTO_CALLOUT Compile automatic callouts
|
PCRE_AUTO_CALLOUT Compile automatic callouts
|
||||||
PCRE_CASELESS Do caseless matching
|
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
||||||
PCRE_DOLLAR_ENDONLY $ not to match newline at end
|
PCRE_BSR_UNICODE \eR matches all Unicode line endings
|
||||||
PCRE_DOTALL . matches anything including NL
|
PCRE_CASELESS Do caseless matching
|
||||||
PCRE_DUPNAMES Allow duplicate names for subpatterns
|
PCRE_DOLLAR_ENDONLY $ not to match newline at end
|
||||||
PCRE_EXTENDED Ignore whitespace and # comments
|
PCRE_DOTALL . matches anything including NL
|
||||||
PCRE_EXTRA PCRE extra features
|
PCRE_DUPNAMES Allow duplicate names for subpatterns
|
||||||
(not much use currently)
|
PCRE_EXTENDED Ignore white space and # comments
|
||||||
PCRE_FIRSTLINE Force matching to be before newline
|
PCRE_EXTRA PCRE extra features
|
||||||
PCRE_MULTILINE ^ and $ match newlines within data
|
(not much use currently)
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_FIRSTLINE Force matching to be before newline
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences
|
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
PCRE_MULTILINE ^ and $ match newlines within data
|
||||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
sequences
|
||||||
theses (named ones available)
|
PCRE_NEWLINE_CR Set CR as the newline sequence
|
||||||
PCRE_UNGREEDY Invert greediness of quantifiers
|
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
||||||
PCRE_UTF8 Run in UTF-8 mode
|
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||||
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
|
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||||
validity (only relevant if
|
theses (named ones available)
|
||||||
PCRE_UTF8 is set)
|
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||||
|
validity (only relevant if
|
||||||
|
PCRE_UTF16 is set)
|
||||||
|
PCRE_NO_UTF32_CHECK Do not check the pattern for UTF-32
|
||||||
|
validity (only relevant if
|
||||||
|
PCRE_UTF32 is set)
|
||||||
|
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
|
||||||
|
validity (only relevant if
|
||||||
|
PCRE_UTF8 is set)
|
||||||
|
PCRE_UCP Use Unicode properties for \ed, \ew, etc.
|
||||||
|
PCRE_UNGREEDY Invert greediness of quantifiers
|
||||||
|
PCRE_UTF16 Run \fBpcre16_compile()\fP in UTF-16 mode
|
||||||
|
PCRE_UTF32 Run \fBpcre32_compile()\fP in UTF-32 mode
|
||||||
|
PCRE_UTF8 Run \fBpcre_compile()\fP in UTF-8 mode
|
||||||
.sp
|
.sp
|
||||||
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and
|
PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and
|
||||||
PCRE_NO_UTF8_CHECK.
|
PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used.
|
||||||
.P
|
.P
|
||||||
The yield of the function is a pointer to a private data structure that
|
The yield of the function is a pointer to a private data structure that
|
||||||
contains the compiled pattern, or NULL if an error was detected. Note that
|
contains the compiled pattern, or NULL if an error was detected. Note that
|
||||||
@ -68,10 +97,10 @@ version is not guaranteed to work and may cause crashes.
|
|||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcreapi\fR
|
\fBpcreapi\fP
|
||||||
.\"
|
.\"
|
||||||
page and a description of the POSIX API in the
|
page and a description of the POSIX API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcreposix\fR
|
\fBpcreposix\fP
|
||||||
.\"
|
.\"
|
||||||
page.
|
page.
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_CONFIG 3
|
.TH PCRE_CONFIG 3 "24 June 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -8,19 +8,30 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.PP
|
.PP
|
||||||
.SM
|
.SM
|
||||||
.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
|
.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This function makes it possible for a client program to find out which optional
|
This function makes it possible for a client program to find out which optional
|
||||||
features are available in the version of the PCRE library it is using. Its
|
features are available in the version of the PCRE library it is using. The
|
||||||
arguments are as follows:
|
arguments are as follows:
|
||||||
.sp
|
.sp
|
||||||
\fIwhat\fR A code specifying what information is required
|
\fIwhat\fP A code specifying what information is required
|
||||||
\fIwhere\fR Points to where to put the data
|
\fIwhere\fP Points to where to put the data
|
||||||
.sp
|
.sp
|
||||||
The available codes are:
|
The \fIwhere\fP argument must point to an integer variable, except for
|
||||||
|
PCRE_CONFIG_MATCH_LIMIT and PCRE_CONFIG_MATCH_LIMIT_RECURSION, when it must
|
||||||
|
point to an unsigned long integer. The available codes are:
|
||||||
.sp
|
.sp
|
||||||
|
PCRE_CONFIG_JIT Availability of just-in-time compiler
|
||||||
|
support (1=yes 0=no)
|
||||||
|
PCRE_CONFIG_JITTARGET String containing information about the
|
||||||
|
target architecture for the JIT compiler,
|
||||||
|
or NULL if there is no JIT support
|
||||||
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
||||||
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
||||||
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
||||||
@ -35,23 +46,31 @@ The available codes are:
|
|||||||
0 all Unicode line endings
|
0 all Unicode line endings
|
||||||
1 CR, LF, or CRLF only
|
1 CR, LF, or CRLF only
|
||||||
PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
|
PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
|
||||||
Threshold of return slots, above
|
Threshold of return slots, above which
|
||||||
which \fBmalloc()\fR is used by
|
\fBmalloc()\fP is used by the POSIX API
|
||||||
the POSIX API
|
|
||||||
PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap)
|
PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap)
|
||||||
PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no)
|
PCRE_CONFIG_UTF16 Availability of UTF-16 support (1=yes
|
||||||
|
0=no); option for \fBpcre16_config()\fP
|
||||||
|
PCRE_CONFIG_UTF32 Availability of UTF-32 support (1=yes
|
||||||
|
0=no); option for \fBpcre32_config()\fP
|
||||||
|
PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no);
|
||||||
|
option for \fBpcre_config()\fP
|
||||||
PCRE_CONFIG_UNICODE_PROPERTIES
|
PCRE_CONFIG_UNICODE_PROPERTIES
|
||||||
Availability of Unicode property support
|
Availability of Unicode property support
|
||||||
(1=yes 0=no)
|
(1=yes 0=no)
|
||||||
.sp
|
.sp
|
||||||
The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise.
|
The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise. That error
|
||||||
|
is also given if PCRE_CONFIG_UTF16 or PCRE_CONFIG_UTF32 is passed to
|
||||||
|
\fBpcre_config()\fP, if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 is passed to
|
||||||
|
\fBpcre16_config()\fP, or if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 is passed to
|
||||||
|
\fBpcre32_config()\fP.
|
||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcreapi\fR
|
\fBpcreapi\fP
|
||||||
.\"
|
.\"
|
||||||
page and a description of the POSIX API in the
|
page and a description of the POSIX API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcreposix\fR
|
\fBpcreposix\fP
|
||||||
.\"
|
.\"
|
||||||
page.
|
page.
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_COPY_NAMED_SUBSTRING 3
|
.TH PCRE_COPY_NAMED_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -14,6 +14,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
||||||
.ti +5n
|
.ti +5n
|
||||||
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
|
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
@ -23,8 +39,8 @@ by name, into a given buffer. The arguments are:
|
|||||||
.sp
|
.sp
|
||||||
\fIcode\fP Pattern that was successfully matched
|
\fIcode\fP Pattern that was successfully matched
|
||||||
\fIsubject\fP Subject that has been successfully matched
|
\fIsubject\fP Subject that has been successfully matched
|
||||||
\fIovector\fP Offset vector that \fBpcre_exec()\fP used
|
\fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used
|
||||||
\fIstringcount\fP Value returned by \fBpcre_exec()\fP
|
\fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP
|
||||||
\fIstringname\fP Name of the required substring
|
\fIstringname\fP Name of the required substring
|
||||||
\fIbuffer\fP Buffer to receive the string
|
\fIbuffer\fP Buffer to receive the string
|
||||||
\fIbuffersize\fP Size of buffer
|
\fIbuffersize\fP Size of buffer
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_COPY_SUBSTRING 3
|
.TH PCRE_COPY_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -12,6 +12,18 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
|
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
|
||||||
.ti +5n
|
.ti +5n
|
||||||
.B int \fIbuffersize\fP);
|
.B int \fIbuffersize\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIbuffersize\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIbuffersize\fP);
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
@ -20,8 +32,8 @@ This is a convenience function for extracting a captured substring into a given
|
|||||||
buffer. The arguments are:
|
buffer. The arguments are:
|
||||||
.sp
|
.sp
|
||||||
\fIsubject\fP Subject that has been successfully matched
|
\fIsubject\fP Subject that has been successfully matched
|
||||||
\fIovector\fP Offset vector that \fBpcre_exec()\fP used
|
\fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used
|
||||||
\fIstringcount\fP Value returned by \fBpcre_exec()\fP
|
\fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP
|
||||||
\fIstringnumber\fP Number of the required substring
|
\fIstringnumber\fP Number of the required substring
|
||||||
\fIbuffer\fP Buffer to receive the string
|
\fIbuffer\fP Buffer to receive the string
|
||||||
\fIbuffersize\fP Size of buffer
|
\fIbuffersize\fP Size of buffer
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_DFA_EXEC 3
|
.TH PCRE_DFA_EXEC 3 "24 June 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -14,6 +14,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||||
.ti +5n
|
.ti +5n
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
|
.ti +5n
|
||||||
|
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
|
.ti +5n
|
||||||
|
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
@ -21,10 +37,11 @@ PCRE - Perl-compatible regular expressions
|
|||||||
This function matches a compiled regular expression against a given subject
|
This function matches a compiled regular expression against a given subject
|
||||||
string, using an alternative matching algorithm that scans the subject string
|
string, using an alternative matching algorithm that scans the subject string
|
||||||
just once (\fInot\fP Perl-compatible). Note that the main, Perl-compatible,
|
just once (\fInot\fP Perl-compatible). Note that the main, Perl-compatible,
|
||||||
matching function is \fBpcre_exec()\fP. The arguments for this function are:
|
matching function is \fBpcre[16|32]_exec()\fP. The arguments for this function
|
||||||
|
are:
|
||||||
.sp
|
.sp
|
||||||
\fIcode\fP Points to the compiled pattern
|
\fIcode\fP Points to the compiled pattern
|
||||||
\fIextra\fP Points to an associated \fBpcre_extra\fP structure,
|
\fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
|
||||||
or is NULL
|
or is NULL
|
||||||
\fIsubject\fP Points to the subject string
|
\fIsubject\fP Points to the subject string
|
||||||
\fIlength\fP Length of the subject string, in bytes
|
\fIlength\fP Length of the subject string, in bytes
|
||||||
@ -38,45 +55,64 @@ matching function is \fBpcre_exec()\fP. The arguments for this function are:
|
|||||||
.sp
|
.sp
|
||||||
The options are:
|
The options are:
|
||||||
.sp
|
.sp
|
||||||
PCRE_ANCHORED Match only at the first position
|
PCRE_ANCHORED Match only at the first position
|
||||||
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
||||||
PCRE_BSR_UNICODE \eR matches all Unicode line endings
|
PCRE_BSR_UNICODE \eR matches all Unicode line endings
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, & CRLF as newline sequences
|
||||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
PCRE_NEWLINE_CR Recognize CR as the only newline sequence
|
||||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
PCRE_NEWLINE_CRLF Recognize CRLF as the only newline sequence
|
||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Recognize LF as the only newline sequence
|
||||||
PCRE_NOTBOL Subject is not the beginning of a line
|
PCRE_NOTBOL Subject is not the beginning of a line
|
||||||
PCRE_NOTEOL Subject is not the end of a line
|
PCRE_NOTEOL Subject is not the end of a line
|
||||||
PCRE_NOTEMPTY An empty string is not a valid match
|
PCRE_NOTEMPTY An empty string is not a valid match
|
||||||
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
|
PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject
|
||||||
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
is not a valid match
|
||||||
validity (only relevant if PCRE_UTF8
|
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
|
||||||
was set at compile time)
|
PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16
|
||||||
PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match
|
validity (only relevant if PCRE_UTF16
|
||||||
PCRE_DFA_SHORTEST Return only the shortest match
|
was set at compile time)
|
||||||
PCRE_DFA_RESTART This is a restart after a partial match
|
PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32
|
||||||
|
validity (only relevant if PCRE_UTF32
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
||||||
|
validity (only relevant if PCRE_UTF8
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial
|
||||||
|
PCRE_PARTIAL_SOFT ) match if no full matches are found
|
||||||
|
PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match
|
||||||
|
even if there is a full match as well
|
||||||
|
PCRE_DFA_SHORTEST Return only the shortest match
|
||||||
|
PCRE_DFA_RESTART Restart after a partial match
|
||||||
.sp
|
.sp
|
||||||
There are restrictions on what may appear in a pattern when using this matching
|
There are restrictions on what may appear in a pattern when using this matching
|
||||||
function. Details are given in the
|
function. Details are given in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcrematching\fP
|
\fBpcrematching\fP
|
||||||
.\"
|
.\"
|
||||||
documentation.
|
documentation. For details of partial matching, see the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcrepartial\fP
|
||||||
|
.\"
|
||||||
|
page.
|
||||||
.P
|
.P
|
||||||
A \fBpcre_extra\fP structure contains the following fields:
|
A \fBpcre[16|32]_extra\fP structure contains the following fields:
|
||||||
.sp
|
.sp
|
||||||
\fIflags\fP Bits indicating which fields are set
|
\fIflags\fP Bits indicating which fields are set
|
||||||
\fIstudy_data\fP Opaque data from \fBpcre_study()\fP
|
\fIstudy_data\fP Opaque data from \fBpcre[16|32]_study()\fP
|
||||||
\fImatch_limit\fP Limit on internal resource use
|
\fImatch_limit\fP Limit on internal resource use
|
||||||
\fImatch_limit_recursion\fP Limit on internal recursion depth
|
\fImatch_limit_recursion\fP Limit on internal recursion depth
|
||||||
\fIcallout_data\fP Opaque data passed back to callouts
|
\fIcallout_data\fP Opaque data passed back to callouts
|
||||||
\fItables\fP Points to character tables or is NULL
|
\fItables\fP Points to character tables or is NULL
|
||||||
|
\fImark\fP For passing back a *MARK pointer
|
||||||
|
\fIexecutable_jit\fP Opaque data from JIT compilation
|
||||||
.sp
|
.sp
|
||||||
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
||||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
|
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
|
||||||
PCRE_EXTRA_TABLES. For this matching function, the \fImatch_limit\fP and
|
PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT. For this
|
||||||
\fImatch_limit_recursion\fP fields are not used, and must not be set.
|
matching function, the \fImatch_limit\fP and \fImatch_limit_recursion\fP fields
|
||||||
|
are not used, and must not be set. The PCRE_EXTRA_EXECUTABLE_JIT flag and
|
||||||
|
the corresponding variable are ignored.
|
||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_EXEC 3
|
.TH PCRE_EXEC 3 "24 June 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -12,6 +12,18 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||||
.ti +5n
|
.ti +5n
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
|
.ti +5n
|
||||||
|
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
|
.ti +5n
|
||||||
|
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
@ -21,7 +33,7 @@ string, using a matching algorithm that is similar to Perl's. It returns
|
|||||||
offsets to captured substrings. Its arguments are:
|
offsets to captured substrings. Its arguments are:
|
||||||
.sp
|
.sp
|
||||||
\fIcode\fP Points to the compiled pattern
|
\fIcode\fP Points to the compiled pattern
|
||||||
\fIextra\fP Points to an associated \fBpcre_extra\fP structure,
|
\fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
|
||||||
or is NULL
|
or is NULL
|
||||||
\fIsubject\fP Points to the subject string
|
\fIsubject\fP Points to the subject string
|
||||||
\fIlength\fP Length of the subject string, in bytes
|
\fIlength\fP Length of the subject string, in bytes
|
||||||
@ -33,42 +45,52 @@ offsets to captured substrings. Its arguments are:
|
|||||||
.sp
|
.sp
|
||||||
The options are:
|
The options are:
|
||||||
.sp
|
.sp
|
||||||
PCRE_ANCHORED Match only at the first position
|
PCRE_ANCHORED Match only at the first position
|
||||||
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
||||||
PCRE_BSR_UNICODE \eR matches all Unicode line endings
|
PCRE_BSR_UNICODE \eR matches all Unicode line endings
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, & CRLF as newline sequences
|
||||||
PCRE_NEWLINE_CR Set CR as the newline sequence
|
PCRE_NEWLINE_CR Recognize CR as the only newline sequence
|
||||||
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
|
PCRE_NEWLINE_CRLF Recognize CRLF as the only newline sequence
|
||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Recognize LF as the only newline sequence
|
||||||
PCRE_NOTBOL Subject is not the beginning of a line
|
PCRE_NOTBOL Subject string is not the beginning of a line
|
||||||
PCRE_NOTEOL Subject is not the end of a line
|
PCRE_NOTEOL Subject string is not the end of a line
|
||||||
PCRE_NOTEMPTY An empty string is not a valid match
|
PCRE_NOTEMPTY An empty string is not a valid match
|
||||||
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
|
PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject
|
||||||
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
is not a valid match
|
||||||
validity (only relevant if PCRE_UTF8
|
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
|
||||||
was set at compile time)
|
PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16
|
||||||
PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match
|
validity (only relevant if PCRE_UTF16
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32
|
||||||
|
validity (only relevant if PCRE_UTF32
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
|
||||||
|
validity (only relevant if PCRE_UTF8
|
||||||
|
was set at compile time)
|
||||||
|
PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial
|
||||||
|
PCRE_PARTIAL_SOFT ) match if no full matches are found
|
||||||
|
PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match
|
||||||
|
if that is found before a full match
|
||||||
.sp
|
.sp
|
||||||
There are restrictions on what may appear in a pattern when partial matching is
|
For details of partial matching, see the
|
||||||
requested. For details, see the
|
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcrepartial\fP
|
\fBpcrepartial\fP
|
||||||
.\"
|
.\"
|
||||||
page.
|
page. A \fBpcre[16|32]_extra\fP structure contains the following fields:
|
||||||
.P
|
|
||||||
A \fBpcre_extra\fP structure contains the following fields:
|
|
||||||
.sp
|
.sp
|
||||||
\fIflags\fP Bits indicating which fields are set
|
\fIflags\fP Bits indicating which fields are set
|
||||||
\fIstudy_data\fP Opaque data from \fBpcre_study()\fP
|
\fIstudy_data\fP Opaque data from \fBpcre[16|32]_study()\fP
|
||||||
\fImatch_limit\fP Limit on internal resource use
|
\fImatch_limit\fP Limit on internal resource use
|
||||||
\fImatch_limit_recursion\fP Limit on internal recursion depth
|
\fImatch_limit_recursion\fP Limit on internal recursion depth
|
||||||
\fIcallout_data\fP Opaque data passed back to callouts
|
\fIcallout_data\fP Opaque data passed back to callouts
|
||||||
\fItables\fP Points to character tables or is NULL
|
\fItables\fP Points to character tables or is NULL
|
||||||
|
\fImark\fP For passing back a *MARK pointer
|
||||||
|
\fIexecutable_jit\fP Opaque data from JIT compilation
|
||||||
.sp
|
.sp
|
||||||
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
|
||||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
|
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
|
||||||
PCRE_EXTRA_TABLES.
|
PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT.
|
||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
|
31
tools/pcre/doc/pcre_free_study.3
Normal file
31
tools/pcre/doc/pcre_free_study.3
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
.TH PCRE_FREE_STUDY 3 "24 June 2012" "PCRE 8.30"
|
||||||
|
.SH NAME
|
||||||
|
PCRE - Perl-compatible regular expressions
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B #include <pcre.h>
|
||||||
|
.PP
|
||||||
|
.SM
|
||||||
|
.B void pcre_free_study(pcre_extra *\fIextra\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre16_free_study(pcre16_extra *\fIextra\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
|
||||||
|
.
|
||||||
|
.SH DESCRIPTION
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
This function is used to free the memory used for the data generated by a call
|
||||||
|
to \fBpcre[16|32]_study()\fP when it is no longer needed. The argument must be the
|
||||||
|
result of such a call.
|
||||||
|
.P
|
||||||
|
There is a complete description of the PCRE native API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcreapi\fP
|
||||||
|
.\"
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcreposix\fP
|
||||||
|
.\"
|
||||||
|
page.
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_FREE_SUBSTRING 3
|
.TH PCRE_FREE_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -8,13 +8,17 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.PP
|
.PP
|
||||||
.SM
|
.SM
|
||||||
.B void pcre_free_substring(const char *\fIstringptr\fP);
|
.B void pcre_free_substring(const char *\fIstringptr\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This is a convenience function for freeing the store obtained by a previous
|
This is a convenience function for freeing the store obtained by a previous
|
||||||
call to \fBpcre_get_substring()\fP or \fBpcre_get_named_substring()\fP. Its
|
call to \fBpcre[16|32]_get_substring()\fP or \fBpcre[16|32]_get_named_substring()\fP.
|
||||||
only argument is a pointer to the string.
|
Its only argument is a pointer to the string.
|
||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_FREE_SUBSTRING_LIST 3
|
.TH PCRE_FREE_SUBSTRING_LIST 3 "24 June 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -8,13 +8,17 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.PP
|
.PP
|
||||||
.SM
|
.SM
|
||||||
.B void pcre_free_substring_list(const char **\fIstringptr\fP);
|
.B void pcre_free_substring_list(const char **\fIstringptr\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP);
|
||||||
|
.PP
|
||||||
|
.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This is a convenience function for freeing the store obtained by a previous
|
This is a convenience function for freeing the store obtained by a previous
|
||||||
call to \fBpcre_get_substring_list()\fP. Its only argument is a pointer to the
|
call to \fBpcre[16|32]_get_substring_list()\fP. Its only argument is a pointer to
|
||||||
list of string pointers.
|
the list of string pointers.
|
||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_FULLINFO 3
|
.TH PCRE_FULLINFO 3 "24 June 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -10,6 +10,14 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.ti +5n
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
@ -17,7 +25,7 @@ PCRE - Perl-compatible regular expressions
|
|||||||
This function returns information about a compiled pattern. Its arguments are:
|
This function returns information about a compiled pattern. Its arguments are:
|
||||||
.sp
|
.sp
|
||||||
\fIcode\fP Compiled regular expression
|
\fIcode\fP Compiled regular expression
|
||||||
\fIextra\fP Result of \fBpcre_study()\fP or NULL
|
\fIextra\fP Result of \fBpcre[16|32]_study()\fP or NULL
|
||||||
\fIwhat\fP What information is required
|
\fIwhat\fP What information is required
|
||||||
\fIwhere\fP Where to put the information
|
\fIwhere\fP Where to put the information
|
||||||
.sp
|
.sp
|
||||||
@ -26,20 +34,48 @@ The following information is available:
|
|||||||
PCRE_INFO_BACKREFMAX Number of highest back reference
|
PCRE_INFO_BACKREFMAX Number of highest back reference
|
||||||
PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns
|
PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns
|
||||||
PCRE_INFO_DEFAULT_TABLES Pointer to default tables
|
PCRE_INFO_DEFAULT_TABLES Pointer to default tables
|
||||||
PCRE_INFO_FIRSTBYTE Fixed first byte for a match, or
|
PCRE_INFO_FIRSTBYTE Fixed first data unit for a match, or
|
||||||
-1 for start of string
|
-1 for start of string
|
||||||
or after newline, or
|
or after newline, or
|
||||||
-2 otherwise
|
-2 otherwise
|
||||||
PCRE_INFO_FIRSTTABLE Table of first bytes (after studying)
|
PCRE_INFO_FIRSTTABLE Table of first data units (after studying)
|
||||||
|
PCRE_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist
|
||||||
PCRE_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
PCRE_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
||||||
PCRE_INFO_LASTLITERAL Literal last byte required
|
PCRE_INFO_JIT Return 1 after successful JIT compilation
|
||||||
|
PCRE_INFO_JITSIZE Size of JIT compiled code
|
||||||
|
PCRE_INFO_LASTLITERAL Literal last data unit required
|
||||||
|
PCRE_INFO_MINLENGTH Lower bound length of matching strings
|
||||||
PCRE_INFO_NAMECOUNT Number of named subpatterns
|
PCRE_INFO_NAMECOUNT Number of named subpatterns
|
||||||
PCRE_INFO_NAMEENTRYSIZE Size of name table entry
|
PCRE_INFO_NAMEENTRYSIZE Size of name table entry
|
||||||
PCRE_INFO_NAMETABLE Pointer to name table
|
PCRE_INFO_NAMETABLE Pointer to name table
|
||||||
PCRE_INFO_OKPARTIAL Return 1 if partial matching can be tried
|
PCRE_INFO_OKPARTIAL Return 1 if partial matching can be tried
|
||||||
|
(always returns 1 after release 8.00)
|
||||||
PCRE_INFO_OPTIONS Option bits used for compilation
|
PCRE_INFO_OPTIONS Option bits used for compilation
|
||||||
PCRE_INFO_SIZE Size of compiled pattern
|
PCRE_INFO_SIZE Size of compiled pattern
|
||||||
PCRE_INFO_STUDYSIZE Size of study data
|
PCRE_INFO_STUDYSIZE Size of study data
|
||||||
|
PCRE_INFO_FIRSTCHARACTER Fixed first data unit for a match
|
||||||
|
PCRE_INFO_FIRSTCHARACTERFLAGS Returns
|
||||||
|
1 if there is a first data character set, which can
|
||||||
|
then be retrieved using PCRE_INFO_FIRSTCHARACTER,
|
||||||
|
2 if the first character is at the start of the data
|
||||||
|
string or after a newline, and
|
||||||
|
0 otherwise
|
||||||
|
PCRE_INFO_REQUIREDCHAR Literal last data unit required
|
||||||
|
PCRE_INFO_REQUIREDCHARFLAGS Returns 1 if the last data character is set (which can then
|
||||||
|
be retrieved using PCRE_INFO_REQUIREDCHAR); 0 otherwise
|
||||||
|
.sp
|
||||||
|
The \fIwhere\fP argument must point to an integer variable, except for the
|
||||||
|
following \fIwhat\fP values:
|
||||||
|
.sp
|
||||||
|
PCRE_INFO_DEFAULT_TABLES const unsigned char *
|
||||||
|
PCRE_INFO_FIRSTTABLE const unsigned char *
|
||||||
|
PCRE_INFO_NAMETABLE PCRE_SPTR16 (16-bit library)
|
||||||
|
PCRE_INFO_NAMETABLE PCRE_SPTR32 (32-bit library)
|
||||||
|
PCRE_INFO_NAMETABLE const unsigned char * (8-bit library)
|
||||||
|
PCRE_INFO_OPTIONS unsigned long int
|
||||||
|
PCRE_INFO_SIZE size_t
|
||||||
|
PCRE_INFO_FIRSTCHARACTER uint32_t
|
||||||
|
PCRE_INFO_REQUIREDCHAR uint32_t
|
||||||
.sp
|
.sp
|
||||||
The yield of the function is zero on success or:
|
The yield of the function is zero on success or:
|
||||||
.sp
|
.sp
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_GET_NAMED_SUBSTRING 3
|
.TH PCRE_GET_NAMED_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -14,6 +14,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
||||||
.ti +5n
|
.ti +5n
|
||||||
.B const char **\fIstringptr\fP);
|
.B const char **\fIstringptr\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR16 *\fIstringptr\fP);
|
||||||
|
.PP
|
||||||
|
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
||||||
|
.ti +5n
|
||||||
|
.B PCRE_SPTR32 *\fIstringptr\fP);
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
@ -23,16 +39,17 @@ arguments are:
|
|||||||
.sp
|
.sp
|
||||||
\fIcode\fP Compiled pattern
|
\fIcode\fP Compiled pattern
|
||||||
\fIsubject\fP Subject that has been successfully matched
|
\fIsubject\fP Subject that has been successfully matched
|
||||||
\fIovector\fP Offset vector that \fBpcre_exec()\fP used
|
\fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used
|
||||||
\fIstringcount\fP Value returned by \fBpcre_exec()\fP
|
\fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP
|
||||||
\fIstringname\fP Name of the required substring
|
\fIstringname\fP Name of the required substring
|
||||||
\fIstringptr\fP Where to put the string pointer
|
\fIstringptr\fP Where to put the string pointer
|
||||||
.sp
|
.sp
|
||||||
The memory in which the substring is placed is obtained by calling
|
The memory in which the substring is placed is obtained by calling
|
||||||
\fBpcre_malloc()\fP. The convenience function \fBpcre_free_substring()\fP can
|
\fBpcre[16|32]_malloc()\fP. The convenience function
|
||||||
be used to free it when it is no longer needed. The yield of the function is
|
\fBpcre[16|32]_free_substring()\fP can be used to free it when it is no longer
|
||||||
the length of the extracted substring, PCRE_ERROR_NOMEMORY if sufficient memory
|
needed. The yield of the function is the length of the extracted substring,
|
||||||
could not be obtained, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
|
PCRE_ERROR_NOMEMORY if sufficient memory could not be obtained, or
|
||||||
|
PCRE_ERROR_NOSUBSTRING if the string name is invalid.
|
||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE native API in the
|
There is a complete description of the PCRE native API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user